Diffstat (limited to 'drivers/gpu')
1377 files changed, 123006 insertions, 20901 deletions
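Several hunks below update callers of drm_exec_init(): the helper now takes a third argument, a hint for how many objects will be locked (0 lets it pick a sensible default). A minimal sketch of the new calling convention, assuming a placeholder struct drm_gem_object *obj:

#include <drm/drm_exec.h>

static int lock_one_obj(struct drm_gem_object *obj)
{
	struct drm_exec exec;
	int r = 0;

	/* Flags first, then the new expected-object-count hint. */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_lock_obj(&exec, obj);
		/* Restart the loop if ww-mutex contention was hit. */
		drm_exec_retry_on_contention(&exec);
		if (r)
			goto out;
	}
out:
	drm_exec_fini(&exec);
	return r;
}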
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 3eee8636f847..2520db0b776e 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -74,16 +74,17 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS - depends on DRM && KUNIT - select PRIME_NUMBERS + depends on DRM && KUNIT && MMU + select DRM_BUDDY select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER - select DRM_LIB_RANDOM - select DRM_KMS_HELPER - select DRM_BUDDY + select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m + select DRM_GEM_SHMEM_HELPER + select DRM_KMS_HELPER select DRM_KUNIT_TEST_HELPERS - select DRM_EXEC + select DRM_LIB_RANDOM + select PRIME_NUMBERS default KUNIT_ALL_TESTS help This builds unit tests for DRM. This option is not useful for @@ -275,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig" source "drivers/gpu/drm/i915/Kconfig" +source "drivers/gpu/drm/xe/Kconfig" + source "drivers/gpu/drm/kmb/Kconfig" config DRM_VGEM @@ -394,6 +397,8 @@ source "drivers/gpu/drm/solomon/Kconfig" source "drivers/gpu/drm/sprd/Kconfig" +source "drivers/gpu/drm/imagination/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" depends on DRM && PCI && MMU && HYPERV @@ -407,27 +412,6 @@ config DRM_HYPERV If M is selected the module will be called hyperv_drm. -# Keep legacy drivers last - -menuconfig DRM_LEGACY - bool "Enable legacy drivers (DANGEROUS)" - depends on DRM && MMU - help - Enable legacy DRI1 drivers. Those drivers expose unsafe and dangerous - APIs to user-space, which can be used to circumvent access - restrictions and other security measures. For backwards compatibility - those drivers are still available, but their use is highly - inadvisable and might harm your system. - - You are recommended to use the safe modeset-only drivers instead, and - perform 3D emulation in user-space. - - Unless you have strong reasons to go rogue, say "N". 
- -if DRM_LEGACY -# leave here to list legacy drivers -endif # DRM_LEGACY - config DRM_EXPORT_FOR_TESTS bool
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 8e1bde059170..104b42df2e95 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -22,6 +22,7 @@ drm-y := \ drm_drv.o \ drm_dumb_buffers.o \ drm_edid.o \ + drm_eld.o \ drm_encoder.o \ drm_file.o \ drm_fourcc.o \ @@ -46,18 +47,6 @@ drm-y := \ drm_vblank_work.o \ drm_vma_manager.o \ drm_writeback.o -drm-$(CONFIG_DRM_LEGACY) += \ - drm_agpsupport.o \ - drm_bufs.o \ - drm_context.o \ - drm_dma.o \ - drm_hashtab.o \ - drm_irq.o \ - drm_legacy_misc.o \ - drm_lock.o \ - drm_memory.o \ - drm_scatter.o \ - drm_vm.o drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_PANEL) += drm_panel.o @@ -145,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/ obj-$(CONFIG_DRM_I915) += i915/ +obj-$(CONFIG_DRM_XE) += xe/ obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_V3D) += v3d/ @@ -198,3 +188,4 @@ obj-$(CONFIG_DRM_HYPERV) += hyperv/ obj-y += solomon/ obj-$(CONFIG_DRM_SPRD) += sprd/ obj-$(CONFIG_DRM_LOONGSON) += loongson/ +obj-$(CONFIG_DRM_POWERVR) += imagination/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2afecc55090f..260e32ef7bae 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 02f4c6f9d4f6..576067d66bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9d92ca157677..9da14436a373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -109,6 +109,8 @@ #include "amdgpu_mca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_seq64.h" +#include "amdgpu_reg_state.h" #define MAX_GPU_INSTANCE 64 @@ -250,6 +252,10 @@ extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_wbrf; + +extern int fw_bo_location; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -468,6 +474,7 @@ struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; struct amdgpu_bo_va *csa_va; + struct amdgpu_bo_va *seq64_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -506,6 +513,31 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +/** + * enum amd_reset_method - Methods for resetting AMD GPU devices + * + * @AMD_RESET_METHOD_NONE: The device will not be reset. + * @AMD_RESET_METHOD_LEGACY: Method reserved for SI, CIK and VI ASICs. + * @AMD_RESET_METHOD_MODE0: Reset the entire ASIC. Not currently available on + * any device. + * @AMD_RESET_METHOD_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.) + * individually. Suitable only for some discrete GPUs, not + * available for all ASICs. + * @AMD_RESET_METHOD_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs + * are reset depends on the ASIC. Notably doesn't reset IPs + * shared with the CPU on APUs or the memory controllers (so + * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_METHOD_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card + * but without powering off the PCI bus. Suitable only for + * discrete GPUs. + * @AMD_RESET_METHOD_PCI: Does a full bus reset using the core Linux PCI reset + * subsystem and does a secondary bus reset or FLR, depending on what the + * underlying hardware supports. + * + * Methods available to the AMD GPU driver for resetting the device. Not all + * methods are suitable for every device. The user can override the method using the + * module parameter `reset_method`. 
+ */ enum amd_reset_method { AMD_RESET_METHOD_NONE = -1, AMD_RESET_METHOD_LEGACY = 0, @@ -585,6 +617,10 @@ struct amdgpu_asic_funcs { const struct amdgpu_video_codecs **codecs); /* encode "> 32bits" smn addressing */ u64 (*encode_ext_smn_addressing)(int ext_id); + + ssize_t (*get_reg_state)(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); }; /* @@ -757,6 +793,7 @@ struct amdgpu_mqd_prop { uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; uint32_t hqd_queue_priority; + bool allow_tunneling; bool hqd_active; }; @@ -986,6 +1023,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* for userq and VM fences */ + struct amdgpu_seq64 seq64; + /* KFD */ struct amdgpu_kfd_dev kfd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 75dc58470393..067690ba7bff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -142,6 +142,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; + int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -160,6 +161,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); + return; + } + /* this is going to have a few of the MSBs set that we need to * clear */ @@ -198,6 +205,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + if (adev->kfd.init_complete) + drm_client_register(&adev->kfd.client); + else + drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -710,35 +721,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid) -{ - if (adev->family == AMDGPU_FAMILY_AI) { - int i; - - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); - } - - return 0; -} - -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type, - uint32_t inst) -{ - bool all_hub = false; - - if (adev->family == AMDGPU_FAMILY_AI || - adev->family == AMDGPU_FAMILY_RV) - all_hub = true; - - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); -} - bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) { return adev->have_atomics_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index dac983da961d..cf6ed5fce291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -33,6 +33,7 @@ #include <linux/mmu_notifier.h> #include <linux/memremap.h> #include <kgd_kfd_interface.h> +#include <drm/drm_client.h> #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -83,6 +84,7 @@ struct kgd_mem { struct amdgpu_sync sync; + uint32_t gem_handle; bool aql_queue; bool is_imported; }; @@ -105,6 +107,9 @@ struct amdgpu_kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Client for KFD BO GEM handle allocations */ + struct drm_client_dev client; }; enum kgd_engine_type { @@ -162,11 +167,6 @@ int 
amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type, - uint32_t inst); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -314,11 +314,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dmabuf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset); +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, struct dma_buf **dmabuf); void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 625db444df1c..899e31e3a5e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (!(ring && ring->sched.thread)) + if (!(ring && drm_sched_wqueue_ready(&ring->sched))) continue; /* stop scheduler and drain ring. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 469785d33791..1ef758ac5076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) return NULL; fence = container_of(f, struct amdgpu_amdkfd_fence, base); - if (fence && f->ops == &amdkfd_fence_ops) + if (f->ops == &amdkfd_fence_ops) return fence; return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index f6598b9e4faa..a5c7259cf2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6bf448ab3dff..ca4a6b82817f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index cd06e4a6d1da..0f3e2944edd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4+2+3+7) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 00fbc0f44c92..5a35a8ca8922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -363,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -460,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, 
sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 41fbc4fd0fac..d17b2452cb1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> +#include <linux/fdtable.h> #include <drm/ttm/ttm_tt.h> #include <drm/drm_exec.h> @@ -806,13 +807,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, static int kfd_mem_export_dmabuf(struct kgd_mem *mem) { if (!mem->dmabuf) { - struct dma_buf *ret = amdgpu_gem_prime_export( - &mem->bo->tbo.base, + struct amdgpu_device *bo_adev; + struct dma_buf *dmabuf; + int r, fd; + + bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, + mem->gem_handle, mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? - DRM_RDWR : 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - mem->dmabuf = ret; + DRM_RDWR : 0, &fd); + if (r) + return r; + dmabuf = dma_buf_get(fd); + close_fd(fd); + if (WARN_ON_ONCE(IS_ERR(dmabuf))) + return PTR_ERR(dmabuf); + mem->dmabuf = dmabuf; } return 0; @@ -1137,7 +1147,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->n_vms = 1; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2); drm_exec_retry_on_contention(&ctx->exec); @@ -1176,7 +1186,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -1384,7 +1394,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, amdgpu_amdkfd_restore_userptr_worker); *process_info = info; - *ef = dma_fence_get(&info->eviction_fence->base); } vm->process_info = *process_info; @@ -1415,6 +1424,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, list_add_tail(&vm->vm_list_node, &(vm->process_info->vm_list_head)); vm->process_info->n_vms++; + + *ef = dma_fence_get(&vm->process_info->eviction_fence->base); mutex_unlock(&vm->process_info->lock); return 0; @@ -1426,10 +1437,7 @@ validate_pd_fail: reserve_pd_fail: vm->process_info = NULL; if (info) { - /* Two fence references: one in info and one in *ef */ dma_fence_put(&info->eviction_fence->base); - dma_fence_put(*ef); - *ef = NULL; *process_info = NULL; put_pid(info->pid); create_evict_fence_fail: @@ -1623,7 +1631,8 @@ int amdgpu_amdkfd_criu_resume(void *p) goto out_unlock; } WRITE_ONCE(pinfo->block_mmu_notifications, false); - schedule_delayed_work(&pinfo->restore_userptr_work, 0); + queue_delayed_work(system_freezable_wq, + &pinfo->restore_userptr_work, 0); out_unlock: mutex_unlock(&pinfo->lock); @@ -1779,6 +1788,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("Failed to allow vma node access. 
ret %d\n", ret); goto err_node_allow; } + ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); + if (ret) + goto err_gem_handle_create; bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1830,6 +1842,8 @@ allocate_init_user_pages_failed: err_pin_bo: err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle); +err_gem_handle_create: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ @@ -1942,8 +1956,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - if (mem->dmabuf) + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + if (mem->dmabuf) { dma_buf_put(mem->dmabuf); + mem->dmabuf = NULL; + } mutex_destroy(&mem->lock); /* If this releases the last reference, it will end up calling @@ -2295,34 +2312,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, return 0; } -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dma_buf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset) +static int import_obj_create(struct amdgpu_device *adev, + struct dma_buf *dma_buf, + struct drm_gem_object *obj, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct drm_gem_object *obj; struct amdgpu_bo *bo; int ret; - obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf); - if (IS_ERR(obj)) - return PTR_ERR(obj); - bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) { + AMDGPU_GEM_DOMAIN_GTT))) /* Only VRAM and GTT BOs are supported */ - ret = -EINVAL; - goto err_put_obj; - } + return -EINVAL; *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) { - ret = -ENOMEM; - goto err_put_obj; - } + if (!*mem) + return -ENOMEM; ret = drm_vma_node_allow(&obj->vma_node, drm_priv); if (ret) @@ -2372,8 +2381,41 @@ err_remove_mem: drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct drm_gem_object *obj; + uint32_t handle; + int ret; + + ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd, + &handle); + if (ret) + return ret; + obj = drm_gem_object_lookup(adev->kfd.client.file, handle); + if (!obj) { + ret = -EINVAL; + goto err_release_handle; + } + + ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size, + mmap_offset); + if (ret) + goto err_put_obj; + + (*mem)->gem_handle = handle; + + return 0; + err_put_obj: drm_gem_object_put(obj); +err_release_handle: + drm_gem_handle_delete(adev->kfd.client.file, handle); return ret; } @@ -2426,7 +2468,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); } mutex_unlock(&process_info->notifier_lock); @@ -2552,7 +2595,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info 
*process_info) amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); /* Reserve all BOs and page tables for validation */ drm_exec_until_all_locked(&exec) { /* Reserve all the page directories */ @@ -2749,7 +2792,8 @@ unlock_out: /* If validation failed, reschedule another attempt */ if (evicted_bos) { - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); kfd_smi_event_queue_restore_rescheduled(mm); @@ -2758,6 +2802,23 @@ unlock_out: put_task_struct(usertask); } +static void replace_eviction_fence(struct dma_fence **ef, + struct dma_fence *new_ef) +{ + struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true + /* protected by process_info->lock */); + + /* If we're replacing an unsignaled eviction fence, that fence will + * never be signaled, and if anyone is still waiting on that fence, + * they will hang forever. This should never happen. We should only + * replace the fence in restore_work that only gets scheduled after + * eviction work signaled the fence. + */ + WARN_ONCE(!dma_fence_is_signaled(old_ef), + "Replacing unsignaled eviction fence"); + dma_fence_put(old_ef); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * @@ -2781,7 +2842,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; struct kgd_mem *mem; - struct amdgpu_amdkfd_fence *new_fence; struct list_head duplicate_save; struct amdgpu_sync sync_obj; unsigned long failed_size = 0; @@ -2793,7 +2853,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { @@ -2825,12 +2885,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (ret) goto validate_map_fail; - ret = process_sync_pds_resv(process_info, &sync_obj); - if (ret) { - pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); - goto validate_map_fail; - } - /* Validate BOs and map them to GPUVM (update VM page tables). */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { @@ -2881,6 +2935,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (failed_size) pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + /* Update mappings not managed by KFD */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_device *adev = amdgpu_ttm_adev( + peer_vm->root.bo->tbo.bdev); + + ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket); + if (ret) { + pr_debug("Memory eviction: handle moved failed. Try again\n"); + goto validate_map_fail; + } + } + /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); if (ret) { @@ -2888,25 +2955,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } + /* Sync with fences on all the page tables. They implicitly depend on any + * move fences from amdgpu_vm_handle_moved above. + */ + ret = process_sync_pds_resv(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. 
Try again\n"); + goto validate_map_fail; + } + /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); - /* Release old eviction fence and create new one, because fence only - * goes from unsignaled to signaled, fence cannot be reused. - * Use context and mm from the old fence. + /* The old eviction fence may be unsignaled if restore happens + * after a GPU reset or suspend/resume. Keep the old fence in that + * case. Otherwise release the old eviction fence and create new + * one, because fence only goes from unsignaled to signaled once + * and cannot be reused. Use context and mm from the old fence. + * + * If an old eviction fence signals after this check, that's OK. + * Anyone signaling an eviction fence must stop the queues first + * and schedule another restore worker. */ - new_fence = amdgpu_amdkfd_fence_create( + if (dma_fence_is_signaled(&process_info->eviction_fence->base)) { + struct amdgpu_amdkfd_fence *new_fence = + amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, process_info->eviction_fence->mm, NULL); - if (!new_fence) { - pr_err("Failed to create eviction fence\n"); - ret = -ENOMEM; - goto validate_map_fail; + + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + replace_eviction_fence(ef, dma_fence_get(&new_fence->base)); + } else { + WARN_ONCE(*ef != &process_info->eviction_fence->base, + "KFD eviction fence doesn't match KGD process_info"); } - dma_fence_put(&process_info->eviction_fence->base); - process_info->eviction_fence = new_fence; - *ef = dma_fence_get(&new_fence->base); /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 7473a42f7d45..9caba10315a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct amdgpu_connector_atom_dig *dig_connector; int bpc = 8; - unsigned mode_clock, max_tmds_clock; + unsigned int mode_clock, max_tmds_clock; switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: @@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector) return amdgpu_connector->edid; } else if (edid_blob) { struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL); + if (edid) amdgpu_connector->edid = edid; } @@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); } else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector) return false; } +static void 
amdgpu_connector_shared_ddc(enum drm_connector_status *status, + struct drm_connector *connector, + struct amdgpu_connector *amdgpu_connector) +{ + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + struct amdgpu_connector *list_amdgpu_connector; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + + if (amdgpu_connector->shared_ddc && *status == connector_status_connected) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { + if (connector == list_connector) + continue; + list_amdgpu_connector = to_amdgpu_connector(list_connector); + if (list_amdgpu_connector->shared_ddc && + list_amdgpu_connector->ddc_bus->rec.i2c_id == + amdgpu_connector->ddc_bus->rec.i2c_id) { + /* cases where both connectors are digital */ + if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { + /* hpd is our only option in this case */ + if (!amdgpu_display_hpd_sense(adev, + amdgpu_connector->hpd.hpd)) { + amdgpu_connector_free_edid(connector); + *status = connector_status_disconnected; + } + } + } + } + drm_connector_list_iter_end(&iter); + } +} + /* * DVI is complicated * Do a DDC probe, if DDC probe passes, get the full EDID so @@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) * DDC line. The latter is more complex because with DVI<->HDMI adapters * you don't really know what's connected to which port as both are digital. */ - if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *list_amdgpu_connector; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(list_connector, - &iter) { - if (connector == list_connector) - continue; - list_amdgpu_connector = to_amdgpu_connector(list_connector); - if (list_amdgpu_connector->shared_ddc && - (list_amdgpu_connector->ddc_bus->rec.i2c_id == - amdgpu_connector->ddc_bus->rec.i2c_id)) { - /* cases where both connectors are digital */ - if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { - /* hpd is our only option in this case */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - amdgpu_connector_free_edid(connector); - ret = connector_status_disconnected; - } - } - } - } - drm_connector_list_iter_end(&iter); - } + amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector); } } @@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) static void amdgpu_connector_dvi_force(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + if (connector->force == DRM_FORCE_ON) amdgpu_connector->use_digital = false; if (connector->force == DRM_FORCE_ON_DIGITAL) @@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; else if (amdgpu_connector->dac_load_detect) { /* try load detection */ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; + ret = encoder_funcs->detect(encoder, connector); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e50be6500030..6adeddfb3d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_sync_create(&p->sync); drm_exec_init(&p->exec, 
DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); return 0; } @@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = e->bo; int i; - e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); + e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL); if (!e->user_pages) { DRM_ERROR("kvmalloc_array failure\n"); r = -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 720011019741..796fa6f1420b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e2ae9ba147ba..5cb33ac99f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -73,10 +73,10 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_VERY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_NORMAL: return DRM_SCHED_PRIORITY_NORMAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e61ebdb3f3e..e485dd3357c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos); + if (upper_32_bits(*pos)) + value = RREG32_PCIE_EXT(*pos); + else + value = RREG32_PCIE(*pos); + r = put_user(value, (uint32_t *)buf); if (r) goto out; @@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (r) goto out; - WREG32_PCIE(*pos, value); + if (upper_32_bits(*pos)) + WREG32_PCIE_EXT(*pos, value); + else + WREG32_PCIE(*pos, value); result += 4; buf += 4; @@ -755,7 +762,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, int r; if (!adev->smc_rreg) - return -EPERM; + return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -814,7 +821,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * int r; if (!adev->smc_wreg) - return -EPERM; + return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -1671,9 +1678,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; - kthread_park(ring->sched.thread); + 
drm_sched_wqueue_stop(&ring->sched); } seq_puts(m, "run ib test:\n"); @@ -1687,9 +1694,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; - kthread_unpark(ring->sched.thread); + drm_sched_wqueue_start(&ring->sched); } up_write(&adev->reset_domain->sem); @@ -1909,7 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) + if (!ring || !ring->funcs->preempt_ib || + !drm_sched_wqueue_ready(&ring->sched)) return -EINVAL; /* the last preemption failed */ @@ -1927,7 +1935,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) goto pro_end; /* stop the scheduler */ - kthread_park(ring->sched.thread); + drm_sched_wqueue_stop(&ring->sched); /* preempt the IB */ r = amdgpu_ring_preempt_ib(ring); @@ -1961,7 +1969,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) failure: /* restart the scheduler */ - kthread_unpark(ring->sched.thread); + drm_sched_wqueue_start(&ring->sched); up_read(&adev->reset_domain->sem); @@ -2146,6 +2154,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_firmware_init(adev); amdgpu_ta_if_debugfs_init(adev); + amdgpu_debugfs_mes_event_log_init(adev); + #if defined(CONFIG_DRM_AMD_DC) if (adev->dc_enabled) dtn_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 371a6f0deb29..0425432d8659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 93cf73d6fa11..5bb444bb36ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t ppos, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t bytes_read; + + switch (ppos) { + case AMDGPU_SYS_REG_STATE_XGMI: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count); + break; + case AMDGPU_SYS_REG_STATE_WAFL: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count); + break; + case AMDGPU_SYS_REG_STATE_PCIE: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR_1: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count); + break; + default: + return -EINVAL; + } + + return 
bytes_read; +} + +BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); + +int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) +{ + int ret; + + if (!amdgpu_asic_get_reg_state_supported(adev)) + return 0; + + ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state); + + return ret; +} + +void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_asic_get_reg_state_supported(adev)) + return; + sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state); +} + /** * DOC: board_info * @@ -1540,7 +1599,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) if (adev->mman.keep_stolen_vga_memory) return false; - return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0); + return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0); } /* @@ -1551,11 +1610,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 */ -static bool amdgpu_device_pcie_dynamic_switching_supported(void) +static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev) { #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); + /* eGPU change speeds based on USB4 fabric conditions */ + if (dev_is_removable(adev->dev)) + return true; + if (c->x86_vendor == X86_VENDOR_INTEL) return false; #endif @@ -2388,7 +2451,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; - if (!amdgpu_device_pcie_dynamic_switching_supported()) + if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; total = true; @@ -2566,7 +2629,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, @@ -2669,6 +2732,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) goto init_failed; } } + + r = amdgpu_seq64_init(adev); + if (r) { + DRM_ERROR("allocate seq64 failed %d\n", r); + goto init_failed; + } } } @@ -3131,6 +3200,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_wb_fini(adev); amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); + amdgpu_seq64_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); @@ -4211,6 +4281,7 @@ fence_driver_init: "Could not create amdgpu board attributes\n"); amdgpu_fru_sysfs_init(adev); + amdgpu_reg_state_sysfs_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4333,6 +4404,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); amdgpu_fru_sysfs_fini(adev); + amdgpu_reg_state_sysfs_fini(adev); + /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); @@ -4957,7 +5030,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; 
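/*
 * The scheduler hunks nearby replace direct tests of ring->sched.thread with
 * drm_sched_wqueue_ready()/_stop()/_start(), which came with drm_sched's move
 * from a kthread to a workqueue. A minimal sketch, assuming a placeholder
 * struct amdgpu_ring *ring, of pausing one scheduler around hardware access:
 */
if (drm_sched_wqueue_ready(&ring->sched)) {
	drm_sched_wqueue_stop(&ring->sched);	/* was kthread_park() */
	/* ... safely touch the hardware ... */
	drm_sched_wqueue_start(&ring->sched);	/* was kthread_unpark() */
}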
spin_lock(&ring->sched.job_list_lock); @@ -5096,7 +5169,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; /* Clear job fence from fence drv to avoid force_completion @@ -5585,7 +5658,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); @@ -5661,7 +5734,7 @@ skip_hw_reset: for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; drm_sched_start(&ring->sched, true); @@ -5724,6 +5797,39 @@ recover_end: } /** + * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * first physical partner to an AMD dGPU. + * This will exclude any virtual switches and links. + */ +static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } +} + +/** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * * @adev: amdgpu_device pointer @@ -5756,8 +5862,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - pcie_bandwidth_available(adev->pdev, NULL, - &platform_speed_cap, &platform_link_width); + amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, + &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ @@ -5984,7 +6090,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; drm_sched_stop(&ring->sched, NULL); @@ -6112,7 +6218,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !drm_sched_wqueue_ready(&ring->sched)) continue; drm_sched_start(&ring->sched, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e7e87a3b2601..decbbe3d4f06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,6 +42,7 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> #include <linux/pm_runtime.h> +#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, 
attach->peer2peer = false; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); if (r < 0) goto out; @@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, out: pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); return r; } @@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8b33b130ea36..852cec98ff26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -115,9 +115,10 @@ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode + * - 3.57.0 - Compute tunneling on GFX10+ */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 56 +#define KMS_DRIVER_MINOR 57 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -208,6 +209,8 @@ int amdgpu_umsch_mm; int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ +int amdgpu_wbrf = -1; +int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -971,6 +974,26 @@ module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)"); module_param_named(agp, amdgpu_agp, int, 0444); +/** + * DOC: wbrf (int) + * Enable the Wifi RFI interference mitigation feature. + * Due to electrical and mechanical constraints there may be interference from + * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio + * module frequency bands used by Wifi 6/6e/7. To mitigate possible RFI interference, + * with this feature enabled, PMFW will use either a “shadowed P-State” or a “P-State”, based + * on the active list of frequencies in use (to be avoided) as part of the initial setting or + * a P-state transition. However, there may be a performance impact with this + * feature enabled. + * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported)) + */ +MODULE_PARM_DESC(wbrf, + "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto (default))"); +module_param_named(wbrf, amdgpu_wbrf, int, 0444); + +MODULE_PARM_DESC(fw_bo_location, + "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram)"); +module_param(fw_bo_location, int, 0644); + /* These devices are not supported by amdgpu. 
* They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index dc230212746a..70bff8cecfda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -183,6 +183,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -310,6 +311,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84beeaa4d21c..49a5f1c73b3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&exec); @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { if (gobj) { r = drm_exec_lock_obj(&exec, gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 081267161d40..55b65fc04b65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 128MB takes maximum 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL); + /* Assuming 64MB takes maximum 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL); timeout *= HMM_RANGE_DEFAULT_TIMEOUT; timeout = jiffies + msecs_to_jiffies(timeout); @@ -199,6 +199,7 @@ retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { + schedule(); /* * FIXME: This timeout should encompass the retry from * mmu_interval_read_retry() as well. 
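The amdgpu_hmm.c hunk above halves the assumed page-fault throughput (the shift goes from 27 to 26, i.e. from 128MB to 64MB per second) and moves the schedule() call from the per-chunk success path into the retry path, so the CPU is yielded before each hmm_range_fault() retry. A minimal sketch of the resulting loop, assuming the surrounding function's hmm_range, notifier and timeout variables, with most error handling elided:

/* Assume faulting 64MB of address space takes at most one second. */
timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);

retry:
	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
	r = hmm_range_fault(hmm_range);
	if (unlikely(r)) {
		schedule();	/* yield before retrying */
		if (r == -EBUSY && !time_after(jiffies, timeout))
			goto retry;
		return r;
	}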
@@ -212,7 +213,6 @@ retry: break; hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; hmm_range->start = hmm_range->end; - schedule(); } while (hmm_range->end < end); hmm_range->start = start; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1f357198533f..71a5cf37b472 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!entity) return 0; - return drm_sched_job_init(&(*job)->base, entity, owner); + return drm_sched_job_init(&(*job)->base, entity, 1, owner); } int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, @@ -325,7 +325,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) int i; /* Signal all jobs not yet scheduled */ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 583cf03950cd..b5ebafd4a3ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1428,6 +1428,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, fpriv->csa_va = NULL; } + amdgpu_seq64_unmap(adev, fpriv); + pasid = fpriv->vm.pasid; pd = amdgpu_bo_ref(fpriv->vm.root.bo); if (!WARN_ON(amdgpu_bo_reserve(pd, true))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index cf33eb219e25..59fafb8392e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) { struct amdgpu_smuio_mcm_config_info mcm_info; + struct ras_err_addr err_addr = {0}; struct mca_bank_set mca_set; struct mca_bank_node *node; struct mca_bank_entry *entry; @@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; + if (blk == AMDGPU_RAS_BLOCK__UMC) { + err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; + err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; + err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; + } + if (type == AMDGPU_MCA_ERROR_TYPE_UE) - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + amdgpu_ras_error_statistic_ue_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); else - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); } out_mca_release: @@ -351,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; int count; + if (!mca_funcs || !mca_funcs->mca_get_mca_entry) + return -EOPNOTSUPP; + switch (type) { case AMDGPU_MCA_ERROR_TYPE_UE: count = mca_funcs->max_ue_count; @@ -365,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err if (idx >= count) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_mca_entry) - return 
mca_funcs->mca_get_mca_entry(adev, type, idx, entry); - - return -EOPNOTSUPP; + return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); } #if defined(CONFIG_DEBUG_FS) @@ -377,7 +386,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) struct amdgpu_device *adev = (struct amdgpu_device *)data; int ret; - ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false); + ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false); if (ret) return ret; @@ -485,7 +494,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6)) return; debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index e51e8918e667..b399f1b62887 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32) + #define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) enum amdgpu_mca_ip { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 9ddbf1494326..da48b6da0107 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -98,6 +98,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); + return r; + } + + memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + + return 0; + +} + static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) { bitmap_free(adev->mes.doorbell_bitmap); @@ -182,8 +202,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error; + r = amdgpu_mes_event_log_init(adev); + if (r) + goto error_doorbell; + return 0; +error_doorbell: + amdgpu_mes_doorbell_free(adev); error: amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); @@ -199,6 +225,10 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); amdgpu_device_wb_free(adev, adev->mes.read_val_offs); @@ -886,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; op_input.set_shader_debugger.process_context_addr = process_context_addr; op_input.set_shader_debugger.flags.u32all = flags; + + /* use amdgpu mes_flush_shader_debugger instead */ + if (op_input.set_shader_debugger.flags.process_ctx_flush) + return -EINVAL; + op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; 
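Note that the flags union written through u32all above is re-declared in the amdgpu_mes.h hunk below with a uint32_t bit-field base type instead of uint64_t, so the bit-fields and u32all genuinely alias the same 32 bits (and a spare bit becomes process_ctx_flush). A runnable illustration of why the base type matters; the printed sizes assume a typical LP64 ABI:

#include <stdint.h>
#include <stdio.h>

/* Old layout: 64-bit bit-field base type aliased with a 32-bit word. */
union old_flags {
        struct { uint64_t single_memop : 1, single_alu_op : 1, reserved : 30; };
        uint32_t u32all;
};

/* New layout: 32-bit base type; the bits and u32all cover the same storage. */
union new_flags {
        struct { uint32_t single_memop : 1, single_alu_op : 1, reserved : 29,
                 process_ctx_flush : 1; };
        uint32_t u32all;
};

int main(void)
{
        /* On a typical LP64 ABI this prints 8 then 4. */
        printf("%zu %zu\n", sizeof(union old_flags), sizeof(union new_flags));
        return 0;
}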
memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); @@ -905,6 +940,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, return r; } +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes flush shader debugger is not supported!\n"); + return -EINVAL; + } + + op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; + op_input.set_shader_debugger.process_context_addr = process_context_addr; + op_input.set_shader_debugger.flags.process_ctx_flush = true; + + amdgpu_mes_lock(&adev->mes); + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to set_shader_debugger\n"); + + amdgpu_mes_unlock(&adev->mes); + + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, @@ -1122,7 +1183,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1193,7 +1254,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1479,3 +1540,34 @@ out: amdgpu_ucode_release(&adev->mes.fw[pipe]); return r; } + +#if defined(CONFIG_DEBUG_FS) + +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) +{ + struct amdgpu_device *adev = m->private; + uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); + + seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, + mem, PAGE_SIZE, false); + + return 0; +} + + +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); + +#endif + +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) +{ + +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + + debugfs_create_file("amdgpu_mes_event_log", 0444, root, + adev, &amdgpu_debugfs_mes_event_log_fops); + +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index a27b424ffe00..7d4f93fea937 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -133,6 +133,11 @@ struct amdgpu_mes { uint32_t num_mes_dbs; unsigned long *doorbell_bitmap; + /* MES event log buffer */ + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; + void *event_log_cpu_addr; + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; @@ -291,9 +296,10 @@ struct mes_misc_op_input { uint64_t process_context_addr; union { struct { - uint64_t single_memop : 1; - uint64_t single_alu_op : 1; - uint64_t reserved: 30; + uint32_t single_memop : 1; + uint32_t single_alu_op : 1; + uint32_t reserved: 29; + uint32_t process_ctx_flush: 1; }; uint32_t u32all; } flags; @@ -369,7 +375,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, const uint32_t *tcp_watch_cntl, uint32_t flags, bool trap_en); - +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr); int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 32fe05c810c6..2e4911050cc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -32,7 +32,6 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_crtc.h> -#include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fixed.h> #include <drm/drm_framebuffer.h> @@ -51,6 +50,7 @@ struct amdgpu_device; struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; +struct edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -343,6 +343,97 @@ struct amdgpu_mode_info { int disp_priority; const struct amdgpu_display_funcs *funcs; const enum drm_plane_type *plane_type; + + /* Driver-private color mgmt props */ + + /* @plane_degamma_lut_property: Plane property to set a degamma LUT to + * convert encoded values to light linear values before sampling or + * blending. + */ + struct drm_property *plane_degamma_lut_property; + /* @plane_degamma_lut_size_property: Plane property to define the max + * size of degamma LUT as supported by the driver (read-only). + */ + struct drm_property *plane_degamma_lut_size_property; + /** + * @plane_degamma_tf_property: Plane pre-defined transfer function + * to go from scanout/encoded values to linear values. + */ + struct drm_property *plane_degamma_tf_property; + /** + * @plane_hdr_mult_property: + */ + struct drm_property *plane_hdr_mult_property; + + struct drm_property *plane_ctm_property; + /** + * @shaper_lut_property: Plane property to set pre-blending shaper LUT + * that converts color content before 3D LUT. If + * plane_shaper_tf_property != Identity TF, AMD color module will + * combine the user LUT values with pre-defined TF into the LUT + * parameters to be programmed. + */ + struct drm_property *plane_shaper_lut_property; + /** + * @shaper_lut_size_property: Plane property for the size of + * pre-blending shaper LUT as supported by the driver (read-only). + */ + struct drm_property *plane_shaper_lut_size_property; + /** + * @plane_shaper_tf_property: Plane property to set a predefined + * transfer function for pre-blending shaper (before applying 3D LUT) + * with or without LUT. There is no shaper ROM, but we can use AMD + * color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *plane_shaper_tf_property; + /** + * @plane_lut3d_property: Plane property for color transformation using + * a 3D LUT (pre-blending), a three-dimensional array where each + * element is an RGB triplet. Each dimension has the size of + * lut3d_size. The array contains samples from the approximated + * function. On AMD, values between samples are estimated by + * tetrahedral interpolation. The array is accessed with three indices, + * one for each input dimension (color channel), blue being the + * outermost dimension, red the innermost. + */ + struct drm_property *plane_lut3d_property; + /** + * @plane_lut3d_size_property: Plane property to define the max + * size of 3D LUT as supported by the driver (read-only). The max size + * is the max size of one dimension and, therefore, the max number of + * entries for the 3D LUT array is the 3D LUT size cubed. + */ + struct drm_property *plane_lut3d_size_property;
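The @plane_lut3d_property layout documented above (blue outermost, red innermost, an RGB triplet per element, lut3d_size entries per dimension) flattens as in this runnable sketch; the 17-point dimension is only an example:

#include <stdio.h>

/* Flat index of one RGB triplet in the 3D LUT described above:
 * blue is the outermost dimension, red the innermost. */
static unsigned int lut3d_index(unsigned int r, unsigned int g,
                                unsigned int b, unsigned int size)
{
        return (b * size + g) * size + r;
}

int main(void)
{
        unsigned int size = 17;          /* example dimension; size^3 entries */

        /* Element (r=1, g=2, b=3): (3*17 + 2)*17 + 1 = 902. */
        printf("%u of %u entries\n", lut3d_index(1, 2, 3, size),
               size * size * size);
        return 0;
}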
+ /** + * @plane_blend_lut_property: Plane property for output gamma before + * blending. Userspace sets a blend LUT to convert colors after 3D LUT + * conversion. It works as a post-3DLUT 1D LUT. With the shaper LUT, they + * sandwich the 3D LUT between two 1D LUTs. If plane_blend_tf_property + * != Identity TF, AMD color module will combine the user LUT values + * with pre-defined TF into the LUT parameters to be programmed. + */ + struct drm_property *plane_blend_lut_property; + /** + * @plane_blend_lut_size_property: Plane property to define the max + * size of blend LUT as supported by the driver (read-only). + */ + struct drm_property *plane_blend_lut_size_property; + /** + * @plane_blend_tf_property: Plane property to set a predefined + * transfer function for pre-blending blend/out_gamma (after applying + * 3D LUT) with or without LUT. There is no blend ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *plane_blend_tf_property; + /* @regamma_tf_property: Transfer function for CRTC regamma + * (post-blending). Possible values are defined by `enum + * amdgpu_transfer_function`. There is no regamma ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *regamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF @@ -416,6 +507,10 @@ struct amdgpu_crtc { int otg_inst; struct drm_pending_vblank_event *event; + + bool wb_pending; + bool wb_enabled; + struct drm_writeback_connector *wb_conn; }; struct amdgpu_encoder_atom_dig { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5ad03f2afdb4..425cebcc5cbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1245,19 +1245,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space - * @new_mem: new information of the bufer object * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */
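For reference, the @new_mem parameter removed below is not lost: the amdgpu_ttm.c hunk further down hoists the statistics trace into amdgpu_bo_move(), where both placements are still in scope. A sketch of the relocated flow, with the move itself elided:

/* Sketch: shape of amdgpu_bo_move() after this patch (abo and friends
 * as in the real function; only the ordering matters here). */
static int bo_move_shape(struct ttm_buffer_object *bo, bool evict,
                         struct ttm_resource *new_mem)
{
        struct ttm_resource *old_mem = bo->resource;

        /* ... perform the actual move ... */

        /* trace while old_mem and new_mem are both valid */
        trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);

        /* the notify callback no longer needs (or receives) new_mem */
        amdgpu_bo_move_notify(bo, evict);
        return 0;
}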
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1274,13 +1270,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); - - /* update statistics */ - if (!new_mem) - return; - - /* move_notify is called before move happens */ - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d28e21baef16..a3ea8a82db23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -344,9 +344,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a21045d018f2..2addbdf88394 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, @@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); - bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & - EXTEND_PEER_LINK_INFO_CMD_FLAG; + bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 : + psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; /* populate the shared output buffer rather than the cmd input buffer * with node_ids as the input for GET_PEER_LINKS command execution.
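The amdgpu_ras.c and amdgpu_ras.h hunks that follow thread an optional struct ras_err_addr through the error-statistics helpers; NULL keeps the old behavior. A fragment-level sketch of the new call shape, using only names added by this patch (entry, mcm_info, err_data and count as in the amdgpu_mca.c hunk above):

struct ras_err_addr err_addr = {0};

/* Only UMC banks carry a decodable address triple. */
err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];

/* New signature: the address info rides along with the count ... */
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, &err_addr, count);
/* ... and NULL is the "no address available" case. */
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);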
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 63fb4cd85e53..fc42fb6ee191 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1156,8 +1156,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; - amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count); - amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count); + amdgpu_ras_error_statistic_ce_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ce_count); + amdgpu_ras_error_statistic_ue_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -1174,6 +1176,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + if (blk == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) return -EINVAL; @@ -2538,7 +2543,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) return 0; data = &con->eh_data; - *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + *data = kzalloc(sizeof(**data), GFP_KERNEL); if (!*data) { ret = -ENOMEM; goto out; @@ -2825,10 +2830,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (con) return 0; - con = kmalloc(sizeof(struct amdgpu_ras) + + con = kzalloc(sizeof(*con) + sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT + sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT, - GFP_KERNEL|__GFP_ZERO); + GFP_KERNEL); if (!con) return -ENOMEM; @@ -3133,6 +3138,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + amdgpu_ras_set_mca_debug_mode(adev, false); + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); @@ -3406,12 +3413,18 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } -void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; - if (con) - con->is_mca_debug_mode = enable; + if (con) { + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_mca_debug_mode = enable; + } + + return ret; } bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) @@ -3682,7 +3695,8 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct } static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr) { struct ras_err_node *err_node; @@ -3696,6 +3710,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + if (err_addr) + memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr)); + err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); @@ -3704,7 +3721,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d 
} int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3714,7 +3732,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); if (!err_info) return -EINVAL; @@ -3725,7 +3743,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3735,7 +3754,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); if (!err_info) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 19161916ac46..76fb85628716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -452,10 +452,17 @@ struct ras_fs_data { char debugfs_name[32]; }; +struct ras_err_addr { + uint64_t err_status; + uint64_t err_ipid; + uint64_t err_addr; +}; + struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; + struct ras_err_addr err_addr; }; struct ras_err_node { @@ -773,7 +780,7 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); -void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, unsigned int *mode); @@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a80d3ec887e..45424ebf9681 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, struct amdgpu_mqd_prop *prop) { struct amdgpu_device *adev = ring->adev; + bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + amdgpu_gfx_is_high_priority_compute_queue(adev, ring); + bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX && + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring); memset(prop, 0, sizeof(*prop)); @@ 
-659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, */ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && - amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || - (ring->funcs->type == AMDGPU_RING_TYPE_GFX && - amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { + prop->allow_tunneling = is_high_prio_compute; + if (is_high_prio_compute || is_high_prio_gfx) { prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 35e0ae9acadc..2c3675d91614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, if (version_major == 2 && version_minor == 1) adev->gfx.rlc.is_rlc_v2_1 = true; - if (version_minor >= 0) { - err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); - if (err) { - dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); - return err; - } + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; } + if (version_minor >= 1) amdgpu_gfx_rlc_init_microcode_v2_1(adev); if (version_minor >= 2) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c new file mode 100644 index 000000000000..7a6a67275404 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_seq64.h" + +#include <drm/drm_exec.h> + +/** + * DOC: amdgpu_seq64 + * + * amdgpu_seq64 allocates a 64bit memory on each request in sequence order. + * seq64 driver is required for user queue fence memory allocation, TLB + * counters and VM updates. It has maximum count of 32768 64 bit slots. + */ + +/** + * amdgpu_seq64_map - Map the seq64 memory to VM + * + * @adev: amdgpu_device pointer + * @vm: vm pointer + * @bo_va: bo_va pointer + * @seq64_addr: seq64 vaddr start address + * @size: seq64 pool size + * + * Map the seq64 memory to the given VM. 
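amdgpu_seq64_map() below uses the drm_exec locking idiom that this series also converts every existing caller to; note the new third argument to drm_exec_init() throughout the patch, a pre-allocation hint where 0 means the object count is unknown. A condensed sketch of the retry loop, restricted to calls that appear in this patch:

/* Sketch of the drm_exec pattern as used by amdgpu_seq64_map() below. */
struct drm_exec exec;
int r;

drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); /* 0 = count unknown */
drm_exec_until_all_locked(&exec) {
        r = drm_exec_lock_obj(&exec, &bo->tbo.base);
        /* On contention every held lock is dropped and the loop restarts. */
        drm_exec_retry_on_contention(&exec);
        if (unlikely(r))
                goto error;
}
/* ... all objects locked: do the mapping ... */
error:
drm_exec_fini(&exec);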
+ * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va, u64 seq64_addr, + uint32_t size) +{ + struct amdgpu_bo *bo; + struct drm_exec exec; + int r; + + bo = adev->seq64.sbo; + if (!bo) + return -EINVAL; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + r = -ENOMEM; + goto error; + } + + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); + if (r) { + DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + + r = amdgpu_vm_bo_update(adev, *bo_va, false); + if (r) { + DRM_ERROR("failed to do vm_bo_update on userq sem\n"); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + +error: + drm_exec_fini(&exec); + return r; +} + +/** + * amdgpu_seq64_unmap - Unmap the seq64 memory + * + * @adev: amdgpu_device pointer + * @fpriv: DRM file private + * + * Unmap the seq64 memory from the given VM. + */ +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) +{ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + struct drm_exec exec; + int r; + + if (!fpriv->seq64_va) + return; + + bo = adev->seq64.sbo; + if (!bo) + return; + + vm = &fpriv->vm; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + amdgpu_vm_bo_del(adev, fpriv->seq64_va); + + fpriv->seq64_va = NULL; + +error: + drm_exec_fini(&exec); +} + +/** + * amdgpu_seq64_alloc - Allocate a 64 bit memory + * + * @adev: amdgpu_device pointer + * @gpu_addr: allocated gpu VA start address + * @cpu_addr: allocated cpu VA start address + * + * Alloc a 64 bit memory from seq64 pool. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, + u64 **cpu_addr) +{ + unsigned long bit_pos; + u32 offset; + + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + + if (bit_pos < adev->seq64.num_sem) { + __set_bit(bit_pos, adev->seq64.used); + offset = bit_pos << 6; /* convert to qw offset */ + } else { + return -EINVAL; + } + + *gpu_addr = offset + AMDGPU_SEQ64_VADDR_START; + *cpu_addr = offset + adev->seq64.cpu_base_addr; + + return 0; +} + +/** + * amdgpu_seq64_free - Free the given 64 bit memory + * + * @adev: amdgpu_device pointer + * @gpu_addr: gpu start address to be freed + * + * Free the given 64 bit memory from seq64 pool. + * + */ +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr) +{ + u32 offset; + + offset = gpu_addr - AMDGPU_SEQ64_VADDR_START; + + offset >>= 6; + if (offset < adev->seq64.num_sem) + __clear_bit(offset, adev->seq64.used); +} + +/** + * amdgpu_seq64_fini - Cleanup seq64 driver + * + * @adev: amdgpu_device pointer + * + * Free the memory space allocated for seq64. 
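amdgpu_seq64_alloc() and amdgpu_seq64_free() above are a textbook bitmap slot allocator: bit_pos << 6 gives each slot a 64-byte stride, and the header below sizes the pool at AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8) = 2 MiB / 64 = 32768 slots. A runnable userspace analog, with find_first_zero_bit() replaced by a linear scan:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_SLOTS   32768       /* 2 MiB / 64, per AMDGPU_MAX_SEQ64_SLOTS */
#define SLOT_STRIDE 64          /* bytes; matches bit_pos << 6 above */

static bool used[NUM_SLOTS];

/* Return the byte offset of a free slot, or -1 if the pool is full. */
static long slot_alloc(void)
{
        for (uint32_t bit = 0; bit < NUM_SLOTS; bit++) {
                if (!used[bit]) {
                        used[bit] = true;
                        return (long)bit * SLOT_STRIDE;
                }
        }
        return -1;
}

static void slot_free(long offset)
{
        uint32_t bit = (uint32_t)(offset / SLOT_STRIDE);

        if (bit < NUM_SLOTS)
                used[bit] = false;
}

int main(void)
{
        long a = slot_alloc(), b = slot_alloc();

        printf("%ld %ld\n", a, b);     /* 0 64: consecutive 64-byte slots */
        slot_free(a);
        printf("%ld\n", slot_alloc()); /* 0: the freed slot is reused first */
        return 0;
}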
+ * + */ +void amdgpu_seq64_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->seq64.sbo, + NULL, + (void **)&adev->seq64.cpu_base_addr); +} + +/** + * amdgpu_seq64_init - Initialize seq64 driver + * + * @adev: amdgpu_device pointer + * + * Allocate the required memory space for seq64. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->seq64.sbo) + return 0; + + /* + * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS + * 64bit slots + */ + r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->seq64.sbo, NULL, + (void **)&adev->seq64.cpu_base_addr); + if (r) { + dev_warn(adev->dev, "(%d) create seq64 failed\n", r); + return r; + } + + memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE); + + adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS; + memset(&adev->seq64.used, 0, sizeof(adev->seq64.used)); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h new file mode 100644 index 000000000000..2196e72be508 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_SEQ64_H__ +#define __AMDGPU_SEQ64_H__ + +#define AMDGPU_SEQ64_SIZE (2ULL << 20) +#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8)) +#define AMDGPU_SEQ64_VADDR_OFFSET 0x50000 +#define AMDGPU_SEQ64_VADDR_START (AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET) + +struct amdgpu_seq64 { + struct amdgpu_bo *sbo; + u32 num_sem; + u64 *cpu_base_addr; + DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); +}; + +void amdgpu_seq64_fini(struct amdgpu_device *adev); +int amdgpu_seq64_init(struct amdgpu_device *adev); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size); +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index dcd8c066bc1f..1b013a44ca99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, /* Never sync to VM updates either. */ if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) + owner != AMDGPU_FENCE_OWNER_UNDEFINED && + owner != AMDGPU_FENCE_OWNER_KFD) return false; /* Ignore fences depending on the sync mode */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 2fd1bfb35916..f539b1d00234 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); +TRACE_EVENT(amdgpu_runpm_reference_dumps, + TP_PROTO(uint32_t index, const char *func), + TP_ARGS(index, func), + TP_STRUCT__entry( + __field(uint32_t, index) + __string(func, func) + ), + TP_fast_assign( + __entry->index = index; + __assign_str(func, func); + ), + TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", + __entry->index, + __get_str(func)) +); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ab4a762aed5b..75c9fd2c6c2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -545,10 +545,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } + trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict, new_mem); + amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1553,7 +1554,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false, NULL); + amdgpu_bo_move_notify(bo, false); } static struct ttm_device_funcs amdgpu_bo_driver = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b14127429f30..d334e42fe0eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) 
? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); @@ -1397,9 +1398,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, if (err) return -ENODEV; + err = amdgpu_ucode_validate(*fw); - if (err) + if (err) { dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + release_firmware(*fw); + *fw = NULL; + } return err; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ca45ba8ac171..bfbf59326ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); @@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 3a632c3b1a2c..0dcff2889e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1099,7 +1099,8 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) { bool xnack_mode = true; - if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) xnack_mode = false; return xnack_mode; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index db6fc0cb18eb..453a4b786cfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ #include <drm/drm_atomic_helper.h> +#include <drm/drm_edid.h> #include <drm/drm_simple_kms_helper.h> #include <drm/drm_vblank.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5baefb548a29..b8fcb6c55698 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1439,6 +1439,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } /** + * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @flush_type: flush type + * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush. + * + * Flush TLB if needed for a compute VM. + * + * Returns: + * 0 for success. + */ +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask) +{ + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); + bool all_hub = false; + int xcc = 0, r = 0; + + WARN_ON_ONCE(!vm->is_compute_context); + + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. 
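The atomic64_xchg() test that opens amdgpu_vm_flush_compute_tlb() below collapses concurrent flush requests for the same TLB sequence number without taking a lock; losing the race merely causes one redundant flush. A runnable analog with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic long last_flushed_seq;

/* Returns true when this caller must actually flush; false when some
 * caller already recorded this sequence number. */
static bool need_flush(long tlb_seq)
{
        return atomic_exchange(&last_flushed_seq, tlb_seq) != tlb_seq;
}

int main(void)
{
        printf("%d %d\n", need_flush(5), need_flush(5)); /* prints: 1 0 */
        return 0;
}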
+ */ + if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq) + return 0; + + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) + all_hub = true; + + for_each_inst(xcc, xcc_mask) { + r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type, + all_hub, xcc); + if (r) + break; + } + return r; +} + +/** * amdgpu_vm_bo_add - add a bo to a specific vm * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2cd86d2bf73f..b6cd565562ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,7 @@ struct amdgpu_vm { /* Last finished delayed update */ atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; + atomic64_t kfd_last_flushed_seq; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket); +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index e81579708e96..6f149b54d4d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_vpe.h" +#include "amdgpu_smu.h" #include "soc15_common.h" #include "vpe_v6_1.h" @@ -33,8 +34,186 @@ /* VPE CSA resides in the 4th page of CSA */ #define AMDGPU_CSA_VPE_OFFSET (4096 * 3) +/* 1 second timeout */ +#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define VPE_MAX_DPM_LEVEL 4 +#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8 +#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART) + static void vpe_set_ring_funcs(struct amdgpu_device *adev); +static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +static inline uint16_t complete_integer_division_u16( + uint16_t dividend, + uint16_t divisor, + uint16_t *remainder) +{ + return div16_u16_rem(dividend, divisor, (uint16_t *)remainder); +} + +static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) +{ + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator); + uint16_t arg2_value = (uint16_t)(arg2_negative ? 
-denominator : denominator); + + uint16_t remainder; + + /* determine integer part */ + uint16_t res_value = complete_integer_division_u16( + arg1_value, arg2_value, &remainder); + + if (res_value > 127 /* CHAR_MAX */) + return 0; + + /* determine fractional part */ + { + unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + uint16_t summand = (remainder << 1) >= arg2_value; + + if ((res_value + summand) > 32767 /* SHRT_MAX */) + return 0; + + res_value += summand; + } + + if (arg1_negative ^ arg2_negative) + res_value = -res_value; + + return res_value; +} + +static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency) +{ + uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency); + + if (GET_PRATIO_INTEGER_PART(pratio) > 1) + pratio = 0; + + return pratio; +} + +/* + * VPE has 4 DPM levels from level 0 (lowest) to 3 (highest). + * VPE FW will dynamically decide which level should be used according to the current load. + * + * Get VPE and SOC clocks from PM, and select the appropriate four clock values, + * calculate the ratios of adjusting from one clock to another. + * The VPE FW can then request the appropriate frequency from the PMFW. + */ +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t dpm_ctl; + + if (adev->pm.dpm_enabled) { + struct dpm_clocks clock_table = { 0 }; + struct dpm_clock *VPEClks; + struct dpm_clock *SOCClks; + uint32_t idx; + uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0; + uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0; + + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl |= 1; /* DPM enablement */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + + /* Get VPECLK and SOCCLK */ + if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) { + dev_dbg(adev->dev, "%s: get clock failed!\n", __func__); + goto disable_dpm; + } + + SOCClks = clock_table.SocClocks; + VPEClks = clock_table.VPEClocks; + + /* vpe dpm only cares about 4 levels. */ + for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) { + uint32_t soc_dpm_level; + uint32_t min_freq; + + if (idx == 0) + soc_dpm_level = 0; + else + soc_dpm_level = (idx * 2) + 1; + + /* clamp the max level */ + if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1) + soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1; + + min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq; + + switch (idx) { + case 0: + pratio_vmin_freq = min_freq; + break; + case 1: + pratio_vmid_freq = min_freq; + break; + case 2: + pratio_vnorm_freq = min_freq; + break; + case 3: + pratio_vmax_freq = min_freq; + break; + default: + break; + } + } + + if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) { + uint32_t pratio_ctl; + + pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq); + pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq); + pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq); + + pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */ + dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__); + } else { + dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__); + goto disable_dpm; + } + } + return 0; + +disable_dpm: + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl &= 0xfffffffe; /* Disable DPM */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); + return 0; +} + int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) { struct amdgpu_firmware_info ucode = { @@ -134,6 +313,19 @@ static int vpe_early_init(void *handle) return 0; } +static void vpe_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vpe.idle_work.work); + unsigned int fences = 0; + + fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + + if (fences == 0) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + else + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} static int vpe_common_init(struct amdgpu_vpe *vpe) { @@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe) return r; } + vpe->context_started = false; + INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler); + return 0; } @@ -219,6 +414,9 @@ static int vpe_hw_fini(void *handle) vpe_ring_stop(vpe); + /* Power off VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return 0; } @@ -226,6 +424,8 @@ static int vpe_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vpe.idle_work); + return vpe_hw_fini(adev); } @@ -430,6 +630,21 @@ static int vpe_set_clockgating_state(void *handle, static int vpe_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + dev_err(adev->dev, "Without PM, cannot support powergating\n"); + + dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? 
"GATE":"UNGATE"); + + if (state == AMD_PG_STATE_GATE) { + amdgpu_dpm_enable_vpe(adev, false); + vpe->context_started = false; + } else { + amdgpu_dpm_enable_vpe(adev, true); + } + return 0; } @@ -595,6 +810,38 @@ err0: return ret; } +static void vpe_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + + cancel_delayed_work_sync(&adev->vpe.idle_work); + + /* Power on VPE and notify VPE of new context */ + if (!vpe->context_started) { + uint32_t context_notify; + + /* Power on VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE); + + /* Indicates that a job from a new context has been submitted. */ + context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator)); + if ((context_notify & 0x1) == 0) + context_notify |= 0x1; + else + context_notify &= ~(0x1); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify); + vpe->context_started = true; + } +} + +static void vpe_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} + static const struct amdgpu_ring_funcs vpe_ring_funcs = { .type = AMDGPU_RING_TYPE_VPE, .align_mask = 0xf, @@ -625,6 +872,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .init_cond_exec = vpe_ring_init_cond_exec, .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, + .begin_use = vpe_ring_begin_use, + .end_use = vpe_ring_end_use, }; static void vpe_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 29d56f7ae4a9..1153ddaea64d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -47,6 +47,15 @@ struct vpe_regs { uint32_t queue0_rb_wptr_lo; uint32_t queue0_rb_wptr_hi; uint32_t queue0_preempt; + + uint32_t dpm_enable; + uint32_t dpm_pratio; + uint32_t dpm_request_interval; + uint32_t dpm_decision_threshold; + uint32_t dpm_busy_clamp_threshold; + uint32_t dpm_idle_clamp_threshold; + uint32_t dpm_request_lv; + uint32_t context_indicator; }; struct amdgpu_vpe { @@ -63,12 +72,15 @@ struct amdgpu_vpe { struct amdgpu_bo *cmdbuf_obj; uint64_t cmdbuf_gpu_addr; uint32_t *cmdbuf_cpu_addr; + struct delayed_work idle_work; + bool context_started; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe); +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe); #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0) #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? 
(vpe)->funcs->ring_start((vpe)) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index bd20cb3b9819..a6c88f2fe6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, return sysfs_emit(buf, "%s\n", buf); } +static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i, j, size = 0; + int current_node; + /* + * get the node id in the sysfs for the current socket and show + * it in the port num info output in the sysfs for easy reading. + * it is NOT the one retrieved from xgmi ta. + */ + for (i = 0; i < top->num_nodes; i++) { + if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) { + current_node = i; + break; + } + } + + for (i = 0; i < top->num_nodes; i++) { + for (j = 0; j < top->nodes[i].num_links; j++) + /* node id in sysfs starts from 1 rather than 0 so +1 here */ + size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1, + top->nodes[i].port_num[j].src_xgmi_port_num, i + 1, + top->nodes[i].port_num[j].dst_xgmi_port_num); + } + + return size; +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); +static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_num_links\n"); + /* Create xgmi port num file if supported */ + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num); + if (ret) + dev_err(adev->dev, "failed to create xgmi_port_num\n"); + } + /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (&adev->dev->kobj)) { ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj, @@ -517,6 +557,8 @@ remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); success: return ret; @@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); if (hive->kobj.parent != (&adev->dev->kobj)) sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); @@ -779,6 +823,28 @@ static int 
amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf return 0; } +static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info; + struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info; + + for (int i = 0; i < peer_info->num_nodes; i++) { + if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) { + for (int j = 0; j < top_info->num_nodes; j++) { + if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) { + peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops; + peer_info->nodes[i].is_sharing_enabled = + top_info->nodes[j].is_sharing_enabled; + peer_info->nodes[i].num_links = + top_info->nodes[j].num_links; + return; + } + } + } + } +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; @@ -853,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit_unlock; } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info, false); + if (amdgpu_sriov_vf(adev) && + adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + /* only get topology for VF being init if it can support full duplex */ + ret = psp_xgmi_get_topology_info(&adev->psp, count, + &adev->psp.xgmi_context.top_info, false); if (ret) { - dev_err(tmp_adev->dev, + dev_err(adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + /* To do: continue with some node failed or disable the whole hive*/ goto exit_unlock; } + + /* fill the topology info for peers instead of getting from PSP */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + amdgpu_xgmi_fill_topology_info(adev, tmp_adev); + } + } else { + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info, false); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit_unlock; + } + } } /* get topology again for hives that support extended data */ @@ -1227,10 +1313,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case AMDGPU_MCA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); break; case AMDGPU_MCA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 3f715e7fe1a9..d6f808acfb17 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" 
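Back in amdgpu_vpe.c, vpe_u1_8_from_fraction() builds a U1.8 fixed-point ratio bit-serially: 8 fractional bits, with the result rejected when the integer part exceeds 1 (GET_PRATIO_INTEGER_PART). A runnable cross-check of the same rounding using plain integer arithmetic; this is a simplified analog, not the kernel routine:

#include <stdint.h>
#include <stdio.h>

/* U1.8 with round-to-nearest; 0 when the ratio is not below 2.0,
 * mirroring the GET_PRATIO_INTEGER_PART(x) > 1 rejection above. */
static uint16_t u1_8_ratio(uint16_t num, uint16_t den)
{
        uint32_t v = ((uint32_t)num * 256u + den / 2u) / den;

        return v > 0x1ff ? 0 : (uint16_t)v;
}

int main(void)
{
        /* 800 MHz over 1200 MHz: 0.666... scales to 171 = 0xab (~0.668). */
        printf("0x%x\n", u1_8_ratio(800, 1200));
        return 0;
}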
+#include "amdgpu_reg_state.h" #include "amdgpu_xcp.h" #include "gfx_v9_4_3.h" #include "gfxhub_v1_2.h" @@ -656,3 +657,416 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) return 0; } + +static void aqua_read_smn(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr) +{ + regdata->addr = smn_addr; + regdata->value = RREG32_PCIE(smn_addr); +} + +struct aqua_reg_list { + uint64_t start_addr; + uint32_t num_regs; + uint32_t incrx; +}; + +#define DW_ADDR_INCR 4 + +static void aqua_read_smn_ext(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr, int i) +{ + regdata->addr = + smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i); + regdata->value = RREG32_PCIE_EXT(regdata->addr); +} + +#define smnreg_0x1A340218 0x1A340218 +#define smnreg_0x1A3402E4 0x1A3402E4 +#define smnreg_0x1A340294 0x1A340294 +#define smnreg_0x1A380088 0x1A380088 + +#define NUM_PCIE_SMN_REGS 14 + +static struct aqua_reg_list pcie_reg_addrs[] = { + { smnreg_0x1A340218, 1, 0 }, + { smnreg_0x1A3402E4, 1, 0 }, + { smnreg_0x1A340294, 6, DW_ADDR_INCR }, + { smnreg_0x1A380088, 6, DW_ADDR_INCR }, +}; + +static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_pcie_v1_0 *pcie_regs; + struct amdgpu_smn_reg_data *reg_data; + struct pci_dev *us_pdev, *ds_pdev; + int aer_cap, r, n; + + if (!buf || !max_size) + return -EINVAL; + + pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf; + + szbuf = sizeof(*pcie_reg_state) + + amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS); + /* Only one instance of pcie regs */ + if (max_size < szbuf) + return -EOVERFLOW; + + pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf + + sizeof(*pcie_reg_state)); + pcie_regs->inst_header.instance = 0; + pcie_regs->inst_header.state = AMDGPU_INST_S_OK; + pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS; + + reg_data = pcie_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) { + start_addr = pcie_reg_addrs[r].start_addr; + incrx = pcie_reg_addrs[r].incrx; + num_regs = pcie_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn(adev, reg_data, start_addr + n * incrx); + ++reg_data; + } + } + + ds_pdev = pci_upstream_bridge(adev->pdev); + us_pdev = pci_upstream_bridge(ds_pdev); + + pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA, + &pcie_regs->device_status); + pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA, + &pcie_regs->link_status); + + aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR); + if (aer_cap) { + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS, + &pcie_regs->pcie_corr_err_status); + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS, + &pcie_regs->pcie_uncorr_err_status); + } + + pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS, + &pcie_regs->sub_bus_number_latency); + + pcie_reg_state->common_header.structure_size = szbuf; + pcie_reg_state->common_header.format_revision = 1; + pcie_reg_state->common_header.content_revision = 0; + pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE; + pcie_reg_state->common_header.num_instances = 1; + + return pcie_reg_state->common_header.structure_size; +} + +#define smnreg_0x11A00050 0x11A00050 +#define smnreg_0x11A00180 0x11A00180 +#define smnreg_0x11A00070 0x11A00070 +#define smnreg_0x11A00200 0x11A00200 +#define smnreg_0x11A0020C 0x11A0020C +#define smnreg_0x11A00210 0x11A00210 +#define smnreg_0x11A00108 0x11A00108 + +#define XGMI_LINK_REG(smnreg, l) ((smnreg) | ((l) << 20)) + +#define NUM_XGMI_SMN_REGS 25 + +static struct aqua_reg_list xgmi_reg_addrs[] = { + { smnreg_0x11A00050, 1, 0 }, + { smnreg_0x11A00180, 16, DW_ADDR_INCR }, + { smnreg_0x11A00070, 4, DW_ADDR_INCR }, + { smnreg_0x11A00200, 1, 0 }, + { smnreg_0x11A0020C, 1, 0 }, + { smnreg_0x11A00210, 1, 0 }, + { smnreg_0x11A00108, 1, 0 }, +}; + +static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_xgmi_v1_0 *xgmi_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_xgmi_instances = 8; + int inst = 0, i, j, r, n; + const int xgmi_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf; + + szbuf = sizeof(*xgmi_reg_state) + + amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs), + NUM_XGMI_SMN_REGS); + /* Make sure the buffer can hold all xgmi instances */ + if (max_size < szbuf) + return -EOVERFLOW; + + p = &xgmi_reg_state->xgmi_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < xgmi_inst; ++j) { + xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p; + xgmi_regs->inst_header.instance = inst++; + + xgmi_regs->inst_header.state = AMDGPU_INST_S_OK; + xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS; + + reg_data = xgmi_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) { + start_addr = xgmi_reg_addrs[r].start_addr; + incrx = xgmi_reg_addrs[r].incrx; + num_regs = xgmi_reg_addrs[r].num_regs; + + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + XGMI_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + xgmi_reg_state->common_header.structure_size = szbuf; + xgmi_reg_state->common_header.format_revision = 1; + xgmi_reg_state->common_header.content_revision = 0; + xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI; + xgmi_reg_state->common_header.num_instances = max_xgmi_instances; + + return xgmi_reg_state->common_header.structure_size; +} + +#define smnreg_0x11C00070 0x11C00070 +#define smnreg_0x11C00210 0x11C00210 + +static struct aqua_reg_list wafl_reg_addrs[] = { + { smnreg_0x11C00070, 4, DW_ADDR_INCR }, + { smnreg_0x11C00210, 1, 0 }, +}; + +#define WAFL_LINK_REG(smnreg, l) ((smnreg) | ((l) << 20)) + +#define NUM_WAFL_SMN_REGS 5 + +static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_wafl_v1_0 *wafl_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_wafl_instances = 8; + int inst = 0, i, j, r, n; + const int wafl_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf; + + szbuf = sizeof(*wafl_reg_state) + + amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs), + NUM_WAFL_SMN_REGS); + + if (max_size < szbuf) + return -EOVERFLOW; + + p = &wafl_reg_state->wafl_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < wafl_inst; ++j) { + wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p; + wafl_regs->inst_header.instance = inst++; + + wafl_regs->inst_header.state = AMDGPU_INST_S_OK; + wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
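+			/* Walk each range in wafl_reg_addrs below: read num_regs dwords starting at start_addr, stepping incrx bytes, via the extended SMN space for AID instance i, link j */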
+ + reg_data = wafl_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) { + start_addr = wafl_reg_addrs[r].start_addr; + incrx = wafl_reg_addrs[r].incrx; + num_regs = wafl_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + WAFL_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + wafl_reg_state->common_header.structure_size = szbuf; + wafl_reg_state->common_header.format_revision = 1; + wafl_reg_state->common_header.content_revision = 0; + wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL; + wafl_reg_state->common_header.num_instances = max_wafl_instances; + + return wafl_reg_state->common_header.structure_size; +} + +#define smnreg_0x1B311060 0x1B311060 +#define smnreg_0x1B411060 0x1B411060 +#define smnreg_0x1B511060 0x1B511060 +#define smnreg_0x1B611060 0x1B611060 + +#define smnreg_0x1C307120 0x1C307120 +#define smnreg_0x1C317120 0x1C317120 + +#define smnreg_0x1C320830 0x1C320830 +#define smnreg_0x1C380830 0x1C380830 +#define smnreg_0x1C3D0830 0x1C3D0830 +#define smnreg_0x1C420830 0x1C420830 + +#define smnreg_0x1C320100 0x1C320100 +#define smnreg_0x1C380100 0x1C380100 +#define smnreg_0x1C3D0100 0x1C3D0100 +#define smnreg_0x1C420100 0x1C420100 + +#define smnreg_0x1B310500 0x1B310500 +#define smnreg_0x1C300400 0x1C300400 + +#define USR_CAKE_INCR 0x11000 +#define USR_LINK_INCR 0x100000 +#define USR_CP_INCR 0x10000 + +#define NUM_USR_SMN_REGS 20 + +static struct aqua_reg_list usr_reg_addrs[] = { + { smnreg_0x1B311060, 4, DW_ADDR_INCR }, + { smnreg_0x1B411060, 4, DW_ADDR_INCR }, + { smnreg_0x1B511060, 4, DW_ADDR_INCR }, + { smnreg_0x1B611060, 4, DW_ADDR_INCR }, + { smnreg_0x1C307120, 2, DW_ADDR_INCR }, + { smnreg_0x1C317120, 2, DW_ADDR_INCR }, +}; + +#define NUM_USR1_SMN_REGS 46 +static struct aqua_reg_list usr1_reg_addrs[] = { + { smnreg_0x1C320830, 6, USR_CAKE_INCR }, + { smnreg_0x1C380830, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0830, 5, USR_CAKE_INCR }, + { smnreg_0x1C420830, 4, USR_CAKE_INCR }, + { smnreg_0x1C320100, 6, USR_CAKE_INCR }, + { smnreg_0x1C380100, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0100, 5, USR_CAKE_INCR }, + { smnreg_0x1C420100, 4, USR_CAKE_INCR }, + { smnreg_0x1B310500, 4, USR_LINK_INCR }, + { smnreg_0x1C300400, 2, USR_CP_INCR }, +}; + +static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev, + void *buf, size_t max_size, + int reg_state) +{ + uint32_t start_addr, incrx, num_regs, szbuf, num_smn; + struct amdgpu_reg_state_usr_v1_0 *usr_reg_state; + struct amdgpu_regs_usr_v1_0 *usr_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_usr_instances = 4; + struct aqua_reg_list *reg_addrs; + int inst = 0, i, n, r, arr_size; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_USR: + arr_size = ARRAY_SIZE(usr_reg_addrs); + reg_addrs = usr_reg_addrs; + num_smn = NUM_USR_SMN_REGS; + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + arr_size = ARRAY_SIZE(usr1_reg_addrs); + reg_addrs = usr1_reg_addrs; + num_smn = NUM_USR1_SMN_REGS; + break; + default: + return -EINVAL; + } + + usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf; + + szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances, + sizeof(*usr_regs), + num_smn); + if (max_size < szbuf) + return -EOVERFLOW; + + p = &usr_reg_state->usr_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + usr_regs = (struct amdgpu_regs_usr_v1_0 *)p; + usr_regs->inst_header.instance = inst++; + usr_regs->inst_header.state =
AMDGPU_INST_S_OK; + usr_regs->inst_header.num_smn_regs = num_smn; + reg_data = usr_regs->smn_reg_values; + + for (r = 0; r < arr_size; r++) { + start_addr = reg_addrs[r].start_addr; + incrx = reg_addrs[r].incrx; + num_regs = reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext(adev, reg_data, + start_addr + n * incrx, i); + reg_data++; + } + } + p = reg_data; + } + + usr_reg_state->common_header.structure_size = szbuf; + usr_reg_state->common_header.format_revision = 1; + usr_reg_state->common_header.content_revision = 0; + usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR; + usr_reg_state->common_header.num_instances = max_usr_instances; + + return usr_reg_state->common_header.structure_size; +} + +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size) +{ + ssize_t size; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_PCIE: + size = aqua_vanjaram_read_pcie_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_XGMI: + size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_WAFL: + size = aqua_vanjaram_read_wafl_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_USR: + size = aqua_vanjaram_read_usr_state(adev, buf, max_size, + AMDGPU_REG_STATE_TYPE_USR); + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + size = aqua_vanjaram_read_usr_state( + adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1); + break; + default: + return -EINVAL; + } + + return size; +} diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 2c221000782c..a33e890c70d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -395,7 +395,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr) (*ptr)++; return; } - return; } } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 3ee219aa2891..7672abe6c140 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -28,6 +28,7 @@ #include <acpi/video.h> +#include <drm/drm_edid.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_connectors.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bb666cb7522e..587ee632a3b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 7af277f61cca..f22ec27365bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 143efc37a17f..4dbe9b3259b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@ #include <linux/pci.h> +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 
adeddfb7ff12..05bcce23385e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c8a3bf01743f..73f6d7e72c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8ed4a6fb147a..2fbcd9765980 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); @@ -293,6 +294,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -564,7 +568,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && + adev->pdev->revision == 0xCE) + snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin"); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); if (err) goto out; @@ -3839,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; @@ -4465,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } +static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + int req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if 
(req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + static int gfx_v11_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0; u32 tmp; - int i, j, k; + int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); @@ -4509,6 +4550,13 @@ static int gfx_v11_0_soft_reset(void *handle) } } + /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + if (r) { + DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); + return r; + } + WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); // Read CP_VMID_RESET register three times. @@ -4517,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); RREG32_SOC15(GC, 0, regCP_VMID_RESET); + /* release the gfx mutex */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + if (r) { + DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); + return r; + } + for (i = 0; i < adev->usec_timeout; i++) { if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 4a09cc0d8ce0..131cddbdda0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3828,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, @@ -3882,150 +3882,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } -static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t data; - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regVML2_WALKER_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, - 0x3); - } -} - -static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev, - uint32_t status, int xcc_id) -{ - struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; - uint32_t i, simd, wave; - uint32_t wave_status; - uint32_t wave_pc_lo, wave_pc_hi; - uint32_t wave_exec_lo, wave_exec_hi; - uint32_t wave_inst_dw0, wave_inst_dw1; - uint32_t wave_ib_sts; - - for (i = 0; i < 32; i++) { - if (!((i << 1) & status)) - continue; - - simd = i / 
cu_info->max_waves_per_simd; - wave = i % cu_info->max_waves_per_simd; - - wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); - wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); - wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); - wave_exec_lo = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); - wave_exec_hi = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); - wave_inst_dw0 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); - wave_inst_dw1 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); - wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); - - dev_info( - adev->dev, - "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n", - simd, wave, wave_status, - ((uint64_t)wave_pc_hi << 32 | wave_pc_lo), - ((uint64_t)wave_exec_hi << 32 | wave_exec_lo), - ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0), - wave_ib_sts); - } -} - -static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - uint32_t status; - - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS); - if (status != 0) { - dev_info( - adev->dev, - "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n", - se_idx, sh_idx, cu_idx); - gfx_v9_4_3_log_cu_timeout_status( - adev, status, xcc_id); - } - /* clear old status */ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); - } - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); -} - -static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); -} - -static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); - } - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); -} - static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, void *ras_error_status, int 
xcc_id) { @@ -4067,16 +3923,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); } -static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -} - -static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); -} - static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); @@ -4394,8 +4240,6 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, - .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status, - .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status, }; struct amdgpu_gfx_ras gfx_v9_4_3_ras = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 53a2ba5fcf4b..22175da0e16a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 55423ff1bb49..95d06da544e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. 
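The gfxhub/mmhub aperture hunks above widen the Raven2 workaround from a single-flag test to a mask test: flags & (A | B | C) is nonzero exactly when at least one of the listed flags is set, so Renoir and Green Sardine now take the same path as Raven2. A minimal stand-alone sketch of the idiom (the bit values below are illustrative placeholders, not the kernel's real AMD_APU_IS_* definitions from amd_shared.h):

#include <stdint.h>
#include <stdio.h>

/* Placeholder bits; the real AMD_APU_IS_* flags live in amd_shared.h */
#define APU_IS_RAVEN2        (1u << 0)
#define APU_IS_RENOIR        (1u << 1)
#define APU_IS_GREEN_SARDINE (1u << 2)

/* Nonzero when any APU affected by the aperture erratum is present */
static int needs_aperture_workaround(uint32_t apu_flags)
{
	return (apu_flags & (APU_IS_RAVEN2 | APU_IS_RENOIR |
			     APU_IS_GREEN_SARDINE)) != 0;
}

int main(void)
{
	printf("%d\n", needs_aperture_workaround(APU_IS_RENOIR)); /* 1 */
	printf("%d\n", needs_aperture_workaround(1u << 5));       /* 0 */
	return 0;
}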
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index a5a05c16c10d..6c5185608854 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1041,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 23d7b548d13f..c9c653cfc765 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + gmc_v11_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2ac5820e9c92..f9039d64ff2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -883,7 +883,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(req); for (j = 0; j < adev->usec_timeout; j++) { @@ -2380,6 +2380,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 4dfec56e1b7f..26d71a22395d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -408,6 +408,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 843219a91736..e3ddd22aa172 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. 
So here is the diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 9b0146732e13..fb53aacdcba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -652,8 +652,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 0f24af6f2810..2d688dca26be 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2156,7 +2156,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 51342809af03..15033efec2ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -902,6 +902,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .pre_asic_init = &soc15_pre_asic_init, .query_video_codecs = &soc15_query_video_codecs, .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, + .get_reg_state = &aqua_vanjaram_get_reg_state, }; static int soc15_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index eac54042c6c0..1444b7765e4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -27,6 +27,7 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" +#include "amdgpu_reg_state.h" extern const struct amdgpu_ip_block_version vega10_common_ip_block; @@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev); void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index e9c2ff74f0bc..7458a218e89d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "umc/umc_12_0_0_offset.h" #include "umc/umc_12_0_0_sh_mask.h" +#include "mp/mp_13_0_6_sh_mask.h" const uint32_t umc_v12_0_channel_idx_tbl[] @@ -88,16 +89,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { + if 
(amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + return true; + return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { + if (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + return false; + return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && @@ -105,7 +116,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) /* Identify data parity error in replay mode */ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) && - !(umc_v12_0_is_uncorrectable_error(mc_umc_status))))); + !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))))); } static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, @@ -124,7 +135,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_correctable_error(mc_umc_status)) + if (umc_v12_0_is_correctable_error(adev, mc_umc_status)) *error_count += 1; } @@ -142,7 +153,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_uncorrectable_error(mc_umc_status)) + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) *error_count += 1; } @@ -166,8 +177,8 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); return 0; } @@ -360,6 +371,59 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } +static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status); + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status); +} + +static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_node *err_node; + uint64_t mc_umc_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + for_each_ras_error(err_node, err_data) { + mc_umc_status = 
err_node->err_info.err_addr.err_status; + if (!mc_umc_status) + continue; + + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + struct amdgpu_smuio_mcm_config_info *mcm_info; + + mcm_info = &err_node->err_info.mcm_info; + mca_addr = err_node->err_info.err_addr.err_addr; + mca_ipid = err_node->err_info.err_addr.err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + mcm_info->die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, err_addr, + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + mcm_info->die_id); + + /* Clear umc error address content */ + memset(&err_node->err_info.err_addr, + 0, sizeof(err_node->err_info.err_addr)); + } + } +} + static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) { amdgpu_umc_loop_channels(adev, @@ -386,4 +450,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = { }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, + .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b34b1e358f8b..e8de3a92251a 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,8 +117,12 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) -bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); -bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); +#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ + (((_ipid_lo) >> 12) & 0xF)) +#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) + +bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); extern const uint32_t umc_v12_0_channel_idx_tbl[] diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 48bfcd0d558b..169ed400ee7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle) return amdgpu_vcn_early_init(adev); } +static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? 
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 2)) { + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + } + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); + + return 0; +} + /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) continue; @@ -161,23 +184,7 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); - fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? - AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; - - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 2)) { - fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; - fw_shared->drm_key_wa.method = - AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; - } - - if (amdgpu_vcnfw_log) - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + vcn_v4_0_fw_shared_init(adev, i); } if (amdgpu_sriov_vf(adev)) { @@ -1273,6 +1280,9 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; + // Must re/init fw_shared at beginning + vcn_v4_0_fw_shared_init(adev, i); + table_size = 0; MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 174f13eff575..d20060a51e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -96,6 +96,10 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; amdgpu_vpe_psp_update_sram(adev); + + /* Config DPM */ + amdgpu_vpe_configure_dpm(vpe); + return 0; } @@ -128,6 +132,8 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); + /* Config DPM */ + amdgpu_vpe_configure_dpm(vpe); return 0; } @@ -264,6 +270,15 @@ static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; + vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; + vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; + vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2; + vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3; + vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1; + vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3; + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d7cd5fa313ff..df75863393fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2069,7 +2069,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { }; static const uint32_t cwsr_trap_gfx10_hex[] = { - 0xbf820001, 0xbf820220, + 0xbf820001, 0xbf820221, 0xb0804004, 0xb978f802, 0x8a78ff78, 
0x00020006, 0xb97bf803, 0x876eff78, @@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf900004, 0xbf8cc07f, 0x877aff7f, 0x04000000, 0x8f7a857a, 0x886d7a6d, - 0xbefa037e, 0x877bff7f, - 0x0000ffff, 0xbefe03c1, - 0xbeff03c1, 0xdc5f8000, - 0x007a0000, 0x7e000280, - 0xbefe037a, 0xbeff037b, - 0xb97b02dc, 0x8f7b997b, - 0xb97a3a05, 0x807a817a, - 0xbf0d997b, 0xbf850002, - 0x8f7a897a, 0xbf820001, - 0x8f7a8a7a, 0xb97b1e06, - 0x8f7b8a7b, 0x807a7b7a, + 0x7e008200, 0xbefa037e, 0x877bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe03ff, 0x00003fff, - 0xbeff0380, 0xdc5f8040, - 0x007a0000, 0xd760007a, - 0x00011d00, 0xd760007b, - 0x00011f00, 0xbefe037a, - 0xbeff037b, 0xbef4037e, - 0x8775ff7f, 0x0000ffff, - 0x8875ff75, 0x00040000, - 0xbef60380, 0xbef703ff, - 0x10807fac, 0xbef1037c, - 0xbef00380, 0xb97302dc, - 0x8f739973, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820002, - 0xbeff03c1, 0xbf820009, + 0xbefe03c1, 0xbeff03c1, + 0xdc5f8000, 0x007a0000, + 0x7e000280, 0xbefe037a, + 0xbeff037b, 0xb97b02dc, + 0x8f7b997b, 0xb97a3a05, + 0x807a817a, 0xbf0d997b, + 0xbf850002, 0x8f7a897a, + 0xbf820001, 0x8f7a8a7a, + 0xb97b1e06, 0x8f7b8a7b, + 0x807a7b7a, 0x877bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe03ff, + 0x00003fff, 0xbeff0380, + 0xdc5f8040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe037a, 0xbeff037b, + 0xbef4037e, 0x8775ff7f, + 0x0000ffff, 0x8875ff75, + 0x00040000, 0xbef60380, + 0xbef703ff, 0x10807fac, + 0xbef1037c, 0xbef00380, + 0xb97302dc, 0x8f739973, + 0xbefe03c1, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0xbeff0380, + 0xbf820002, 0xbeff03c1, + 0xbf820009, 0xbef603ff, + 0x01000000, 0xe0704080, + 0x705d0100, 0xe0704100, + 0x705d0200, 0xe0704180, + 0x705d0300, 0xbf820008, 0xbef603ff, 0x01000000, - 0xe0704080, 0x705d0100, - 0xe0704100, 0x705d0200, - 0xe0704180, 0x705d0300, - 0xbf820008, 0xbef603ff, - 0x01000000, 0xe0704100, - 0x705d0100, 0xe0704200, - 0x705d0200, 0xe0704300, - 0x705d0300, 0xb9703a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0xbef603ff, - 0x01000000, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefc0380, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xd7610002, 0x0000f86c, - 0x807c817c, 0x8a7aff6d, - 0x80000000, 0xd7610002, - 0x0000f87a, 0x807c817c, - 0xd7610002, 0x0000f86e, - 0x807c817c, 0xd7610002, - 0x0000f86f, 0x807c817c, - 0xd7610002, 0x0000f878, - 0x807c817c, 0xb97af803, - 0xd7610002, 0x0000f87a, - 0x807c817c, 0xd7610002, - 0x0000f87b, 0x807c817c, - 0xb971f801, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xb971f814, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xb971f815, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xbefe03ff, 0x0000ffff, - 0xbeff0380, 0xe0704000, - 0x705d0200, 0xbefe03c1, + 0xe0704100, 0x705d0100, + 0xe0704200, 0x705d0200, + 0xe0704300, 0x705d0300, 0xb9703a05, 
0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, 0x8f7a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0xbef603ff, 0x01000000, - 0xbef90380, 0xbefc0380, - 0xbf800000, 0xbe802f00, - 0xbe822f02, 0xbe842f04, - 0xbe862f06, 0xbe882f08, - 0xbe8a2f0a, 0xbe8c2f0c, - 0xbe8e2f0e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefc0380, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xd7610002, + 0x0000f86c, 0x807c817c, + 0x8a7aff6d, 0x80000000, + 0xd7610002, 0x0000f87a, + 0x807c817c, 0xd7610002, + 0x0000f86e, 0x807c817c, + 0xd7610002, 0x0000f86f, + 0x807c817c, 0xd7610002, + 0x0000f878, 0x807c817c, + 0xb97af803, 0xd7610002, + 0x0000f87a, 0x807c817c, + 0xd7610002, 0x0000f87b, + 0x807c817c, 0xb971f801, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xb971f814, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xb971f815, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xbefe03ff, + 0x0000ffff, 0xbeff0380, + 0xe0704000, 0x705d0200, + 0xbefe03c1, 0xb9703a05, + 0x80708170, 0xbf0d9973, + 0xbf850002, 0x8f708970, + 0xbf820001, 0x8f708a70, + 0xb97a1e06, 0x8f7a8a7a, + 0x80707a70, 0xbef603ff, + 0x01000000, 0xbef90380, + 0xbefc0380, 0xbf800000, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xbe8c2f0c, 0xbe8e2f0e, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbf840006, 0xe0704000, - 0x705d0200, 0x8070ff70, - 0x00000080, 0xbef90380, - 0x7e040280, 0x807c907c, - 0xbf0aff7c, 0x00000060, - 0xbf85ffbc, 0xbe802f00, - 0xbe822f02, 0xbe842f04, - 0xbe862f06, 0xbe882f08, - 0xbe8a2f0a, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbf840006, + 0xe0704000, 0x705d0200, + 0x8070ff70, 0x00000080, + 0xbef90380, 0x7e040280, + 0x807c907c, 0xbf0aff7c, + 0x00000060, 0xbf85ffbc, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xe0704000, - 0x705d0200, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb97b4306, - 0x877bc17b, 0xbf840044, - 0xbf8a0000, 
0x877aff6d, - 0x80000000, 0xbf840040, - 0x8f7b867b, 0x8f7b827b, - 0xbef6037b, 0xb9703a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef603ff, - 0x01000000, 0xd7650000, - 0x000100c1, 0xd7660000, - 0x000200c1, 0x16000084, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbefc0380, - 0xbf850012, 0xbe8303ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8c0000, 0xe0704000, - 0x705d0100, 0x807c037c, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a7b7c, 0xbf85fff4, - 0xbf820011, 0xbe8303ff, - 0x00000100, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8c0000, 0xe0704000, - 0x705d0100, 0x807c037c, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a7b7c, 0xbf85fff4, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0704000, 0x705d0200, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbf850004, 0xbef003ff, - 0x00000200, 0xbeff0380, - 0xbf820003, 0xbef003ff, - 0x00000400, 0xbeff03c1, - 0xb97b3a05, 0x807b817b, - 0x8f7b827b, 0x907c9973, + 0xbf850002, 0xbeff0380, + 0xbf820001, 0xbeff03c1, + 0xb97b4306, 0x877bc17b, + 0xbf840044, 0xbf8a0000, + 0x877aff6d, 0x80000000, + 0xbf840040, 0x8f7b867b, + 0x8f7b827b, 0xbef6037b, + 0xb9703a05, 0x80708170, + 0xbf0d9973, 0xbf850002, + 0x8f708970, 0xbf820001, + 0x8f708a70, 0xb97a1e06, + 0x8f7a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef603ff, 0x01000000, + 0xd7650000, 0x000100c1, + 0xd7660000, 0x000200c1, + 0x16000084, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbf850017, 0xbef603ff, - 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840037, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xe0704000, 0x705d0000, - 0xe0704080, 0x705d0100, - 0xe0704100, 0x705d0200, - 0xe0704180, 0x705d0300, - 0x807c847c, 0x8070ff70, - 0x00000200, 0xbf0a7b7c, - 0xbf85ffef, 0xbf820025, + 0xbefc0380, 0xbf850012, + 0xbe8303ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x705d0100, + 0x807c037c, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7c, + 0xbf85fff4, 0xbf820011, + 0xbe8303ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x705d0100, + 0x807c037c, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7c, + 0xbf85fff4, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850004, + 0xbef003ff, 0x00000200, + 0xbeff0380, 0xbf820003, + 0xbef003ff, 0x00000400, + 0xbeff03c1, 0xb97b3a05, + 0x807b817b, 0x8f7b827b, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850017, 0xbef603ff, 0x01000000, 0xbefc0384, 0xbf0a7b7c, - 0xbf840011, 0x7e008700, + 0xbf840037, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, - 0x705d0000, 0xe0704100, - 0x705d0100, 0xe0704200, - 0x705d0200, 0xe0704300, + 0x705d0000, 0xe0704080, + 0x705d0100, 0xe0704100, + 0x705d0200, 0xe0704180, 0x705d0300, 0x807c847c, - 0x8070ff70, 0x00000400, + 0x8070ff70, 0x00000200, 0xbf0a7b7c, 0xbf85ffef, - 0xb97b1e06, 0x877bc17b, - 0xbf84000c, 0x8f7b837b, - 0x807b7c7b, 0xbefe03c1, - 0xbeff0380, 0x7e008700, + 0xbf820025, 0xbef603ff, + 0x01000000, 0xbefc0384, + 0xbf0a7b7c, 0xbf840011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, - 0x807c817c, 0x8070ff70, - 0x00000080, 0xbf0a7b7c, - 0xbf85fff8, 0xbf82013b, 
- 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0xb97202dc, 0x8f729972, - 0x876eff7f, 0x04000000, - 0xbf840034, 0xbefe03c1, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f4306, - 0x876fc16f, 0xbf840029, - 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9783a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef603ff, - 0x01000000, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850009, - 0xe0310000, 0x781d0000, - 0x807cff7c, 0x00000080, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff8, - 0xbf820008, 0xe0310000, - 0x781d0000, 0x807cff7c, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fff8, 0xbef80380, + 0xe0704100, 0x705d0100, + 0xe0704200, 0x705d0200, + 0xe0704300, 0x705d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xb97b1e06, + 0x877bc17b, 0xbf84000c, + 0x8f7b837b, 0x807b7c7b, + 0xbefe03c1, 0xbeff0380, + 0x7e008700, 0xe0704000, + 0x705d0000, 0x807c817c, + 0x8070ff70, 0x00000080, + 0xbf0a7b7c, 0xbf85fff8, + 0xbf82013b, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xb97202dc, + 0x8f729972, 0x876eff7f, + 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f3a05, 0x806f816f, - 0x8f6f826f, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbf850024, 0xbef603ff, - 0x01000000, 0xbeee0378, + 0xb96f4306, 0x876fc16f, + 0xbf840029, 0x8f6f866f, + 0x8f6f826f, 0xbef6036f, + 0xb9783a05, 0x80788178, + 0xbf0d9972, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb96e1e06, + 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xbf0a6f7c, - 0xbf840050, 0xe0304000, - 0x785d0000, 0xe0304080, - 0x785d0100, 0xe0304100, - 0x785d0200, 0xe0304180, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85ffee, 0xe0304000, - 0x6e5d0000, 0xe0304080, - 0x6e5d0100, 0xe0304100, - 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf8c3f70, - 0xbf820034, 0xbef603ff, - 0x01000000, 0xbeee0378, - 0x8078ff78, 0x00000400, - 0xbefc0384, 0xbf0a6f7c, - 0xbf840012, 0xe0304000, - 0x785d0000, 0xe0304100, - 0x785d0100, 0xe0304200, - 0x785d0200, 0xe0304300, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xb96f1e06, - 0x876fc16f, 0xbf84000e, - 0x8f6f836f, 0x806f7c6f, - 0xbefe03c1, 0xbeff0380, + 0x8078ff78, 0x00000080, + 0xbef603ff, 0x01000000, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850009, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000080, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff8, 0xbf820008, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7c, 0xbf85fff8, + 0xbef80380, 0xbefe03c1, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f3a05, + 0x806f816f, 0x8f6f826f, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850024, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000200, 0xbefc0384, + 0xbf0a6f7c, 0xbf840050, 0xe0304000, 0x785d0000, + 0xe0304080, 0x785d0100, + 0xe0304100, 0x785d0200, + 0xe0304180, 0x785d0300, 0xbf8c3f70, 0x7e008500, - 0x807c817c, 0x8078ff78, - 0x00000080, 0xbf0a6f7c, - 0xbf85fff7, 0xbeff03c1, + 0x7e028501, 0x7e048502, + 
0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85ffee, 0xe0304000, 0x6e5d0000, - 0xe0304100, 0x6e5d0100, - 0xe0304200, 0x6e5d0200, - 0xe0304300, 0x6e5d0300, - 0xbf8c3f70, 0xb9783a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef603ff, - 0x01000000, 0xbefc03ff, - 0x0000006c, 0x80f89078, - 0xf429003a, 0xf0000000, - 0xbf8cc07f, 0x80fc847c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0x80f8a078, - 0xf42d003a, 0xf0000000, - 0xbf8cc07f, 0x80fc887c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0x80f8c078, - 0xf431003a, 0xf0000000, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0xbe883108, - 0xbe8a310a, 0xbe8c310c, - 0xbe8e310e, 0xbf06807c, - 0xbf84fff0, 0xba80f801, - 0x00000000, 0xbf8a0000, + 0xe0304080, 0x6e5d0100, + 0xe0304100, 0x6e5d0200, + 0xe0304180, 0x6e5d0300, + 0xbf8c3f70, 0xbf820034, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000400, 0xbefc0384, + 0xbf0a6f7c, 0xbf840012, + 0xe0304000, 0x785d0000, + 0xe0304100, 0x785d0100, + 0xe0304200, 0x785d0200, + 0xe0304300, 0x785d0300, + 0xbf8c3f70, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xb96f1e06, 0x876fc16f, + 0xbf84000e, 0x8f6f836f, + 0x806f7c6f, 0xbefe03c1, + 0xbeff0380, 0xe0304000, + 0x785d0000, 0xbf8c3f70, + 0x7e008500, 0x807c817c, + 0x8078ff78, 0x00000080, + 0xbf0a6f7c, 0xbf85fff7, + 0xbeff03c1, 0xe0304000, + 0x6e5d0000, 0xe0304100, + 0x6e5d0100, 0xe0304200, + 0x6e5d0200, 0xe0304300, + 0x6e5d0300, 0xbf8c3f70, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, 0xbef603ff, 0x01000000, - 0xf4211bfa, 0xf0000000, - 0x80788478, 0xf4211b3a, + 0xbefc03ff, 0x0000006c, + 0x80f89078, 0xf429003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc847c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0x80f8a078, 0xf42d003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc887c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0x80f8c078, 0xf431003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0xbe883108, 0xbe8a310a, + 0xbe8c310c, 0xbe8e310e, + 0xbf06807c, 0xbf84fff0, + 0xba80f801, 0x00000000, + 0xbf8a0000, 0xb9783a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef603ff, + 0x01000000, 0xf4211bfa, 0xf0000000, 0x80788478, - 0xf4211b7a, 0xf0000000, - 0x80788478, 0xf4211c3a, + 0xf4211b3a, 0xf0000000, + 0x80788478, 0xf4211b7a, 0xf0000000, 0x80788478, - 0xf4211c7a, 0xf0000000, - 0x80788478, 0xf4211eba, + 0xf4211c3a, 0xf0000000, + 0x80788478, 0xf4211c7a, 0xf0000000, 0x80788478, - 0xf4211efa, 0xf0000000, - 0x80788478, 0xf4211e7a, + 0xf4211eba, 0xf0000000, + 0x80788478, 0xf4211efa, 0xf0000000, 0x80788478, - 0xf4211cfa, 0xf0000000, - 0x80788478, 0xf4211bba, + 0xf4211e7a, 0xf0000000, + 0x80788478, 0xf4211cfa, 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef815, 0xbefc036f, - 0xbefe0370, 0xbeff0371, - 0x876f7bff, 0x000003ff, - 0xb9ef4803, 0x876f7bff, - 0xfffff800, 0x906f8b6f, - 0xb9efa2c3, 0xb9f3f801, - 0xb96e3a05, 0x806e816e, - 0xbf0d9972, 0xbf850002, - 0x8f6e896e, 0xbf820001, - 0x8f6e8a6e, 0xb96f1e06, - 0x8f6f8a6f, 0x806e6f6e, - 0x806eff6e, 
0x00000200, - 0x806e746e, 0x826f8075, - 0x876fff6f, 0x0000ffff, - 0xf4091c37, 0xfa000050, - 0xf4091d37, 0xfa000060, - 0xf4011e77, 0xfa000074, - 0xbf8cc07f, 0x876dff6d, - 0x0000ffff, 0x87fe7e7e, - 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf810000, + 0xb9eef814, 0xf4211bba, + 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef815, + 0xbefc036f, 0xbefe0370, + 0xbeff0371, 0x876f7bff, + 0x000003ff, 0xb9ef4803, + 0x876f7bff, 0xfffff800, + 0x906f8b6f, 0xb9efa2c3, + 0xb9f3f801, 0xb96e3a05, + 0x806e816e, 0xbf0d9972, + 0xbf850002, 0x8f6e896e, + 0xbf820001, 0x8f6e8a6e, + 0xb96f1e06, 0x8f6f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x876fff6f, + 0x0000ffff, 0xf4091c37, + 0xfa000050, 0xf4091d37, + 0xfa000060, 0xf4011e77, + 0xfa000074, 0xbf8cc07f, + 0x876dff6d, 0x0000ffff, + 0x87fe7e7e, 0x87ea6a6a, + 0xb9faf802, 0xbe80226c, + 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx11_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index fdab64624422..e0140df0b0ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -369,6 +369,12 @@ L_SLEEP: s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp #if NO_SQC_STORE +#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID + // gfx10: If there was a VALU exception, the exception state must be + // cleared before executing the VALU instructions below. + v_clrexcp +#endif + // Trap temporaries must be saved via VGPR but all VGPRs are in use. // There is no ttmp space to hold the resource constant for VGPR save. // Save v0 by itself since it requires only two SGPRs. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f6d4748c1980..ce4c52ec34d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1564,16 +1564,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, { struct kfd_ioctl_import_dmabuf_args *args = data; struct kfd_process_device *pdd; - struct dma_buf *dmabuf; int idr_handle; uint64_t size; void *mem; int r; - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - mutex_lock(&p->mutex); pdd = kfd_process_device_data_by_id(p, args->gpu_id); if (!pdd) { @@ -1587,10 +1582,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, goto err_unlock; } - r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf, - args->va_addr, pdd->drm_priv, - (struct kgd_mem **)&mem, &size, - NULL); + r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd, + args->va_addr, pdd->drm_priv, + (struct kgd_mem **)&mem, &size, + NULL); if (r) goto err_unlock; @@ -1601,7 +1596,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, } mutex_unlock(&p->mutex); - dma_buf_put(dmabuf); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1612,7 +1606,6 @@ err_free: pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); - dma_buf_put(dmabuf); return r; } @@ -1855,8 +1848,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) return num_of_bos; } -static int criu_get_prime_handle(struct kgd_mem *mem, int flags, - u32 *shared_fd) +static int criu_get_prime_handle(struct kgd_mem *mem, + int flags, u32 *shared_fd) { struct dma_buf *dmabuf; int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 
0f58be65132f..739721254a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events, dst = &data[i].memory_exception_data; src = &event->memory_exception_data; size = sizeof(struct kfd_hsa_memory_exception_data); + } else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + dst = &data[i].memory_exception_data; + src = &event->hw_exception_data; + size = sizeof(struct kfd_hsa_hw_exception_data); } else if (event->type == KFD_EVENT_TYPE_SIGNAL && waiter->event_age_enabled) { dst = &data[i].signal_event_data.last_event_age; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 62b205dac63a..6604a3f99c5e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; - /* dGPUs: the reserved space for kernel - * before SVM - */ - pdd->qpd.cwsr_base = SVM_CWSR_BASE; - pdd->qpd.ib_base = SVM_IB_BASE; - pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); } @@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) pdd->lds_base = MAKE_LDS_APP_BASE_V9(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - pdd->gpuvm_base = PAGE_SIZE; + /* Raven needs SVM to support graphic handle, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM. + */ + pdd->gpuvm_base = SVM_USER_BASE; pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); - - /* - * Place TBA/TMA on opposite side of VM hole to prevent - * stray faults from triggering SVM on these pages. 
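The new KFD_EVENT_TYPE_HW_EXCEPTION branch in copy_signaled_event_data() above copies hw_exception_data into the memory_exception_data destination slot. That is only size-safe because the uapi kfd_event_data keeps both payloads in one union; a minimal user-space sketch of the idea follows, with stand-in structs (field names invented, not the real layout from include/uapi/linux/kfd_ioctl.h).

#include <stdio.h>
#include <string.h>

/* Stand-ins for kfd_hsa_memory_exception_data / kfd_hsa_hw_exception_data. */
struct mem_exc { unsigned long gpu_id, va, failure; };
struct hw_exc { unsigned int reset_type, reset_cause; };

struct event_data { /* mirrors the union-in-struct shape of kfd_event_data */
    union {
        struct mem_exc memory_exception_data;
        struct hw_exc hw_exception_data;
    };
};

int main(void)
{
    struct event_data ev = { .hw_exception_data = { 1, 2 } };
    struct event_data out;

    /* Same trick as the event copy path: the destination slot is the
     * whole union, so copying sizeof(struct hw_exc) bytes cannot
     * overflow even though dst nominally names the other member. */
    memcpy(&out.memory_exception_data, &ev.hw_exception_data,
           sizeof(struct hw_exc));
    printf("%u %u\n", out.hw_exception_data.reset_type,
           out.hw_exception_data.reset_cause);
    return 0;
}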
- */ - pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size; } int kfd_init_apertures(struct kfd_process *process) @@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process) return -EINVAL; } } + + /* dGPUs: the reserved space for kernel + * before SVM + */ + pdd->qpd.cwsr_base = SVM_CWSR_BASE; + pdd->qpd.ib_base = SVM_IB_BASE; } dev_dbg(kfd_device, "node id %u\n", id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..d630100b9e91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) -{ - unsigned long cpages = 0; - unsigned long i; - - for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - migrate->src[i] & MIGRATE_PFN_MIGRATE) - cpages++; - } - return cpages; -} - static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) { unsigned long upages = 0; @@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; + unsigned long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -442,20 +430,21 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - svm_migrate_successful_pages(&migrate), cpages, migrate.npages); - svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); + kfd_smi_event_migration_end(node, p->lead_thread->pid, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger); @@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); out: - if (!r && cpages) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); + WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - return cpages; + return mpages; } return r; } @@ -479,6 +468,8 @@ out: * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,19 +480,20 @@ out: */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; struct vm_area_struct *vma; uint64_t ttm_res_offset; struct kfd_node *node; - unsigned long cpages = 0; + unsigned long mpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", - prange->svms, prange->start, 
prange->last, best_loc);
- return 0;
+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] outside prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
 }

 node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +502,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 return -ENODEV;
 }

- pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+ pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+ prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+ best_loc);

- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;

 r = svm_range_vram_node_new(node, prange, true);
 if (r) {
 dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
 return r;
 }
- ttm_res_offset = prange->offset << PAGE_SHIFT;
+ ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;

 for (addr = start; addr < end;) {
 unsigned long next;
@@ -536,16 +529,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 pr_debug("failed %ld to migrate\n", r);
 break;
 } else {
- cpages += r;
+ mpages += r;
 }
 ttm_res_offset += next - addr;
 addr = next;
 }

- if (cpages) {
+ if (mpages) {
 prange->actual_loc = best_loc;
- svm_range_dma_unmap(prange);
- } else {
+ prange->vram_pages += mpages;
+ } else if (!prange->actual_loc) {
+ /* if no pages migrated and all pages of prange are in
+ * sys ram, drop the svm_bo taken in svm_range_vram_node_new
+ */
 svm_range_vram_node_free(prange);
 }

@@ -663,9 +659,8 @@ out_oom:
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
- * 0 - success with all pages migrated
 * negative values - indicate error
- * positive values - partial migration, number of pages not migrated
+ * positive values or zero - number of pages migrated
 */
static long
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
@@ -676,6 +671,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 uint64_t npages = (end - start) >> PAGE_SHIFT;
 unsigned long upages = npages;
 unsigned long cpages = 0;
+ unsigned long mpages = 0;
 struct amdgpu_device *adev = node->adev;
 struct kfd_process_device *pdd;
 struct dma_fence *mfence = NULL;
@@ -725,10 +721,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 goto out_free;
 }
 if (cpages != npages)
- pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
 else
- pr_debug("0x%lx pages migrated\n", cpages);
+ pr_debug("0x%lx pages collected\n", cpages);

 r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
 scratch, npages);
@@ -751,17 +747,21 @@ out_free:
 kvfree(buf);
out:
 if (!r && cpages) {
+ mpages = cpages - upages;
 pdd = svm_range_get_pdd_by_node(prange, node);
 if (pdd)
- WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+ WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
 }
- return r ? r : upages;
+
+ return r ? r : mpages;
 }
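The ttm_res_offset line above is the subtle part of the new sub-range API: when only [start_mgr, last_mgr] moves to VRAM, the destination offset must skip the pages of the range that precede start_mgr. A self-contained sketch with invented page numbers (PAGE_SHIFT fixed at 12 here purely for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    /* Hypothetical prange: pages [0x1000, 0x17ff], backed 0x40 pages
     * into its VRAM node (prange->offset). */
    unsigned long range_start = 0x1000, range_offset = 0x40;
    /* Caller migrates only the sub-range starting at page 0x1200. */
    unsigned long start_mgr = 0x1200;

    /* Same formula as svm_migrate_ram_to_vram() above: data for the
     * sub-range must land past the pages that precede it. */
    unsigned long long ttm_res_offset =
        (unsigned long long)(start_mgr - range_start + range_offset)
            << PAGE_SHIFT;

    printf("VRAM byte offset: 0x%llx\n", ttm_res_offset); /* 0x240000 */
    return 0;
}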
/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page that needs to be migrated to sys ram
+ * @last_mgr: last page that needs to be migrated to sys ram
 * @trigger: reason of migration
 * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
 *
@@ -771,6 +771,7 @@ out:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start_mgr, unsigned long last_mgr,
 uint32_t trigger, struct page *fault_page)
{
 struct kfd_node *node;
@@ -778,26 +779,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 unsigned long addr;
 unsigned long start;
 unsigned long end;
- unsigned long upages = 0;
+ unsigned long mpages = 0;
 long r = 0;

+ /* this prange has no vram pages to migrate to sys ram */
 if (!prange->actual_loc) {
 pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 prange->start, prange->last);
 return 0;
 }

+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] outside prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
+ }
+
 node = svm_range_get_node_by_id(prange, prange->actual_loc);
 if (!node) {
 pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
 return -ENODEV;
 }
 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
- prange->svms, prange, prange->start, prange->last,
+ prange->svms, prange, start_mgr, last_mgr,
 prange->actual_loc);

- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;

 for (addr = start; addr < end;) {
 unsigned long next;
@@ -816,14 +824,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 pr_debug("failed %ld to migrate prange %p\n", r, prange);
 break;
 } else {
- upages += r;
+ mpages += r;
 }
 addr = next;
 }

- if (r >= 0 && !upages) {
- svm_range_vram_node_free(prange);
- prange->actual_loc = 0;
+ if (r >= 0) {
+ prange->vram_pages -= mpages;
+
+ /* if prange now has no vram pages, set its actual_loc to system
+ * and drop its svm_bo ref
+ */
+ if (prange->vram_pages == 0 && prange->ttm_res) {
+ prange->actual_loc = 0;
+ svm_range_vram_node_free(prange);
+ }
 }

 return r < 0 ? r : 0;
@@ -833,17 +848,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
+ * @start: first page of the sub-range migrated on to best_loc
+ * @last: last page of the sub-range migrated on to best_loc
 * @mm: process mm, use current->mm if NULL
 * @trigger: reason of migration
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
+ * Migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ * * Return: * 0 - OK, otherwise error code */ static int svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm, uint32_t trigger) + unsigned long start, unsigned long last, + struct mm_struct *mm, uint32_t trigger) { int r, retries = 3; @@ -855,7 +876,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); do { - r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL); + r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, + trigger, NULL); if (r) return r; } while (prange->actual_loc && --retries); @@ -863,17 +885,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, if (prange->actual_loc) return -EDEADLK; - return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); + return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger); } int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start, unsigned long last, struct mm_struct *mm, uint32_t trigger) { - if (!prange->actual_loc) - return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); + if (!prange->actual_loc || prange->actual_loc == best_loc) + return svm_migrate_ram_to_vram(prange, best_loc, start, last, + mm, trigger); + else - return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); + return svm_migrate_vram_to_vram(prange, best_loc, start, last, + mm, trigger); } @@ -889,10 +915,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, */ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { + unsigned long start, last, size; unsigned long addr = vmf->address; struct svm_range_bo *svm_bo; - enum svm_work_list_ops op; - struct svm_range *parent; struct svm_range *prange; struct kfd_process *p; struct mm_struct *mm; @@ -929,51 +954,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) mutex_lock(&p->svms.lock); - prange = svm_range_from_addr(&p->svms, addr, &parent); + prange = svm_range_from_addr(&p->svms, addr, NULL); if (!prange) { pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); r = -EFAULT; goto out_unlock_svms; } - mutex_lock(&parent->migrate_mutex); - if (prange != parent) - mutex_lock_nested(&prange->migrate_mutex, 1); + mutex_lock(&prange->migrate_mutex); if (!prange->actual_loc) goto out_unlock_prange; - svm_range_lock(parent); - if (prange != parent) - mutex_lock_nested(&prange->lock, 1); - r = svm_range_split_by_granularity(p, mm, addr, parent, prange); - if (prange != parent) - mutex_unlock(&prange->lock); - svm_range_unlock(parent); - if (r) { - pr_debug("failed %d to split range by granularity\n", r); - goto out_unlock_prange; - } + /* Align migration range start and size to granularity size */ + size = 1UL << prange->granularity; + start = max(ALIGN_DOWN(addr, size), prange->start); + last = min(ALIGN(addr + 1, size) - 1, prange->last); - r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - vmf->page); + r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page); if (r) pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange, prange->start, prange->last); - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && parent == prange) - op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; - else - op = SVM_OP_UPDATE_RANGE_NOTIFIER; - svm_range_add_list_work(&p->svms, parent, mm, op); - 
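The fault handler above replaces svm_range_split_by_granularity() with a simple clamp: migrate one granule around the faulting page, trimmed to the prange bounds. A standalone sketch with concrete numbers (the ALIGN macros are re-derived here; the kernel's versions live in include/linux/align.h):

#include <stdio.h>

#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) /* a must be a power of two */
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    unsigned long size = 1UL << 9;          /* granularity 9: 512 pages */
    unsigned long addr = 0x12345;           /* faulting page number */
    unsigned long pstart = 0x12000, plast = 0x12fff; /* prange bounds */

    /* Same clamp as svm_migrate_to_ram() above. */
    unsigned long start = MAX(ALIGN_DOWN(addr, size), pstart);
    unsigned long last = MIN(ALIGN_UP(addr + 1, size) - 1, plast);

    /* Prints [0x12200 0x123ff]: a single 512-page window around the
     * fault, entirely inside the range. */
    printf("[0x%lx 0x%lx]\n", start, last);
    return 0;
}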
schedule_deferred_list_work(&p->svms); + r, prange->svms, prange, start, last); out_unlock_prange: - if (prange != parent) - mutex_unlock(&prange->migrate_mutex); - mutex_unlock(&parent->migrate_mutex); + mutex_unlock(&prange->migrate_mutex); out_unlock_svms: mutex_unlock(&p->svms.lock); out_unref_process: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 487f26368164..2eebf67f9c2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start, unsigned long last, struct mm_struct *mm, uint32_t trigger); + int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + unsigned long start, unsigned long last, uint32_t trigger, struct page *fault_page); + unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4c8e278a0d0c..745024b31340 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -748,7 +748,6 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; void *drm_priv; - atomic64_t tlb_seq; /* GPUVM allocations storage */ struct idr alloc_idr; @@ -971,7 +970,7 @@ struct kfd_process { struct work_struct debug_event_workarea; /* Tracks debug per-vmid request for debug flags */ - bool dbg_flags; + u32 dbg_flags; atomic_t poison; /* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */ @@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); +static inline void kfd_flush_tlb(struct kfd_process_device *pdd, + enum TLB_FLUSH_TYPE type) +{ + struct amdgpu_device *adev = pdd->dev->adev; + struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); + + amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); +} static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7a33e06f5c90..71df51fcc1b0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -664,7 +664,8 @@ int kfd_process_create_wq(void) if (!kfd_process_wq) kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); if (!kfd_restore_wq) - kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); + kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", + WQ_FREEZABLE); if (!kfd_process_wq || !kfd_restore_wq) { kfd_process_destroy_wq(); @@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; struct kfd_process *p; + struct dma_fence *ef; struct kfd_node *dev; int ret; @@ -1661,13 +1663,13 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, &p->kgd_process_info, - &p->ef); + &ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; } + RCU_INIT_POINTER(p->ef, ef); pdd->drm_priv = drm_file->private_data; - atomic64_set(&pdd->tlb_seq, 0); ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -1909,6 +1911,21 @@ kfd_process_gpuid_from_node(struct 
kfd_process *p, struct kfd_node *node,
 return -EINVAL;
}

+static int signal_eviction_fence(struct kfd_process *p)
+{
+ struct dma_fence *ef;
+ int ret;
+
+ rcu_read_lock();
+ ef = dma_fence_get_rcu_safe(&p->ef);
+ rcu_read_unlock();
+
+ ret = dma_fence_signal(ef);
+ dma_fence_put(ef);
+
+ return ret;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
 int ret;
@@ -1921,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work)
 * lifetime of this thread, kfd_process p will be valid
 */
 p = container_of(dwork, struct kfd_process, eviction_work);
- WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
- "Eviction fence mismatch\n");
-
- /* Narrow window of overlap between restore and evict work
- * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
- * unreserves KFD BOs, it is possible to evicted again. But
- * restore has few more steps of finish. So lets wait for any
- * previous restore work to complete
- */
- flush_delayed_work(&p->restore_work);

 pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 if (!ret) {
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
+ /* If another thread already signaled the eviction fence,
+ * it is responsible for stopping the queues and scheduling
+ * the restore work.
+ */
+ if (!signal_eviction_fence(p))
+ queue_delayed_work(kfd_restore_wq, &p->restore_work,
 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+ else
+ kfd_process_restore_queues(p);

 pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 } else
 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}

+static int restore_process_helper(struct kfd_process *p)
+{
+ int ret = 0;
+
+ /* VMs may not have been acquired yet during debugging. */
+ if (p->kgd_process_info) {
+ ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+ p->kgd_process_info, &p->ef);
+ if (ret)
+ return ret;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (!ret)
+ pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+ else
+ pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+
+ return ret;
+}
+
 static void restore_process_worker(struct work_struct *work)
 {
 struct delayed_work *dwork;
@@ -1971,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work)
 */
 p->last_restore_timestamp = get_jiffies_64();

- /* VMs may not have been acquired yet during debugging. */
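signal_eviction_fence() above is the heart of this rework: p->ef becomes an RCU-managed pointer, so readers take a reference instead of dereferencing it directly. An annotated restatement of the pattern (illustrative, kernel-only code, not compilable standalone):

/* Take a reference under RCU so a concurrent writer swapping p->ef
 * (restore_process_helper() via amdgpu_amdkfd_gpuvm_restore_process_bos())
 * cannot free the fence underneath us, then signal it exactly once. */
static int signal_eviction_fence(struct kfd_process *p)
{
    struct dma_fence *ef;
    int ret;

    rcu_read_lock();
    /* Retries internally until it holds a consistent pointer plus
     * reference; yields NULL if the pointer is NULL. */
    ef = dma_fence_get_rcu_safe(&p->ef);
    rcu_read_unlock();

    /* Nonzero means the fence was NULL or already signaled; the evict
     * worker uses that to decide who schedules the restore work. */
    ret = dma_fence_signal(ef);
    dma_fence_put(ef); /* dma_fence_put(NULL) is a no-op */

    return ret;
}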
- if (p->kgd_process_info)
- ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
- &p->ef);
+
+ ret = restore_process_helper(p);
 if (ret) {
 pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 WARN(!ret, "reschedule restore work failed\n");
- return;
 }
-
- ret = kfd_process_restore_queues(p);
- if (!ret)
- pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
- else
- pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

 void kfd_suspend_all_processes(void)
@@ -1999,14 +2022,9 @@ void kfd_suspend_all_processes(void)
 WARN(debug_evictions, "Evicting all processes");
 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- cancel_delayed_work_sync(&p->eviction_work);
- flush_delayed_work(&p->restore_work);
-
 if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 pr_err("Failed to suspend process 0x%x\n", p->pasid);
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
+ signal_eviction_fence(p);
 }
 srcu_read_unlock(&kfd_processes_srcu, idx);
}
@@ -2018,7 +2036,7 @@ int kfd_resume_all_processes(void)
 int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+ if (restore_process_helper(p)) {
 pr_err("Restore process %d failed during resume\n",
 p->pasid);
 ret = -EFAULT;
@@ -2059,36 +2077,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
 KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
- struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
- uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
- struct kfd_node *dev = pdd->dev;
- uint32_t xcc_mask = dev->xcc_mask;
- int xcc = 0;
-
- /*
- * It can be that we race and lose here, but that is extremely unlikely
- * and the worst thing which could happen is that we flush the changes
- * into the TLB once more which is harmless.
- */
- if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
- return;
-
- if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- /* Nothing to flush until a VMID is assigned, which
- * only happens when the first queue is created.
- */
- if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
- pdd->qpd.vmid);
- } else {
- for_each_inst(xcc, xcc_mask)
- amdgpu_amdkfd_flush_gpu_tlb_pasid(
- dev->adev, pdd->process->pasid, type, xcc);
- }
-}
-
 /* assumes caller holds process lock. */
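The kfd_flush_tlb() body deleted above carried an instructive optimization: skip the flush when the VM's TLB sequence number has not advanced since the last flush, and tolerate the rare race because an extra flush is harmless. The replacement presumably leaves that bookkeeping to amdgpu_vm_flush_compute_tlb(). A self-contained sketch of the dedup idiom (all names invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong last_flushed_seq; /* per-device in a real driver */

/* Returns true if a flush was actually issued. A racing loser may flush
 * one extra time, which is harmless, the same argument the deleted
 * comment in kfd_flush_tlb() made for atomic64_xchg(). */
static bool flush_tlb_if_stale(unsigned long current_seq)
{
    if (atomic_exchange(&last_flushed_seq, current_seq) == current_seq)
        return false; /* nothing changed since the last flush */
    /* ... issue the expensive TLB invalidation here ... */
    return true;
}

int main(void)
{
    printf("%d %d %d\n",
           flush_tlb_if_stale(1),  /* 1: seq advanced, flush */
           flush_tlb_if_stale(1),  /* 0: duplicate, skipped */
           flush_tlb_if_stale(2)); /* 1: seq advanced again */
    return 0;
}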
int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 77f493262e05..43eff221eae5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -87,6 +87,8 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 return;

 dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ if (dev->kfd->shared_resources.enable_mes)
+ amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
 pdd->already_dequeued = true;
}

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a15bfb5223e8..ac84c4a2ca07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -198,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 }
+
 return 0;
}

@@ -349,6 +350,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 INIT_LIST_HEAD(&prange->child_list);
 atomic_set(&prange->invalid, 0);
 prange->validate_timestamp = 0;
+ prange->vram_pages = 0;
 mutex_init(&prange->migrate_mutex);
 mutex_init(&prange->lock);

@@ -395,6 +397,8 @@ static void svm_range_bo_release(struct kref *kref)
 prange->start, prange->last);
 mutex_lock(&prange->lock);
 prange->svm_bo = NULL;
+ /* prange should not hold vram pages at this point */
+ WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
 mutex_unlock(&prange->lock);

 spin_lock(&svm_bo->list_lock);
@@ -878,14 +882,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)

 static void *
svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
- uint64_t offset)
+ uint64_t offset, uint64_t *vram_pages)
{
+ unsigned char *src = (unsigned char *)psrc + offset;
 unsigned char *dst;
+ uint64_t i;

 dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
 if (!dst)
 return NULL;
- memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+ if (!vram_pages) {
+ memcpy(dst, src, num_elements * size);
+ return (void *)dst;
+ }
+
+ *vram_pages = 0;
+ for (i = 0; i < num_elements; i++) {
+ dma_addr_t *temp;
+ temp = (dma_addr_t *)dst + i;
+ *temp = *((dma_addr_t *)src + i);
+ if (*temp & SVM_RANGE_VRAM_DOMAIN)
+ (*vram_pages)++;
+ }
 return (void *)dst;
}

@@ -899,7 +918,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 if (!src->dma_addr[i])
 continue;
 dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
- sizeof(*src->dma_addr[i]), src->npages, 0);
+ sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
 if (!dst->dma_addr[i])
 return -ENOMEM;
 }

@@ -910,7 +929,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
 uint64_t old_start, uint64_t old_n,
- uint64_t new_start, uint64_t new_n)
+ uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
{
 unsigned char *new, *old, *pold;
 uint64_t d;

@@ -922,11 +941,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
 return 0;
 d = (new_start - old_start) * size;
- new = svm_range_copy_array(pold, size, new_n, d);
+ /* get dma addr array for new range and calculate its vram page number */
+ new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
 if (!new)
 return -ENOMEM;
 d = (new_start == old_start) ?
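svm_range_copy_array() above now tallies VRAM-backed entries while it copies, so range splits can keep per-range vram_pages counts without a second pass. A standalone sketch of the same walk (the flag value is a stand-in for SVM_RANGE_VRAM_DOMAIN):

#include <stdint.h>
#include <stdio.h>

#define VRAM_FLAG 0x1ULL /* stand-in; the driver uses SVM_RANGE_VRAM_DOMAIN */

/* Copy a dma_addr_t-style array and count entries carrying the VRAM flag. */
static uint64_t copy_and_count_vram(uint64_t *dst, const uint64_t *src,
                                    uint64_t n)
{
    uint64_t i, vram_pages = 0;

    for (i = 0; i < n; i++) {
        dst[i] = src[i];
        if (dst[i] & VRAM_FLAG)
            vram_pages++;
    }
    return vram_pages;
}

int main(void)
{
    uint64_t src[4] = { 0x1000, 0x2001, 0x3000, 0x4001 }; /* two flagged */
    uint64_t dst[4];

    printf("vram pages: %llu\n",
           (unsigned long long)copy_and_count_vram(dst, src, 4));
    return 0;
}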
new_n * size : 0; - old = svm_range_copy_array(pold, size, old_n, d); + old = svm_range_copy_array(pold, size, old_n, d, NULL); if (!old) { kvfree(new); return -ENOMEM; @@ -948,10 +968,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old, for (i = 0; i < MAX_GPU_INSTANCE; i++) { r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], sizeof(*old->dma_addr[i]), old->start, - npages, new->start, new->npages); + npages, new->start, new->npages, + old->actual_loc ? &new->vram_pages : NULL); if (r) return r; } + if (old->actual_loc) + old->vram_pages -= new->vram_pages; return 0; } @@ -1097,7 +1120,7 @@ static int svm_range_split_tail(struct svm_range *prange, uint64_t new_last, struct list_head *insert_list, struct list_head *remap_list) { - struct svm_range *tail; + struct svm_range *tail = NULL; int r = svm_range_split(prange, prange->start, new_last, &tail); if (!r) { @@ -1112,7 +1135,7 @@ static int svm_range_split_head(struct svm_range *prange, uint64_t new_start, struct list_head *insert_list, struct list_head *remap_list) { - struct svm_range *head; + struct svm_range *head = NULL; int r = svm_range_split(prange, new_start, prange->last, &head); if (!r) { @@ -1135,66 +1158,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } -/** - * svm_range_split_by_granularity - collect ranges within granularity boundary - * - * @p: the process with svms list - * @mm: mm structure - * @addr: the vm fault address in pages, to split the prange - * @parent: parent range if prange is from child list - * @prange: prange to split - * - * Trims @prange to be a single aligned block of prange->granularity if - * possible. The head and tail are added to the child_list in @parent. - * - * Context: caller must hold mmap_read_lock and prange->lock - * - * Return: - * 0 - OK, otherwise error code - */ -int -svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange) -{ - struct svm_range *head, *tail; - unsigned long start, last, size; - int r; - - /* Align splited range start and size to granularity size, then a single - * PTE will be used for whole range, this reduces the number of PTE - * updated and the L1 TLB space used for translation. - */ - size = 1UL << prange->granularity; - start = ALIGN_DOWN(addr, size); - last = ALIGN(addr + 1, size) - 1; - - pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", - prange->svms, prange->start, prange->last, start, last, size); - - if (start > prange->start) { - r = svm_range_split(prange, start, prange->last, &head); - if (r) - return r; - svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); - } - - if (last < prange->last) { - r = svm_range_split(prange, prange->start, last, &tail); - if (r) - return r; - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); - } - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { - prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; - pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", - prange, prange->start, prange->last, - SVM_OP_ADD_RANGE_AND_MAP); - } - return 0; -} static bool svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) { @@ -1529,7 +1492,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr) uint32_t gpuidx; int r; - drm_exec_init(&ctx->exec, intr ? 
DRM_EXEC_INTERRUPTIBLE_WAIT: 0); + drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0); drm_exec_until_all_locked(&ctx->exec) { for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); @@ -1614,6 +1577,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, + unsigned long map_start, unsigned long map_last, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { @@ -1694,10 +1658,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } } - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = map_start << PAGE_SHIFT; + end = (map_last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; + unsigned long map_start_vma; + unsigned long map_last_vma; struct vm_area_struct *vma; unsigned long next = 0; unsigned long offset; @@ -1725,7 +1691,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } if (!r) { - offset = (addr - start) >> PAGE_SHIFT; + offset = (addr >> PAGE_SHIFT) - prange->start; r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, hmm_range->hmm_pfns); if (r) @@ -1743,9 +1709,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = -EAGAIN; } - if (!r) - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) { + map_start_vma = max(map_start, prange->start + offset); + map_last_vma = min(map_last, prange->start + offset + npages - 1); + if (map_start_vma <= map_last_vma) { + offset = map_start_vma - prange->start; + npages = map_last_vma - map_start_vma + 1; + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + } + } if (!r && next == end) prange->mapped_to_gpu = true; @@ -1838,8 +1811,8 @@ static void svm_range_restore_work(struct work_struct *work) */ mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true, false); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -1876,7 +1849,7 @@ out_reschedule: /* If validation failed, reschedule another attempt */ if (evicted_ranges) { pr_debug("reschedule to restore svm range\n"); - schedule_delayed_work(&svms->restore_work, + queue_delayed_work(system_freezable_wq, &svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); kfd_smi_event_queue_restore_rescheduled(mm); @@ -1952,7 +1925,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("failed to quiesce KFD\n"); pr_debug("schedule to restore svm %p ranges\n", svms); - schedule_delayed_work(&svms->restore_work, + queue_delayed_work(system_freezable_wq, &svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { unsigned long s, l; @@ -2007,6 +1980,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->actual_loc = old->actual_loc; new->granularity = old->granularity; new->mapped_to_gpu = old->mapped_to_gpu; + new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2914,6 +2888,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t 
vmid, uint32_t node_id,
 uint64_t addr, bool write_fault)
{
+ unsigned long start, last, size;
 struct mm_struct *mm = NULL;
 struct svm_range_list *svms;
 struct svm_range *prange;
 struct kfd_process *p;
@@ -3049,40 +3024,44 @@ retry_write_locked:
 kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 write_fault, timestamp);

- if (prange->actual_loc != best_loc) {
+ /* Align migration range start and size to granularity size */
+ size = 1UL << prange->granularity;
+ start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+ last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+ if (prange->actual_loc != 0 || best_loc != 0) {
 migration = true;
+
 if (best_loc) {
- r = svm_migrate_to_vram(prange, best_loc, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+ r = svm_migrate_to_vram(prange, best_loc, start, last,
+ mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 if (r) {
 pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 r, addr);
 /* Fallback to system memory if migration to
 * VRAM failed
 */
- if (prange->actual_loc)
- r = svm_migrate_vram_to_ram(prange, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
- NULL);
+ if (prange->actual_loc && prange->actual_loc != best_loc)
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 else
 r = 0;
 }
 } else {
- r = svm_migrate_vram_to_ram(prange, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
- NULL);
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 }
 if (r) {
 pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
- r, svms, prange->start, prange->last);
+ r, svms, start, last);
 goto out_unlock_range;
 }
 }

- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+ r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+ false, false);
 if (r)
 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
- r, svms, prange->start, prange->last);
+ r, svms, start, last);

 kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 migration);

@@ -3428,18 +3407,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 *migrated = false;
 best_loc = svm_range_best_prefetch_location(prange);

- if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
- best_loc == prange->actual_loc)
+ /* when best_loc is a gpu node and the same as prange->actual_loc
+ * we still need to do the migration, as prange->actual_loc != 0
+ * does not mean all pages in prange are in vram. hmm migrate will
+ * pick up the right pages during migration.
+ */ + if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) || + (best_loc == 0 && prange->actual_loc == 0)) return 0; if (!best_loc) { - r = svm_migrate_vram_to_ram(prange, mm, + r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, KFD_MIGRATE_TRIGGER_PREFETCH, NULL); *migrated = !r; return r; } - r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH); + r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last, + mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; return r; @@ -3494,7 +3479,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { + /* migrate all vram pages in this prange to sys ram + * after that prange->actual_loc should be zero + */ r = svm_migrate_vram_to_ram(prange, mm, + prange->start, prange->last, KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL); } while (!r && prange->actual_loc && --retries); @@ -3618,8 +3607,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - true, true, flush_tlb); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); @@ -3633,8 +3622,8 @@ out_unlock_range: pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n", prange, prange->start, prange->last); mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - true, true, prange->mapped_to_gpu); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu); if (r) pr_debug("failed %d on remap svm range\n", r); mutex_unlock(&prange->migrate_mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index c528df1d0ba2..026863a0abcd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -78,6 +78,7 @@ struct svm_work_list_item { * @update_list:link list node used to add to update_list * @mapping: bo_va mapping structure to create and update GPU page table * @npages: number of pages + * @vram_pages: vram pages number in this svm_range * @dma_addr: dma mapping address on each GPU for system memory physical page * @ttm_res: vram ttm resource map * @offset: range start offset within mm_nodes @@ -88,7 +89,9 @@ struct svm_work_list_item { * @flags: flags defined as KFD_IOCTL_SVM_FLAG_* * @perferred_loc: perferred location, 0 for CPU, or GPU id * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id - * @actual_loc: the actual location, 0 for CPU, or GPU id + * @actual_loc: this svm_range location. 0: all pages are from sys ram; + * GPU id: this svm_range may include vram pages from GPU with + * id actual_loc. 
* @granularity:migration granularity, log2 num pages * @invalid: not 0 means cpu page table is invalidated * @validate_timestamp: system timestamp when range is validated @@ -112,6 +115,7 @@ struct svm_range { struct list_head list; struct list_head update_list; uint64_t npages; + uint64_t vram_pages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; struct ttm_resource *ttm_res; uint64_t offset; @@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange, int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); -int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 057284bf50bb..e5f7c92eebcb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g num_cpu++; } + if (list_empty(&kdev->io_link_props)) + return -ENODATA; + gpu_link = list_first_entry(&kdev->io_link_props, - struct kfd_iolink_properties, list); - if (!gpu_link) - return -ENOMEM; + struct kfd_iolink_properties, list); for (i = 0; i < num_cpu; i++) { /* CPU <--> GPU */ @@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, peer->gpu->adev)) return ret; + if (list_empty(&kdev->io_link_props)) + return -ENODATA; + iolink1 = list_first_entry(&kdev->io_link_props, - struct kfd_iolink_properties, list); - if (!iolink1) - return -ENOMEM; + struct kfd_iolink_properties, list); + + if (list_empty(&peer->io_link_props)) + return -ENODATA; iolink2 = list_first_entry(&peer->io_link_props, - struct kfd_iolink_properties, list); - if (!iolink2) - return -ENOMEM; + struct kfd_iolink_properties, list); props = kfd_alloc_struct(props); if (!props) @@ -1449,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, /* CPU->CPU link*/ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); if (cpu_dev) { - list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) - if (iolink3->node_to == iolink2->node_to) - break; - - props->weight += iolink3->weight; - props->min_latency += iolink3->min_latency; - props->max_latency += iolink3->max_latency; - props->min_bandwidth = min(props->min_bandwidth, - iolink3->min_bandwidth); - props->max_bandwidth = min(props->max_bandwidth, - iolink3->max_bandwidth); + list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { + if (iolink3->node_to != iolink2->node_to) + continue; + + props->weight += iolink3->weight; + props->min_latency += iolink3->min_latency; + props->max_latency += iolink3->max_latency; + props->min_bandwidth = min(props->min_bandwidth, + iolink3->min_bandwidth); + props->max_bandwidth = min(props->max_bandwidth, + iolink3->max_bandwidth); + break; + } } else { WARN(1, "CPU node not found"); } diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index af17ab8027df..92a5c5efcf92 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -30,6 +30,9 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/ subdir-ccflags-y += 
-I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 8bf94920d23e..ab2a97e354da 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,22 +25,25 @@ +ifneq ($(CONFIG_DRM_AMD_DC),) AMDGPUDM = \ amdgpu_dm.o \ amdgpu_dm_plane.o \ amdgpu_dm_crtc.o \ amdgpu_dm_irq.o \ amdgpu_dm_mst_types.o \ - amdgpu_dm_color.o + amdgpu_dm_color.o \ + amdgpu_dm_services.o \ + amdgpu_dm_helpers.o \ + amdgpu_dm_pp_smu.o \ + amdgpu_dm_psr.o \ + amdgpu_dm_replay.o \ + amdgpu_dm_wb.o ifdef CONFIG_DRM_AMD_DC_FP AMDGPUDM += dc_fpu.o endif -ifneq ($(CONFIG_DRM_AMD_DC),) -AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o -endif - AMDGPUDM += amdgpu_dm_hdcp.o ifneq ($(CONFIG_DEBUG_FS),) @@ -52,3 +55,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM)) AMD_DISPLAY_FILES += $(AMDGPU_DM) +endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4e82ee4d74ac..f6575d7dee97 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -37,6 +37,7 @@ #include "dc/dc_dmub_srv.h" #include "dc/dc_edid_parser.h" #include "dc/dc_stat.h" +#include "dc/dc_state.h" #include "amdgpu_dm_trace.h" #include "dpcd_defs.h" #include "link/protocols/link_dpcd.h" @@ -54,6 +55,7 @@ #include "amdgpu_dm_crtc.h" #include "amdgpu_dm_hdcp.h" #include <drm/display/drm_hdcp_helper.h> +#include "amdgpu_dm_wb.h" #include "amdgpu_pm.h" #include "amdgpu_atombios.h" @@ -65,7 +67,6 @@ #include "amdgpu_dm_debugfs.h" #endif #include "amdgpu_dm_psr.h" -#include "amdgpu_dm_replay.h" #include "ivsrcid/ivsrcid_vislands30.h" @@ -85,12 +86,13 @@ #include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include <drm/drm_fixed.h> #include <drm/drm_fourcc.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/drm_vblank.h> #include <drm/drm_audio_component.h> #include <drm/drm_gem_atomic_helper.h> -#include <drm/drm_plane_helper.h> #include <acpi/video.h> @@ -575,6 +577,7 @@ static void dm_crtc_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; struct amdgpu_device *adev = irq_params->adev; + struct drm_writeback_job *job; struct amdgpu_crtc *acrtc; unsigned long flags; int vrr_active; @@ -583,6 +586,33 @@ static void dm_crtc_high_irq(void *interrupt_params) if (!acrtc) return; + if (acrtc->wb_pending) { + if (acrtc->wb_conn) { + spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags); + job = list_first_entry_or_null(&acrtc->wb_conn->job_queue, + struct drm_writeback_job, + list_entry); + spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags); + + if (job) { + unsigned int v_total, refresh_hz; + struct dc_stream_state *stream = acrtc->dm_irq_params.stream; + + v_total = stream->adjust.v_total_max ? 
+ stream->adjust.v_total_max : stream->timing.v_total; + refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz * + 100LL, (v_total * stream->timing.h_total)); + mdelay(1000 / refresh_hz); + + drm_writeback_signal_completion(acrtc->wb_conn, 0); + dc_stream_fc_disable_writeback(adev->dm.dc, + acrtc->dm_irq_params.stream, 0); + } + } else + DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__); + acrtc->wb_pending = false; + } + vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc); drm_dbg_vbl(adev_to_drm(adev), @@ -725,6 +755,10 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (link && aconnector->dc_link == link) { if (notify->type == DMUB_NOTIFICATION_HPD) @@ -894,8 +928,7 @@ static int dm_early_init(void *handle); /* Allocate memory for FBC compressed data */ static void amdgpu_dm_fbc_init(struct drm_connector *connector) { - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct dm_compressor_info *compressor = &adev->dm.compressor; struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector); struct drm_display_mode *mode; @@ -949,6 +982,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port, drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->audio_inst != port) continue; @@ -989,8 +1026,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev, static void amdgpu_dm_audio_component_unbind(struct device *kdev, struct device *hda_kdev, void *data) { - struct drm_device *dev = dev_get_drvdata(kdev); - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev)); struct drm_audio_component *acomp = data; acomp->ops = NULL; @@ -1258,7 +1294,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ /* AGP aperture is disabled */ if (agp_bot > agp_top) { logical_addr_low = adev->gmc.fb_start >> 18; - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the @@ -1270,7 +1308,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ logical_addr_high = adev->gmc.fb_end >> 18; } else { logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. 
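The writeback completion path above stalls one refresh period so the job is not signaled before the frame drains, and the period comes straight from the stream timing. Worked numbers for standard CEA 1080p60 (the well-known timing values, not read from the driver):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* 1080p60: 148.5 MHz pixel clock, 2200 x 1125 total raster. dc
     * stores the pixel clock in 100 Hz units. */
    uint64_t pix_clk_100hz = 1485000;
    unsigned int h_total = 2200, v_total = 1125;

    /* Same arithmetic as dm_crtc_high_irq():
     * Hz = pix_clk_100hz * 100 / (v_total * h_total) */
    unsigned int refresh_hz =
        (unsigned int)(pix_clk_100hz * 100ULL / (v_total * h_total));

    printf("refresh: %u Hz, delay: %u ms\n", refresh_hz,
           1000 / refresh_hz); /* 60 Hz, 16 ms */
    return 0;
}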
So here is the @@ -1675,6 +1715,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; + init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + + /* Enable DWB for tested platforms only */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) + init_data.num_virtual_links = 1; + INIT_LIST_HEAD(&adev->dm.da_list); retrieve_dmi_info(&adev->dm); @@ -1717,23 +1763,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ adev->dm.dc->debug.ignore_cable_id = true; - /* TODO: There is a new drm mst change where the freedom of - * vc_next_start_slot update is revoked/moved into drm, instead of in - * driver. This forces us to make sure to get vc_next_start_slot updated - * in drm function each time without considering if mst_state is active - * or not. Otherwise, next time hotplug will give wrong start_slot - * number. We are implementing a temporary solution to even notify drm - * mst deallocation when link is no longer of MST type when uncommitting - * the stream so we will have more time to work on a proper solution. - * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we - * should notify drm to do a complete "reset" of its states and stop - * calling further drm mst functions when link is no longer of an MST - * type. This could happen when we unplug an MST hubs/displays. When - * uncommit stream comes later after unplug, we should just reset - * hardware states only. - */ - adev->dm.dc->debug.temp_mst_deallocation_sequence = true; - if (adev->dm.dc->caps.dp_hdmi21_pcon_support) DRM_INFO("DP-HDMI FRL PCON supported\n"); @@ -2269,6 +2298,10 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { @@ -2397,6 +2430,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_root) @@ -2576,12 +2613,10 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) memset(del_streams, 0, sizeof(del_streams)); - context = dc_create_state(dc); + context = dc_state_create_current_copy(dc); if (context == NULL) goto context_alloc_fail; - dc_resource_state_copy_construct_current(dc, context); - /* First remove from context all streams */ for (i = 0; i < context->stream_count; i++) { struct dc_stream_state *stream = context->streams[i]; @@ -2591,12 +2626,12 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) /* Remove all planes for removed streams and then remove the streams */ for (i = 0; i < del_streams_count; i++) { - if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { + if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { res = DC_FAIL_DETACH_SURFACES; goto fail; } - res = dc_remove_stream_from_ctx(dc, context, del_streams[i]); + res = dc_state_remove_stream(dc, context, del_streams[i]); if 
(res != DC_OK) goto fail; } @@ -2604,7 +2639,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) res = dc_commit_streams(dc, context->streams, context->stream_count); fail: - dc_release_state(context); + dc_state_release(context); context_alloc_fail: return res; @@ -2631,7 +2666,7 @@ static int dm_suspend(void *handle) dc_allow_idle_optimizations(adev->dm.dc, false); - dm->cached_dc_state = dc_copy_state(dm->dc->current_state); + dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state); dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false); @@ -2656,11 +2691,12 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); return 0; } -struct amdgpu_dm_connector * +struct drm_connector * amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, struct drm_crtc *crtc) { @@ -2673,7 +2709,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, crtc_from_state = new_con_state->crtc; if (crtc_from_state == crtc) - return to_amdgpu_dm_connector(connector); + return connector; } return NULL; @@ -2824,7 +2860,7 @@ static int dm_resume(void *handle) bool need_hotplug = false; if (dm->dc->caps.ips_support) { - dc_dmub_srv_exit_low_power_state(dm->dc); + dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); } if (amdgpu_in_reset(adev)) { @@ -2851,6 +2887,7 @@ static int dm_resume(void *handle) if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); @@ -2876,7 +2913,7 @@ static int dm_resume(void *handle) dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true); - dc_release_state(dm->cached_dc_state); + dc_state_release(dm->cached_dc_state); dm->cached_dc_state = NULL; amdgpu_dm_irq_resume_late(adev); @@ -2886,10 +2923,9 @@ static int dm_resume(void *handle) return 0; } /* Recreate dc_state - DC invalidates it when setting power state to S3. */ - dc_release_state(dm_state->context); - dm_state->context = dc_create_state(dm->dc); + dc_state_release(dm_state->context); + dm_state->context = dc_state_create(dm->dc); /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ - dc_resource_state_construct(dm->dc, dm_state->context); /* Before powering on DC we need to re-initialize DMUB. 
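For readers tracking the refactor, the dc_state helper renames applied throughout the hunks above and below map old to new as follows; this is a summary of what this patch itself shows, not an exhaustive API list.

/*
 * dc_create_state(dc) +
 *   dc_resource_state_copy_construct_current(dc, ctx)
 *                                      -> dc_state_create_current_copy(dc)
 * dc_create_state(dc) +
 *   dc_resource_state_construct(dc, ctx) -> dc_state_create(dc)
 * dc_copy_state(ctx)                   -> dc_state_create_copy(ctx)
 * dc_release_state(ctx)                -> dc_state_release(ctx)
 * dc_add_stream_to_ctx(...)            -> dc_state_add_stream(...)
 * dc_remove_stream_from_ctx(...)       -> dc_state_remove_stream(...)
 * dc_add_plane_to_context(...)         -> dc_state_add_plane(...)
 * dc_remove_plane_from_context(...)    -> dc_state_remove_plane(...)
 * dc_rem_all_planes_for_stream(...)    -> dc_state_rem_all_planes_for_stream(...)
 * dc_stream_get_status_from_state(...) -> dc_state_get_stream_status(...)
 */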
*/ dm_dmub_hw_resume(adev); @@ -2901,6 +2937,7 @@ static int dm_resume(void *handle) } /* power on hardware */ + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); /* program HPD filter */ @@ -2918,6 +2955,10 @@ static int dm_resume(void *handle) /* Do detection*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->dc_link) @@ -3491,6 +3532,9 @@ static void register_hpd_handlers(struct amdgpu_device *adev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); dc_link = aconnector->dc_link; @@ -3957,7 +4001,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj) old_state = to_dm_atomic_state(obj->state); if (old_state && old_state->context) - new_state->context = dc_copy_state(old_state->context); + new_state->context = dc_state_create_copy(old_state->context); if (!new_state->context) { kfree(new_state); @@ -3973,7 +4017,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj, struct dm_atomic_state *dm_state = to_dm_atomic_state(state); if (dm_state && dm_state->context) - dc_release_state(dm_state->context); + dc_state_release(dm_state->context); kfree(dm_state); } @@ -4009,14 +4053,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) if (!state) return -ENOMEM; - state->context = dc_create_state(adev->dm.dc); + state->context = dc_state_create_current_copy(adev->dm.dc); if (!state->context) { kfree(state); return -ENOMEM; } - dc_resource_state_copy_construct_current(adev->dm.dc, state->context); - drm_atomic_private_obj_init(adev_to_drm(adev), &adev->dm.atomic_obj, &state->base, @@ -4024,14 +4066,19 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) r = amdgpu_display_modeset_create_props(adev); if (r) { - dc_release_state(state->context); + dc_state_release(state->context); kfree(state); return r; } +#ifdef AMD_PRIVATE_COLOR + if (amdgpu_dm_create_color_properties(adev)) + return -ENOMEM; +#endif + r = amdgpu_dm_audio_init(adev); if (r) { - dc_release_state(state->context); + dc_state_release(state->context); kfree(state); return r; } @@ -4345,7 +4392,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) enum dc_connection_type new_connection_type = dc_connection_none; const struct dc_plane_cap *plane; bool psr_feature_enabled = false; - bool replay_feature_enabled = false; int max_overlay = dm->dc->caps.max_slave_planes; dm->display_indexes_num = dm->dc->caps.max_streams; @@ -4457,20 +4503,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } } - if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { - switch (adev->ip_versions[DCE_HWIP][0]) { - case IP_VERSION(3, 1, 4): - case IP_VERSION(3, 1, 5): - case IP_VERSION(3, 1, 6): - case IP_VERSION(3, 2, 0): - case IP_VERSION(3, 2, 1): - replay_feature_enabled = true; - break; - default: - replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; - break; - } - } /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; @@ -4482,6 +4514,28 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) continue; } + link = dc_get_link_at_index(dm->dc, i); + + if 
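Several connector loops in this patch gain the same early continue for writeback connectors. A sketch of the guard as a predicate; the helper name is hypothetical and only documents the recurring pattern:

static inline bool is_dm_writeback_connector(const struct drm_connector *connector)
{
	/* Writeback connectors embed drm_writeback_connector, not
	 * amdgpu_dm_connector, so to_amdgpu_dm_connector() must never be
	 * applied to them; every loop below skips them before the downcast.
	 */
	return connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK;
}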
(link->connector_signal == SIGNAL_TYPE_VIRTUAL) { + struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); + + if (!wbcon) { + DRM_ERROR("KMS: Failed to allocate writeback connector\n"); + continue; + } + + if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) { + DRM_ERROR("KMS: Failed to initialize writeback connector\n"); + kfree(wbcon); + continue; + } + + link->psr_settings.psr_feature_enabled = false; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; + + continue; + } + aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); if (!aconnector) goto fail; @@ -4500,8 +4554,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } - link = dc_get_link_at_index(dm->dc, i); - if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); @@ -4519,12 +4571,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_dm_update_connector_after_detect(aconnector); setup_backlight_device(dm, aconnector); - /* - * Disable psr if replay can be enabled - */ - if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector)) - psr_feature_enabled = false; - if (psr_feature_enabled) amdgpu_dm_set_psr_caps(link); @@ -5106,7 +5152,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, * Always set input transfer function, since plane state is refreshed * every time. */ - ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); + ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, + plane_state, + dc_plane_state); if (ret) return ret; @@ -5511,10 +5559,13 @@ static void fill_stream_properties_from_drm_display_mode( { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector = NULL; struct hdmi_vendor_infoframe hv_frame; struct hdmi_avi_infoframe avi_frame; + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + aconnector = to_amdgpu_dm_connector(connector); + memset(&hv_frame, 0, sizeof(hv_frame)); memset(&avi_frame, 0, sizeof(avi_frame)); @@ -5527,6 +5578,7 @@ static void fill_stream_properties_from_drm_display_mode( && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if (drm_mode_is_420_also(info, mode_in) + && aconnector && aconnector->force_yuv420_output) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444) @@ -5562,7 +5614,7 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->hdmi_vic = hv_frame.vic; } - if (is_freesync_video_mode(mode_in, aconnector)) { + if (aconnector && is_freesync_video_mode(mode_in, aconnector)) { timing_out->h_addressable = mode_in->hdisplay; timing_out->h_total = mode_in->htotal; timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start; @@ -5683,13 +5735,13 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } static struct dc_sink * -create_fake_sink(struct amdgpu_dm_connector *aconnector) +create_fake_sink(struct dc_link *link) { struct dc_sink_init_data sink_init_data = { 0 }; struct dc_sink *sink = NULL; - sink_init_data.link = aconnector->dc_link; - sink_init_data.sink_signal = aconnector->dc_link->connector_signal; + sink_init_data.link = link; + sink_init_data.sink_signal = link->connector_signal; sink = 
dc_sink_create(&sink_init_data); if (!sink) { @@ -6039,14 +6091,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, } static struct dc_stream_state * -create_stream_for_sink(struct amdgpu_dm_connector *aconnector, +create_stream_for_sink(struct drm_connector *connector, const struct drm_display_mode *drm_mode, const struct dm_connector_state *dm_state, const struct dc_stream_state *old_stream, int requested_bpc) { + struct amdgpu_dm_connector *aconnector = NULL; struct drm_display_mode *preferred_mode = NULL; - struct drm_connector *drm_connector; const struct drm_connector_state *con_state = &dm_state->base; struct dc_stream_state *stream = NULL; struct drm_display_mode mode; @@ -6060,22 +6112,35 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; struct dsc_dec_dpcd_caps dsc_caps; + struct dc_link *link = NULL; struct dc_sink *sink = NULL; drm_mode_init(&mode, drm_mode); memset(&saved_mode, 0, sizeof(saved_mode)); - if (aconnector == NULL) { - DRM_ERROR("aconnector is NULL!\n"); + if (connector == NULL) { + DRM_ERROR("connector is NULL!\n"); return stream; } - drm_connector = &aconnector->base; + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) { + aconnector = NULL; + aconnector = to_amdgpu_dm_connector(connector); + link = aconnector->dc_link; + } else { + struct drm_writeback_connector *wbcon = NULL; + struct amdgpu_dm_wb_connector *dm_wbcon = NULL; + + wbcon = drm_connector_to_writeback(connector); + dm_wbcon = to_amdgpu_dm_wb_connector(wbcon); + link = dm_wbcon->link; + } - if (!aconnector->dc_sink) { - sink = create_fake_sink(aconnector); + if (!aconnector || !aconnector->dc_sink) { + sink = create_fake_sink(link); if (!sink) return stream; + } else { sink = aconnector->dc_sink; dc_sink_retain(sink); @@ -6088,12 +6153,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, goto finish; } + /* We leave this NULL for writeback connectors */ stream->dm_stream_context = aconnector; stream->timing.flags.LTE_340MCSC_SCRAMBLE = - drm_connector->display_info.hdmi.scdc.scrambling.low_rates; + connector->display_info.hdmi.scdc.scrambling.low_rates; - list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { + list_for_each_entry(preferred_mode, &connector->modes, head) { /* Search for preferred mode */ if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) { native_mode_found = true; @@ -6102,7 +6168,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, } if (!native_mode_found) preferred_mode = list_first_entry_or_null( - &aconnector->base.modes, + &connector->modes, struct drm_display_mode, head); @@ -6116,7 +6182,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, * and the modelist may not be filled in time. 
*/ DRM_DEBUG_DRIVER("No preferred mode found\n"); - } else { + } else if (aconnector) { recalculate_timing = is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); @@ -6139,13 +6205,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ if (!scale || mode_refresh != preferred_refresh) fill_stream_properties_from_drm_display_mode( - stream, &mode, &aconnector->base, con_state, NULL, + stream, &mode, connector, con_state, NULL, requested_bpc); else fill_stream_properties_from_drm_display_mode( - stream, &mode, &aconnector->base, con_state, old_stream, + stream, &mode, connector, con_state, old_stream, requested_bpc); + /* The rest isn't needed for writeback connectors */ + if (!aconnector) + goto finish; + if (aconnector->timing_changed) { drm_dbg(aconnector->base.dev, "overriding timing for automated test, bpc %d, changing to %d\n", @@ -6163,7 +6233,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, fill_audio_info( &stream->audio_info, - drm_connector, + connector, sink); update_stream_signal(stream, sink); @@ -6570,7 +6640,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, if (!dc_plane_state) goto cleanup; - dc_state = dc_create_state(dc); + dc_state = dc_state_create(dc); if (!dc_state) goto cleanup; @@ -6597,9 +6667,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, dc_result = dc_validate_plane(dc, dc_plane_state); if (dc_result == DC_OK) - dc_result = dc_add_stream_to_ctx(dc, dc_state, stream); + dc_result = dc_state_add_stream(dc, dc_state, stream); - if (dc_result == DC_OK && !dc_add_plane_to_context( + if (dc_result == DC_OK && !dc_state_add_plane( dc, stream, dc_plane_state, @@ -6611,7 +6681,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, cleanup: if (dc_state) - dc_release_state(dc_state); + dc_state_release(dc_state); if (dc_plane_state) dc_plane_state_release(dc_plane_state); @@ -6633,7 +6703,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, enum dc_status dc_result = DC_OK; do { - stream = create_stream_for_sink(aconnector, drm_mode, + stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, requested_bpc); if (stream == NULL) { @@ -6641,6 +6711,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, break; } + if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return stream; + dc_result = dc_validate_stream(adev->dm.dc, stream); if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream); @@ -6916,7 +6989,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); + mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; @@ -6928,7 +7001,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, max_bpc); bpp = convert_dc_color_depth_into_bpc(color_depth) * 3; clock = adjusted_mode->clock; - dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false); + dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4); } dm_new_connector_state->vcpi_slots = @@ -6960,6 +7033,9 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, 
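A worked example of the drm_dp_calc_pbn_mode() change above: the DSC flag argument is dropped and bpp is now passed in 1/16-bpp units (hence "bpp << 4"), while mst_state->pbn_div becomes a 20.12 fixed-point value built with dfixed_const() (see the new drm_fixed.h include). The arithmetic below assumes the usual DP MST PBN formula with its 0.6 % margin:

/* 1080p60 RGB 8 bpc: clock = 148500 kHz, bpp = 3 * 8 = 24 */
int pbn = drm_dp_calc_pbn_mode(148500, 24 << 4);
/*
 * 148500 kHz * 24 bpp = 3564000 kbit/s = 445500 kB/s;
 * PBN ~= 445500 / 1000 * 64/54 * 1.006 ~= 531.2,
 * i.e. 531-532 PBN depending on how the margin is rounded up.
 */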
for_each_new_connector_in_state(state, connector, new_con_state, i) { + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->mst_output_port) @@ -7565,6 +7641,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct dc_link *link = dc_get_link_at_index(dc, link_index); struct amdgpu_i2c_adapter *i2c; + /* Not needed for writeback connector */ link->priv = aconnector; @@ -8175,6 +8252,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; + bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; + bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; + bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; + bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, @@ -8386,6 +8467,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &acrtc_state->stream->csc_color_matrix; bundle->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; + bundle->stream_update.lut3d_func = + (struct dc_3dlut *) acrtc_state->stream->lut3d_func; + bundle->stream_update.func_shaper = + (struct dc_transfer_func *) acrtc_state->stream->func_shaper; } acrtc_state->stream->abm_level = acrtc_state->abm_level; @@ -8519,6 +8604,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + notify: aconnector = to_amdgpu_dm_connector(connector); @@ -8552,6 +8640,9 @@ notify: if (!status) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); mutex_lock(&adev->dm.audio_lock); @@ -8577,6 +8668,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state); } +static void dm_clear_writeback(struct amdgpu_display_manager *dm, + struct dm_crtc_state *crtc_state) +{ + dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0); +} + static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, struct dc_state *dc_state) { @@ -8586,9 +8683,38 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + struct drm_connector_state *old_con_state; + struct drm_connector *connector; bool mode_set_reset_required = false; u32 i; + /* Disable writeback */ + for_each_old_connector_in_state(state, connector, old_con_state, i) { + struct dm_connector_state *dm_old_con_state; + struct amdgpu_crtc *acrtc; + + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + old_crtc_state = NULL; + + dm_old_con_state = to_dm_connector_state(old_con_state); + if (!dm_old_con_state->base.crtc) + continue; + + acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc); + if (acrtc) + old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); + + if (!acrtc->wb_enabled) + continue; + + dm_old_crtc_state = 
to_dm_crtc_state(old_crtc_state); + + dm_clear_writeback(dm, dm_old_crtc_state); + acrtc->wb_enabled = false; + } + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); @@ -8713,7 +8839,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, dc_stream_get_status(dm_new_crtc_state->stream); if (!status) - status = dc_stream_get_status_from_state(dc_state, + status = dc_state_get_stream_status(dc_state, dm_new_crtc_state->stream); if (!status) drm_err(dev, @@ -8725,6 +8851,105 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, } } +static void dm_set_writeback(struct amdgpu_display_manager *dm, + struct dm_crtc_state *crtc_state, + struct drm_connector *connector, + struct drm_connector_state *new_con_state) +{ + struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector); + struct amdgpu_device *adev = dm->adev; + struct amdgpu_crtc *acrtc; + struct dc_writeback_info *wb_info; + struct pipe_ctx *pipe = NULL; + struct amdgpu_framebuffer *afb; + int i = 0; + + wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL); + if (!wb_info) { + DRM_ERROR("Failed to allocate wb_info\n"); + return; + } + + acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); + if (!acrtc) { + DRM_ERROR("no amdgpu_crtc found\n"); + kfree(wb_info); + return; + } + + afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); + if (!afb) { + DRM_ERROR("No amdgpu_framebuffer found\n"); + kfree(wb_info); + return; + } + + for (i = 0; i < MAX_PIPES; i++) { + if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) { + pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i]; + break; + } + } + + /* fill in wb_info */ + wb_info->wb_enabled = true; + + wb_info->dwb_pipe_inst = 0; + wb_info->dwb_params.dwbscl_black_color = 0; + wb_info->dwb_params.hdr_mult = 0x1F000; + wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS; + wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13; + wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC; + wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC; + + /* width & height from crtc */ + wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay; + wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay; + wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay; + wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay; + + wb_info->dwb_params.cnv_params.crop_en = false; + wb_info->dwb_params.stereo_params.stereo_enabled = false; + + wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff; // 10 bits + wb_info->dwb_params.cnv_params.out_min_pix_val = 0; + wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB; + wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS; + + wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444; + + wb_info->dwb_params.capture_rate = dwb_capture_rate_0; + + wb_info->dwb_params.scaler_taps.h_taps = 4; + wb_info->dwb_params.scaler_taps.v_taps = 4; + wb_info->dwb_params.scaler_taps.h_taps_c = 2; + wb_info->dwb_params.scaler_taps.v_taps_c = 2; + wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING; + + wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0]; + wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1]; + + for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) { + wb_info->mcif_buf_params.luma_address[i] = afb->address; + 
wb_info->mcif_buf_params.chroma_address[i] = 0; + } + + wb_info->mcif_buf_params.p_vmid = 1; + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) { + wb_info->mcif_warmup_params.start_address.quad_part = afb->address; + wb_info->mcif_warmup_params.region_size = + wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height; + } + wb_info->mcif_warmup_params.p_vmid = 1; + wb_info->writeback_source_plane = pipe->plane_state; + + dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info); + + acrtc->wb_pending = true; + acrtc->wb_conn = wb_conn; + drm_writeback_queue_job(wb_conn, new_con_state); +} + /** * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. * @state: The atomic state to commit @@ -8757,7 +8982,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (new_con_state->crtc && new_con_state->crtc->state->active && drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) { - dc_dmub_srv_exit_low_power_state(dm->dc); + dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); break; } } @@ -8775,7 +9000,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector; + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + aconnector = to_amdgpu_dm_connector(connector); if (!adev->dm.hdcp_workqueue) continue; @@ -9052,6 +9282,31 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank); } + /* Enable writeback */ + for_each_new_connector_in_state(state, connector, new_con_state, i) { + struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); + + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + if (!new_con_state->writeback_job) + continue; + + new_crtc_state = NULL; + + if (acrtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + + if (acrtc->wb_enabled) + continue; + + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + + dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state); + acrtc->wb_enabled = true; + } + /* Update audio instances for each connector. 
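From userspace, the writeback path enabled above is driven through the standard DRM writeback connector properties on an atomic commit. A hedged libdrm sketch: the object and property IDs (wb_conn_id, crtc_id, prop_*) are assumed to have been discovered at runtime via drmModeObjectGetProperties().

#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

int commit_writeback(int fd, uint32_t wb_conn_id, uint32_t crtc_id,
		     uint32_t fb_id, uint32_t prop_crtc_id,
		     uint32_t prop_wb_fb_id, uint32_t prop_out_fence)
{
	int out_fence_fd = -1;
	drmModeAtomicReq *req = drmModeAtomicAlloc();
	int ret;

	if (!req)
		return -1;
	/* prop_* are the IDs of the standard "CRTC_ID", "WRITEBACK_FB_ID"
	 * and "WRITEBACK_OUT_FENCE_PTR" connector properties.
	 */
	drmModeAtomicAddProperty(req, wb_conn_id, prop_crtc_id, crtc_id);
	drmModeAtomicAddProperty(req, wb_conn_id, prop_wb_fb_id, fb_id);
	drmModeAtomicAddProperty(req, wb_conn_id, prop_out_fence,
				 (uint64_t)(uintptr_t)&out_fence_fd);
	ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
	drmModeAtomicFree(req);
	/* On success, out_fence_fd signals once the dm_crtc_high_irq() path
	 * above completes the queued job.
	 */
	return ret ? ret : out_fence_fd;
}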
*/ amdgpu_dm_commit_audio(dev, state); @@ -9169,10 +9424,15 @@ out: void dm_restore_drm_connector_state(struct drm_device *dev, struct drm_connector *connector) { - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector; struct amdgpu_crtc *disconnected_acrtc; struct dm_crtc_state *acrtc_state; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return; + + aconnector = to_amdgpu_dm_connector(connector); + if (!aconnector->dc_sink || !connector->state || !connector->encoder) return; @@ -9249,12 +9509,16 @@ static void get_freesync_config_for_crtc( struct dm_connector_state *new_con_state) { struct mod_freesync_config config = {0}; - struct amdgpu_dm_connector *aconnector = - to_amdgpu_dm_connector(new_con_state->base.connector); + struct amdgpu_dm_connector *aconnector; struct drm_display_mode *mode = &new_crtc_state->base.mode; int vrefresh = drm_mode_vrefresh(mode); bool fs_vid_mode = false; + if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return; + + aconnector = to_amdgpu_dm_connector(new_con_state->base.connector); + new_crtc_state->vrr_supported = new_con_state->freesync_capable && vrefresh >= aconnector->min_vfreq && vrefresh <= aconnector->max_vfreq; @@ -9354,6 +9618,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * update changed items */ struct amdgpu_crtc *acrtc = NULL; + struct drm_connector *connector = NULL; struct amdgpu_dm_connector *aconnector = NULL; struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; @@ -9363,15 +9628,17 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); - aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + if (connector) + aconnector = to_amdgpu_dm_connector(connector); /* TODO This hack should go away */ - if (aconnector && enable) { + if (connector && enable) { /* Make sure fake sink is created in plug-in scenario */ drm_new_conn_state = drm_atomic_get_new_connector_state(state, - &aconnector->base); + connector); drm_old_conn_state = drm_atomic_get_old_connector_state(state, - &aconnector->base); + connector); if (IS_ERR(drm_new_conn_state)) { ret = PTR_ERR_OR_ZERO(drm_new_conn_state); @@ -9497,7 +9764,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, crtc->base.id); /* i.e. reset mode */ - if (dc_remove_stream_from_ctx( + if (dc_state_remove_stream( dm->dc, dm_state->context, dm_old_crtc_state->stream) != DC_OK) { @@ -9518,7 +9785,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * added MST connectors not found in existing crtc_state in the chained mode * TODO: need to dig out the root cause of that */ - if (!aconnector) + if (!connector) goto skip_modeset; if (modereset_required(new_crtc_state)) @@ -9540,7 +9807,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n", crtc->base.id); - if (dc_add_stream_to_ctx( + if (dc_state_add_stream( dm->dc, dm_state->context, dm_new_crtc_state->stream) != DC_OK) { @@ -9561,7 +9828,7 @@ skip_modeset: * We want to do dc stream updates that do not require a * full modeset below. 
*/ - if (!(enable && aconnector && new_crtc_state->active)) + if (!(enable && connector && new_crtc_state->active)) return 0; /* * Given above conditions, the dc state cannot be NULL because: @@ -9587,6 +9854,7 @@ skip_modeset: * when a modeset is needed, to ensure it gets reprogrammed. */ if (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); if (ret) @@ -9620,7 +9888,8 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: Remove this hack for all asics once it proves that the * fast updates works fine on DCN3.2+. */ - if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset) + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) && + state->allow_modeset) return true; /* Exit early if we know that we're adding or removing the plane. */ @@ -9654,6 +9923,10 @@ static bool should_reset_plane(struct drm_atomic_state *state, */ for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) { struct amdgpu_framebuffer *old_afb, *new_afb; + struct dm_plane_state *dm_new_other_state, *dm_old_other_state; + + dm_new_other_state = to_dm_plane_state(new_other_state); + dm_old_other_state = to_dm_plane_state(old_other_state); if (other->type == DRM_PLANE_TYPE_CURSOR) continue; @@ -9690,6 +9963,18 @@ static bool should_reset_plane(struct drm_atomic_state *state, old_other_state->color_encoding != new_other_state->color_encoding) return true; + /* HDR/Transfer Function changes. */ + if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || + dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || + dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || + dm_old_other_state->ctm != dm_new_other_state->ctm || + dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || + dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || + dm_old_other_state->lut3d != dm_new_other_state->lut3d || + dm_old_other_state->blend_lut != dm_new_other_state->blend_lut || + dm_old_other_state->blend_tf != dm_new_other_state->blend_tf) + return true; + /* Framebuffer checks fall at the end. */ if (!old_other_state->fb || !new_other_state->fb) continue; @@ -9844,7 +10129,7 @@ static int dm_update_plane_state(struct dc *dc, if (ret) return ret; - if (!dc_remove_plane_from_context( + if (!dc_state_remove_plane( dc, dm_old_crtc_state->stream, dm_old_plane_state->dc_state, @@ -9922,7 +10207,7 @@ static int dm_update_plane_state(struct dc *dc, * state. It'll be released when the atomic state is * cleaned. 
*/ - if (!dc_add_plane_to_context( + if (!dc_state_add_plane( dc, dm_new_crtc_state->stream, dc_new_plane_state, @@ -10084,6 +10369,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm if (conn_state->crtc != crtc) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->mst_output_port || !aconnector->mst_root) aconnector = NULL; @@ -10465,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; } - status = dc_validate_global_state(dc, dm_state->context, true); + status = dc_validate_global_state(dc, dm_state->context, false); if (status != DC_OK) { DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); @@ -10603,7 +10891,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, input->cea_total_length = total_length; memcpy(input->payload, data, length); - res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); + res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); if (!res) { DRM_ERROR("EDID CEA parser failed\n"); return false; @@ -10794,8 +11082,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct dm_connector_state *dm_con_state = NULL; struct dc_sink *sink; - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_hdmi_vsdb_info vsdb_info = {0}; bool freesync_capable = false; enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 3d480be802cb..9c1871b866cc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -32,6 +32,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_plane.h> #include "link_service_types.h" +#include <drm/drm_writeback.h> /* * This file contains the definition for amdgpu_display_manager @@ -54,6 +55,9 @@ #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3 + +#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL) + /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" @@ -714,11 +718,107 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status, #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) +struct amdgpu_dm_wb_connector { + struct drm_writeback_connector base; + struct dc_link *link; +}; + +#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base) + extern const struct amdgpu_ip_block_version dm_ip_block; +/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD. + * + * It includes standardized transfer functions and pure power functions. 
The + * transfer function coefficients are available at modules/color/color_gamma.c + */ +enum amdgpu_transfer_function { + AMDGPU_TRANSFER_FUNCTION_DEFAULT, + AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_EOTF, + AMDGPU_TRANSFER_FUNCTION_IDENTITY, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF, + AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_COUNT +}; + struct dm_plane_state { struct drm_plane_state base; struct dc_plane_state *dc_state; + + /* Plane color mgmt */ + /** + * @degamma_lut: + * + * 1D LUT for mapping framebuffer/plane pixel data before sampling or + * blending operations. It's usually applied to linearize input space. + * The blob (if not NULL) is an array of &struct drm_color_lut. + */ + struct drm_property_blob *degamma_lut; + /** + * @degamma_tf: + * + * Predefined transfer function to tell the DC driver the input space to + * linearize. + */ + enum amdgpu_transfer_function degamma_tf; + /** + * @hdr_mult: + * + * Multiplier to 'gain' the plane. When PQ is decoded using the fixed + * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on + * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously. + * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you + * want 203 nits for SDR content, pass in (203.0 / 80.0). Format is + * S31.32 sign-magnitude. + * + * The HDR multiplier can range widely beyond [0.0, 1.0]. This means that PQ + * TF is needed for any subsequent linear-to-non-linear transforms. + */ + __u64 hdr_mult; + /** + * @ctm: + * + * Color transformation matrix. The blob (if not NULL) is a &struct + * drm_color_ctm_3x4. + */ + struct drm_property_blob *ctm; + /** + * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an + * array of &struct drm_color_lut. + */ + struct drm_property_blob *shaper_lut; + /** + * @shaper_tf: + * + * Predefined transfer function to delinearize color space. + */ + enum amdgpu_transfer_function shaper_tf; + /** + * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of + * &struct drm_color_lut. + */ + struct drm_property_blob *lut3d; + /** + * @blend_lut: blend LUT lookup table blob. The blob (if not NULL) is an + * array of &struct drm_color_lut. + */ + struct drm_property_blob *blend_lut; + /** + * @blend_tf: + * + * Pre-defined transfer function for converting plane pixel data before + * applying blend LUT. + */ + enum amdgpu_transfer_function blend_tf; }; struct dm_crtc_state { @@ -743,6 +843,14 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; + + /** + * @regamma_tf: + * + * Pre-defined transfer function for converting internal FB -> wire + * encoding.
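A worked encoding of the @hdr_mult format documented above, using the 203-nit example already given in the comment (S31.32 sign-magnitude, where 1.0 is AMDGPU_HDR_MULT_DEFAULT = 0x100000000):

/* 203 nits on the 80-nit SDR canvas: 203 / 80 = 2.5375 */
__u64 hdr_mult = ((__u64)203 << 32) / 80;	/* 0x289999999, ~2.5375 */

/* bit 63 = sign, bits 62..32 = integer part, bits 31..0 = fraction;
 * 0x100000000 therefore encodes exactly 1.0.
 */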
+ */ + enum amdgpu_transfer_function regamma_tf; }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) @@ -804,14 +912,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); +/* 3D LUT max size is 17x17x17 (4913 entries) */ +#define MAX_COLOR_3DLUT_SIZE 17 +#define MAX_COLOR_3DLUT_BITDEPTH 12 +int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state); +/* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 void amdgpu_dm_init_color_mod(void); +int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); void amdgpu_dm_update_connector_after_detect( @@ -834,7 +950,7 @@ struct dc_stream_state * int dm_atomic_get_state(struct drm_atomic_state *state, struct dm_atomic_state **dm_state); -struct amdgpu_dm_connector * +struct drm_connector * amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a4cb23d059bd..9b527bffe11a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -72,6 +72,7 @@ */ #define MAX_DRM_LUT_VALUE 0xFFFF +#define SDR_WHITE_LEVEL_INIT_VALUE 80 /** * amdgpu_dm_init_color_mod - Initialize the color module. @@ -84,6 +85,247 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } +static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) +{ + struct fixed31_32 val; + + /* If negative, convert to 2's complement. */ + if (x & (1ULL << 63)) + x = -(x & ~(1ULL << 63)); + + val.value = x; + return val; +} + +#ifdef AMD_PRIVATE_COLOR +/* Pre-defined Transfer Functions (TF) + * + * AMD driver supports pre-defined mathematical functions for transferring + * between encoded values and optical/linear space. Depending on HW color caps, + * ROMs and curves built by the AMD color module support these transforms. + * + * The driver-specific color implementation exposes properties for pre-blending + * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and + * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma + * supports ROM curves. AMD color module uses pre-defined coefficients to build + * curves for the other blocks. What can be done by each color block is + * described by struct dpp_color_capsand struct mpc_color_caps. 
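A self-contained check of the conversion done by amdgpu_dm_fixpt_from_s3132() above, re-implemented here purely for illustration: DRM blobs carry S31.32 sign-magnitude while DC's struct fixed31_32 is S31.32 two's complement.

#include <assert.h>
#include <stdint.h>

static int64_t fixpt_from_s3132(uint64_t x)
{
	if (x & (1ULL << 63))	/* sign-magnitude negative */
		return -(int64_t)(x & ~(1ULL << 63));
	return (int64_t)x;
}

int main(void)
{
	/* +1.5 passes through unchanged */
	assert(fixpt_from_s3132(0x180000000ULL) == 0x180000000LL);
	/* -1.5: sign bit stripped, magnitude negated */
	assert(fixpt_from_s3132(0x8000000180000000ULL) == -0x180000000LL);
	return 0;
}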
+ * + * AMD driver-specific color API exposes the following pre-defined transfer + * functions: + * + * - Identity: linear/identity relationship between pixel value and + * luminance value; + * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions; + * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999; + * - BT.709: has a linear segment in the bottom part and then a power function + * with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by + * ITU-R BT.709-6; + * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range + * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084. + * + * The AMD color model is designed with an assumption that SDR (sRGB, BT.709, + * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ + * system. This has the implication that PQ EOTF (non-linear to linear) maps to + * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits. + * + * Non-linear and linear forms are described in the table below: + * + * ┌───────────┬─────────────────────┬──────────────────────┐ + * │ │ Non-linear │ Linear │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │ + * └───────────┴─────────────────────┴──────────────────────┘ + * * CCCS: Windows canonical composition color space + * ** Respectively + * + * In the driver-specific API, color block names attached to TF properties + * suggest the intention regarding non-linear encoding pixel's luminance + * values. As some newer encodings don't use gamma curve, we make encoding and + * decoding explicit by defining an enum list of transfer functions supported + * in terms of EOTF and inverse EOTF, where: + * + * - EOTF (electro-optical transfer function): is the transfer function to go + * from the encoded value to an optical (linear) value. De-gamma functions + * traditionally do this. + * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go + * from an optical/linear space (which might have been used for blending) + * back to the encoded values. Gamma functions traditionally do this. 
+ */ +static const char * const +amdgpu_transfer_function_names[] = { + [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", + [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity", + [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF] = "sRGB EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF] = "BT.709 inv_OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF", + [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF] = "sRGB inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_OETF] = "BT.709 OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF", +}; + +static const u32 amdgpu_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); + +static const u32 amdgpu_inv_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF); + +static struct drm_property * +amdgpu_create_tf_property(struct drm_device *dev, + const char *name, + u32 supported_tf) +{ + u32 transfer_functions = supported_tf | + BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) | + BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY); + struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT]; + int i, len; + + len = 0; + for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) { + if ((transfer_functions & BIT(i)) == 0) + continue; + + enum_list[len].type = i; + enum_list[len].name = amdgpu_transfer_function_names[i]; + len++; + } + + return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, + name, enum_list, len); +} + +int +amdgpu_dm_create_color_properties(struct amdgpu_device *adev) +{ + struct drm_property *prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_DEGAMMA_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_DEGAMMA_LUT_SIZE", + 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_DEGAMMA_TF", + amdgpu_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_tf_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_hdr_mult_property = prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_CTM", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_ctm_property = prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_SHAPER_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_SHAPER_LUT_SIZE", 0, 
UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_SHAPER_TF", + amdgpu_inv_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_tf_property = prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_LUT3D", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_size_property = prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_BLEND_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_BLEND_TF", + amdgpu_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_tf_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_CRTC_REGAMMA_TF", + amdgpu_inv_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.regamma_tf_property = prop; + + return 0; +} +#endif + /** * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. * @blob: DRM color mgmt property blob @@ -182,7 +424,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, struct fixed31_32 *matrix) { - int64_t val; int i; /* @@ -201,12 +442,29 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ - val = ctm->matrix[i - (i / 4)]; - /* If negative, convert to 2's complement. */ - if (val & (1ULL << 63)) - val = -(val & ~(1ULL << 63)); + matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); + } +} - matrix[i].value = val; +/** + * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix + * @ctm: DRM color transformation matrix with 3x4 dimensions + * @matrix: DC CSC float matrix + * + * The matrix needs to be a 3x4 (12 entry) matrix. + */ +static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm, + struct fixed31_32 *matrix) +{ + int i; + + /* The format provided is S31.32, using signed-magnitude representation. + * Our fixed31_32 is also S31.32, but is using 2's complement. We have + * to convert from signed-magnitude to 2's complement. 
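For reference, a plausible identity setup for the 3x4 CTM consumed by __drm_ctm_3x4_to_dc_matrix() above: 12 S31.32 sign-magnitude entries, row-major 3 rows by 4 columns, with the fourth column acting as a per-channel offset in the affine form (the offset interpretation is an assumption of the sketch, not spelled out in this hunk).

struct drm_color_ctm_3x4 ctm = {
	.matrix = {
		/* R' */ 0x100000000ULL, 0, 0, 0,
		/* G' */ 0, 0x100000000ULL, 0, 0,
		/* B' */ 0, 0, 0x100000000ULL, 0,
	},
};
/* 0x100000000 = 1.0 in S31.32; all offsets zero */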
+ */ + for (i = 0; i < 12; i++) { + /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ + matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]); } } @@ -268,16 +526,18 @@ static int __set_output_tf(struct dc_transfer_func *func, struct calculate_buffer cal_buffer = {0}; bool res; - ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - cal_buffer.buffer_index = -1; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - gamma->num_entries = lut_size; - __drm_lut_to_dc_gamma(lut, gamma, false); + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; + + gamma->num_entries = lut_size; + __drm_lut_to_dc_gamma(lut, gamma, false); + } if (func->tf == TRANSFER_FUNCTION_LINEAR) { /* @@ -285,27 +545,68 @@ static int __set_output_tf(struct dc_transfer_func *func, * on top of a linear input. But degamma params can be used * instead to simulate this. */ - gamma->type = GAMMA_CUSTOM; + if (gamma) + gamma->type = GAMMA_CUSTOM; res = mod_color_calculate_degamma_params(NULL, func, - gamma, true); + gamma, gamma != NULL); } else { /* * Assume sRGB. The actual mapping will depend on whether the * input was legacy or not. */ - gamma->type = GAMMA_CS_TFM_1D; - res = mod_color_calculate_regamma_params(func, gamma, false, + if (gamma) + gamma->type = GAMMA_CS_TFM_1D; + res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL, has_rom, NULL, &cal_buffer); } - dc_gamma_release(&gamma); + if (gamma) + dc_gamma_release(&gamma); return res ? 0 : -ENOMEM; } +static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, + const struct drm_color_lut *regamma_lut, + uint32_t regamma_size, bool has_rom, + enum dc_transfer_func_predefined tf) +{ + struct dc_transfer_func *out_tf = stream->out_transfer_func; + int ret = 0; + + if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { + /* + * CRTC RGM goes into RGM LUT. + * + * Note: there is no implicit sRGB regamma here. We are using + * degamma calculation from color module to calculate the curve + * from a linear base if gamma TF is not set. However, if gamma + * TF (!= Linear) and LUT are set at the same time, we will use + * regamma calculation, and the color module will combine the + * pre-defined TF and the custom LUT values into the LUT that's + * actually programmed. + */ + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = tf; + out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + + ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); + } else { + /* + * No CRTC RGM means we can just put the block into bypass + * since we don't have any plane level adjustments using it. + */ + out_tf->type = TF_TYPE_BYPASS; + out_tf->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + /** * __set_input_tf - calculates the input transfer function based on expected * input space. + * @caps: dc color capabilities * @func: transfer function * @lut: lookup table that defines the color space * @lut_size: size of respective lut. @@ -313,27 +614,240 @@ static int __set_output_tf(struct dc_transfer_func *func, * Returns: * 0 in case of success. -ENOMEM if fails. 
*/ -static int __set_input_tf(struct dc_transfer_func *func, +static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size) { struct dc_gamma *gamma = NULL; bool res; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; - gamma->type = GAMMA_CUSTOM; - gamma->num_entries = lut_size; + gamma->type = GAMMA_CUSTOM; + gamma->num_entries = lut_size; + + __drm_lut_to_dc_gamma(lut, gamma, false); + } - __drm_lut_to_dc_gamma(lut, gamma, false); + res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL); - res = mod_color_calculate_degamma_params(NULL, func, gamma, true); - dc_gamma_release(&gamma); + if (gamma) + dc_gamma_release(&gamma); return res ? 0 : -ENOMEM; } +static enum dc_transfer_func_predefined +amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) +{ + switch (tf) { + default: + case AMDGPU_TRANSFER_FUNCTION_DEFAULT: + case AMDGPU_TRANSFER_FUNCTION_IDENTITY: + return TRANSFER_FUNCTION_LINEAR; + case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF: + case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF: + return TRANSFER_FUNCTION_SRGB; + case AMDGPU_TRANSFER_FUNCTION_BT709_OETF: + case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF: + return TRANSFER_FUNCTION_BT709; + case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF: + case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF: + return TRANSFER_FUNCTION_PQ; + case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA22; + case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA24; + case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA26; + } +} + +static void __to_dc_lut3d_color(struct dc_rgb *rgb, + const struct drm_color_lut lut, + int bit_precision) +{ + rgb->red = drm_color_lut_extract(lut.red, bit_precision); + rgb->green = drm_color_lut_extract(lut.green, bit_precision); + rgb->blue = drm_color_lut_extract(lut.blue, bit_precision); +} + +static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut, + uint32_t lut3d_size, + struct tetrahedral_params *params, + bool use_tetrahedral_9, + int bit_depth) +{ + struct dc_rgb *lut0; + struct dc_rgb *lut1; + struct dc_rgb *lut2; + struct dc_rgb *lut3; + int lut_i, i; + + + if (use_tetrahedral_9) { + lut0 = params->tetrahedral_9.lut0; + lut1 = params->tetrahedral_9.lut1; + lut2 = params->tetrahedral_9.lut2; + lut3 = params->tetrahedral_9.lut3; + } else { + lut0 = params->tetrahedral_17.lut0; + lut1 = params->tetrahedral_17.lut1; + lut2 = params->tetrahedral_17.lut2; + lut3 = params->tetrahedral_17.lut3; + } + + for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) { + /* + * We should consider the 3D LUT RGB values are distributed + * along four arrays lut0-3 where the first sizes 1229 and the + * other 1228. The bit depth supported for 3dlut channel is + * 12-bit, but DC also supports 10-bit. + * + * TODO: improve color pipeline API to enable the userspace set + * bit depth and 3D LUT size/stride, as specified by VA-API. 
+ */ + __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); + __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth); + __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth); + __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth); + } + /* lut0 has 1229 points (lut_size/4 + 1) */ + __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); +} + +/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream + * @drm_lut3d: user 3D LUT + * @drm_lut3d_size: size of 3D LUT + * @lut3d: DC 3D LUT + * + * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it + * on DCN accordingly. + */ +static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d, + uint32_t drm_lut3d_size, + struct dc_3dlut *lut) +{ + if (!drm_lut3d_size) { + lut->state.bits.initialized = 0; + } else { + /* Stride and bit depth are not programmable by API yet. + * Therefore, only supports 17x17x17 3D LUT (12-bit). + */ + lut->lut_3d.use_tetrahedral_9 = false; + lut->lut_3d.use_12bits = true; + lut->state.bits.initialized = 1; + __drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d, + lut->lut_3d.use_tetrahedral_9, + MAX_COLOR_3DLUT_BITDEPTH); + } +} + +static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, + uint32_t shaper_size, + struct dc_transfer_func *func_shaper) +{ + int ret = 0; + + if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) { + /* + * If user shaper LUT is set, we assume a linear color space + * (linearized by degamma 1D LUT or not). + */ + func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; + func_shaper->tf = tf; + func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + + ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom); + } else { + func_shaper->type = TF_TYPE_BYPASS; + func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + +static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, + uint32_t blend_size, + struct dc_transfer_func *func_blend) +{ + int ret = 0; + + if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) { + /* + * DRM plane gamma LUT or TF means we are linearizing color + * space before blending (similar to degamma programming). As + * we don't have hardcoded curve support, or we use AMD color + * module to fill the parameters that will be translated to HW + * points. + */ + func_blend->type = TF_TYPE_DISTRIBUTED_POINTS; + func_blend->tf = tf; + func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + + ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size); + } else { + func_blend->type = TF_TYPE_BYPASS; + func_blend->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + +/** + * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user + * shaper and 3D LUTs match the hw supported size + * @adev: amdgpu device + * @plane_state: the DRM plane state + * + * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or + * newer) and if the user shaper and 3D LUTs match the supported size. + * + * Returns: + * 0 on success. -EINVAL if lut size are invalid. 
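Since the 3D LUT is always programmed at 12-bit precision here, each 16-bit DRM channel value goes through drm_color_lut_extract() on its way in. That helper rescales with round-to-nearest and clamps to the target range; the following is an approximation of the idea, not the exact in-tree implementation:

#include <stdint.h>

/* Sketch: reduce a 16-bit LUT channel value to 'bits' bits of precision. */
static uint32_t lut_extract_approx(uint32_t val16, int bits)
{
        uint32_t max = (1u << bits) - 1;

        if (bits >= 16)
                return val16;

        /* Add half an output step before truncating: round to nearest. */
        val16 += 1u << (16 - bits - 1);
        val16 >>= 16 - bits;

        return val16 > max ? max : val16;
}

/* Example at 12 bits: 0x8000 -> 0x800, and 0xffff clamps to 0xfff. */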
+ */ +int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *shaper = NULL, *lut3d = NULL; + uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + + /* shaper LUT is only available if 3D LUT color caps */ + exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0; + shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size); + + if (shaper && size != exp_size) { + drm_dbg(&adev->ddev, + "Invalid Shaper LUT size. Should be %u but got %u.\n", + exp_size, size); + return -EINVAL; + } + + /* The number of 3D LUT entries is the dimension size cubed */ + exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0; + lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size); + + if (lut3d && size != exp_size) { + drm_dbg(&adev->ddev, + "Invalid 3D LUT size. Should be %u but got %u.\n", + exp_size, size); + return -EINVAL; + } + + return 0; +} + /** * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state @@ -401,9 +915,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) const struct drm_color_lut *degamma_lut, *regamma_lut; uint32_t degamma_size, regamma_size; bool has_regamma, has_degamma; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR; bool is_legacy; int r; + tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf); + r = amdgpu_dm_verify_lut_sizes(&crtc->base); if (r) return r; @@ -439,27 +956,23 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_is_degamma_srgb = true; stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; - + /* + * Note: although we pass has_rom as parameter here, we never + * actually use ROM because the color module only takes the ROM + * path if transfer_func->type == PREDEFINED. + * + * See more in mod_color_calculate_regamma_params() + */ r = __set_legacy_tf(stream->out_transfer_func, regamma_lut, regamma_size, has_rom); if (r) return r; - } else if (has_regamma) { - /* If atomic regamma, CRTC RGM goes into RGM LUT. */ - stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - - r = __set_output_tf(stream->out_transfer_func, regamma_lut, - regamma_size, has_rom); + } else { + regamma_size = has_regamma ? regamma_size : 0; + r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, + regamma_size, has_rom, tf); if (r) return r; - } else { - /* - * No CRTC RGM means we can just put the block into bypass - * since we don't have any plane level adjustments using it. - */ - stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; } /* @@ -495,20 +1008,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) return 0; } -/** - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. - * @crtc: amdgpu_dm crtc state - * @dc_plane_state: target DC surface - * - * Update the underlying dc_stream_state's input transfer function (ITF) in - * preparation for hardware commit. The transfer function used depends on - * the preparation done on the stream for color management. - * - * Returns: - * 0 on success. -ENOMEM if mem allocation fails. 
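Both size checks above go through __extract_blob_lut(), which is defined earlier in amdgpu_dm_color.c and not visible in this hunk. Plausibly it is a thin unpacking helper along these lines, with drm_color_lut_size() being the DRM inline that divides blob->length by sizeof(struct drm_color_lut):

/* Sketch, assuming it matches the helper defined earlier in this file:
 * return the LUT payload of a property blob, or NULL with *size == 0
 * when the property was never set.
 */
static const struct drm_color_lut *
__extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size)
{
        *size = blob ? drm_color_lut_size(blob) : 0;
        return blob ? (struct drm_color_lut *)blob->data : NULL;
}

For the 17x17x17 case above, the expected 3D LUT blob size is therefore exactly 17^3 = 4913 struct drm_color_lut entries.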
- */ -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - struct dc_plane_state *dc_plane_state) +static int +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, + struct dc_plane_state *dc_plane_state, + struct dc_color_caps *caps) { const struct drm_color_lut *degamma_lut; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; @@ -531,8 +1034,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, &degamma_size); ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); - dc_plane_state->in_transfer_func->type = - TF_TYPE_DISTRIBUTED_POINTS; + dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; /* * This case isn't fully correct, but also fairly @@ -564,11 +1066,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - r = __set_input_tf(dc_plane_state->in_transfer_func, + r = __set_input_tf(caps, dc_plane_state->in_transfer_func, degamma_lut, degamma_size); if (r) return r; - } else if (crtc->cm_is_degamma_srgb) { + } else { /* * For legacy gamma support we need the regamma input * in linear space. Assume that the input is sRGB. @@ -577,14 +1079,209 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = tf; if (tf != TRANSFER_FUNCTION_SRGB && - !mod_color_calculate_degamma_params(NULL, - dc_plane_state->in_transfer_func, NULL, false)) + !mod_color_calculate_degamma_params(caps, + dc_plane_state->in_transfer_func, + NULL, false)) return -ENOMEM; - } else { - /* ...Otherwise we can just bypass the DGM block. */ - dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; - dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; } return 0; } + +static int +__set_dm_plane_degamma(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state, + struct dc_color_caps *color_caps) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *degamma_lut; + enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + uint32_t degamma_size; + bool has_degamma_lut; + int ret; + + degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, + &degamma_size); + + has_degamma_lut = degamma_lut && + !__is_lut_linear(degamma_lut, degamma_size); + + tf = dm_plane_state->degamma_tf; + + /* If we don't have plane degamma LUT nor TF to set on DC, we have + * nothing to do here, return. 
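__set_dm_plane_degamma() above discards identity ramps via __is_lut_linear(), another helper defined earlier in this file: a LUT counts as linear when every entry is gray (R == G == B) and tracks the ideal ramp i * 0xffff / (size - 1) within +/-1. A sketch under those assumptions:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the __is_lut_linear() idea: treat an identity LUT as "no
 * degamma" so the block can stay in bypass rather than spending a custom
 * curve on a ramp userspace happened to upload. Assumes size >= 2.
 */
static bool is_lut_linear(const struct drm_color_lut *lut, uint32_t size)
{
        uint32_t i;

        for (i = 0; i < size; i++) {
                uint32_t expected = (uint32_t)((uint64_t)i * 0xffff / (size - 1));
                int delta = (int)lut[i].red - (int)expected;

                if (lut[i].red != lut[i].green || lut[i].green != lut[i].blue)
                        return false;

                /* Tolerate off-by-one from userspace rounding. */
                if (delta < -1 || delta > 1)
                        return false;
        }

        return true;
}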
+ */ + if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT) + return -EINVAL; + + dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf); + + if (has_degamma_lut) { + ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); + + dc_plane_state->in_transfer_func->type = + TF_TYPE_DISTRIBUTED_POINTS; + + ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func, + degamma_lut, degamma_size); + if (ret) + return ret; + } else { + dc_plane_state->in_transfer_func->type = + TF_TYPE_PREDEFINED; + + if (!mod_color_calculate_degamma_params(color_caps, + dc_plane_state->in_transfer_func, NULL, false)) + return -ENOMEM; + } + return 0; +} + +static int +amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut; + uint32_t shaper_size, lut3d_size, blend_size; + int ret; + + dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult); + + shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); + shaper_size = shaper_lut != NULL ? shaper_size : 0; + shaper_tf = dm_plane_state->shaper_tf; + lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size); + lut3d_size = lut3d != NULL ? lut3d_size : 0; + + amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func); + ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, + amdgpu_tf_to_dc_tf(shaper_tf), + shaper_size, + dc_plane_state->in_shaper_func); + if (ret) { + drm_dbg_kms(plane_state->plane->dev, + "setting plane %d shaper LUT failed.\n", + plane_state->plane->index); + + return ret; + } + + blend_tf = dm_plane_state->blend_tf; + blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size); + blend_size = blend_lut != NULL ? blend_size : 0; + + ret = amdgpu_dm_atomic_blend_lut(blend_lut, false, + amdgpu_tf_to_dc_tf(blend_tf), + blend_size, dc_plane_state->blend_tf); + if (ret) { + drm_dbg_kms(plane_state->plane->dev, + "setting plane %d gamma lut failed.\n", + plane_state->plane->index); + + return ret; + } + + return 0; +} + +/** + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. + * @crtc: amdgpu_dm crtc state + * @plane_state: DRM plane state + * @dc_plane_state: target DC surface + * + * Update the underlying dc_stream_state's input transfer function (ITF) in + * preparation for hardware commit. The transfer function used depends on + * the preparation done on the stream for color management. + * + * Returns: + * 0 on success. -ENOMEM if mem allocation fails. + */ +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + struct drm_color_ctm_3x4 *ctm = NULL; + struct dc_color_caps *color_caps = NULL; + bool has_crtc_cm_degamma; + int ret; + + ret = amdgpu_dm_verify_lut3d_size(adev, plane_state); + if (ret) { + drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n"); + return ret; + } + + if (dc_plane_state->ctx && dc_plane_state->ctx->dc) + color_caps = &dc_plane_state->ctx->dc->caps.color; + + /* Initially, we can just bypass the DGM block. 
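The hdr_mult assignment above decodes the plane HDR multiplier with the same S31.32 sign-magnitude convention as the CTM entries, and the driver default (AMDGPU_HDR_MULT_DEFAULT, defined outside this hunk) is 1.0, i.e. a no-op. A few encodings from the userspace side, assuming that format:

#include <stdint.h>

/* S31.32 sign-magnitude: sign in bit 63, integer part in bits 62:32,
 * fraction in bits 31:0. Values below only illustrate the encoding.
 */
static const uint64_t HDR_MULT_1_0 = 1ULL << 32;  /* identity (default) */
static const uint64_t HDR_MULT_4_0 = 4ULL << 32;  /* boost, e.g. HDR highlights */
static const uint64_t HDR_MULT_0_5 = 1ULL << 31;  /* halve, e.g. dim SDR */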
*/ + dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; + dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; + + /* After, we start to update values according to color props */ + has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); + + ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps); + if (ret == -ENOMEM) + return ret; + + /* We only have one degamma block available (pre-blending) for the + * whole color correction pipeline, so that we can't actually perform + * plane and CRTC degamma at the same time. Explicitly reject atomic + * updates when userspace sets both plane and CRTC degamma properties. + */ + if (has_crtc_cm_degamma && ret != -EINVAL) { + drm_dbg_kms(crtc->base.crtc->dev, + "doesn't support plane and CRTC degamma at the same time\n"); + return -EINVAL; + } + + /* If we are here, it means we don't have plane degamma settings, check + * if we have CRTC degamma waiting for mapping to pre-blending degamma + * block + */ + if (has_crtc_cm_degamma) { + /* + * AMD HW doesn't have post-blending degamma caps. When DRM + * CRTC atomic degamma is set, we maps it to DPP degamma block + * (pre-blending) or, on legacy gamma, we use DPP degamma to + * linearize (implicit degamma) from sRGB/BT709 according to + * the input space. + */ + ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps); + if (ret) + return ret; + } + + /* Setup CRTC CTM. */ + if (dm_plane_state->ctm) { + ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data; + /* + * DCN2 and older don't support both pre-blending and + * post-blending gamut remap. For this HW family, if we have + * the plane and CRTC CTMs simultaneously, CRTC CTM takes + * priority, and we discard plane CTM, as implemented in + * dcn10_program_gamut_remap(). However, DCN3+ has DPP + * (pre-blending) and MPC (post-blending) `gamut remap` blocks; + * therefore, we can program plane and CRTC CTMs together by + * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP, + * as it's done by dcn30_program_gamut_remap(). + */ + __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + + dc_plane_state->gamut_remap_matrix.enable_remap = true; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } else { + /* Bypass CTM. 
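The drm_color_ctm_3x4 blob consumed above holds twelve S31.32 sign-magnitude words in row-major order: columns 0-2 of each row form the 3x3 matrix, and column 3 is an offset added after the multiply. A userspace-side sketch that builds an identity matrix with a small red offset (the struct is mirrored locally so the snippet stands alone):

#include <stdint.h>
#include <string.h>

struct color_ctm_3x4 { uint64_t matrix[12]; }; /* mirrors drm_color_ctm_3x4 */

static void ctm_identity_with_red_offset(struct color_ctm_3x4 *ctm)
{
        uint64_t one = 1ULL << 32;   /* 1.0 in S31.32 */

        memset(ctm, 0, sizeof(*ctm));
        ctm->matrix[0] = one;        /* R <- R */
        ctm->matrix[5] = one;        /* G <- G */
        ctm->matrix[10] = one;       /* B <- B */
        ctm->matrix[3] = one / 10;   /* ~0.1 offset on R (truncated) */
}

Such a matrix would then be wrapped in a blob with drmModeCreatePropertyBlob() and staged on the plane CTM property in an atomic commit.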
*/ + dc_plane_state->gamut_remap_matrix.enable_remap = false; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } + + return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state); +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 52ecfa746b54..f936a35fa9eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -326,6 +326,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) if (!connector->state || connector->state->crtc != crtc) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconn = to_amdgpu_dm_connector(connector); break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index cb0b48bb2a7d..6e715ef3a556 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -29,7 +29,6 @@ #include "dc.h" #include "amdgpu.h" #include "amdgpu_dm_psr.h" -#include "amdgpu_dm_replay.h" #include "amdgpu_dm_crtc.h" #include "amdgpu_dm_plane.h" #include "amdgpu_dm_trace.h" @@ -124,12 +123,7 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) * fill_dc_dirty_rects(). */ if (vblank_work->stream && vblank_work->stream->link) { - /* - * Prioritize replay, instead of psr - */ - if (vblank_work->stream->link->replay_settings.replay_feature_enabled) - amdgpu_dm_replay_enable(vblank_work->stream, false); - else if (vblank_work->enable) { + if (vblank_work->enable) { if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && vblank_work->stream->link->psr_settings.psr_allow_active) amdgpu_dm_psr_disable(vblank_work->stream); @@ -138,7 +132,6 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) && #endif - vblank_work->stream->link->panel_config.psr.disallow_replay && vblank_work->acrtc->dm_irq_params.allow_psr_entry) { amdgpu_dm_psr_enable(vblank_work->stream); } @@ -260,6 +253,7 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr state->freesync_config = cur->freesync_config; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->regamma_tf = cur->regamma_tf; state->crc_skip_count = cur->crc_skip_count; state->mpo_requested = cur->mpo_requested; /* TODO Duplicate dc_stream after objects are stream object is flattened */ @@ -296,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif +#ifdef AMD_PRIVATE_COLOR +/** + * dm_crtc_additional_color_mgmt - enable additional color properties + * @crtc: DRM CRTC + * + * This function lets the driver enable post-blending CRTC regamma transfer + * function property in addition to DRM CRTC gamma LUT. Default value means + * linear transfer function, which is the default CRTC gamma LUT behaviour + * without this property. 
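From userspace the new CRTC property behaves like any other atomic enum. A libdrm sketch, assuming the driver registers it under the name AMD_CRTC_REGAMMA_TF (the property-creation site is outside this hunk) and that tf is one of the amdgpu_transfer_function values:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_crtc_regamma_tf(int fd, drmModeAtomicReq *req,
                               uint32_t crtc_id, uint64_t tf)
{
        drmModeObjectProperties *props =
                drmModeObjectGetProperties(fd, crtc_id, DRM_MODE_OBJECT_CRTC);
        int ret = -1;
        uint32_t i;

        if (!props)
                return -1;

        for (i = 0; i < props->count_props; i++) {
                drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

                if (prop && !strcmp(prop->name, "AMD_CRTC_REGAMMA_TF") &&
                    drmModeAtomicAddProperty(req, crtc_id, prop->prop_id, tf) >= 0)
                        ret = 0;

                drmModeFreeProperty(prop);
        }

        drmModeFreeObjectProperties(props);
        return ret;
}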
+ */ +static void +dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + + if (adev->dm.dc->caps.color.mpc.ogam_ram) + drm_object_attach_property(&crtc->base, + adev->mode_info.regamma_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); +} + +static int +amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc, + struct drm_crtc_state *state, + struct drm_property *property, + uint64_t val) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); + + if (property == adev->mode_info.regamma_tf_property) { + if (acrtc_state->regamma_tf != val) { + acrtc_state->regamma_tf = val; + acrtc_state->base.color_mgmt_changed |= 1; + } + } else { + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n", + crtc->base.id, crtc->name, + property->base.id, property->name); + return -EINVAL; + } + + return 0; +} + +static int +amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc, + const struct drm_crtc_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); + + if (property == adev->mode_info.regamma_tf_property) + *val = acrtc_state->regamma_tf; + else + return -EINVAL; + + return 0; +} +#endif + /* Implemented only the options currently available for the driver */ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .reset = amdgpu_dm_crtc_reset_state, @@ -314,6 +372,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif +#ifdef AMD_PRIVATE_COLOR + .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, + .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, +#endif }; static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc) @@ -489,6 +551,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); +#ifdef AMD_PRIVATE_COLOR + dm_crtc_additional_color_mgmt(&acrtc->base); +#endif return 0; fail: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 13a177d34376..68a846323912 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2971,6 +2971,85 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val) return 0; } +static int dmub_trace_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; + enum dmub_gpint_command cmd; + u64 mask = 0xffff; + u8 shift = 0; + u32 res; + int i; + + if (!srv->fw_version) + return -EINVAL; + + for (i = 0; i < 4; i++) { + res = (val & mask) >> shift; + + switch (i) { + case 0: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0; + break; + case 1: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1; + break; + case 2: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2; + break; + case 3: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3; + break; + } + + if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT)) + return -EIO; + + usleep_range(100, 1000); + + mask <<= 16; + shift += 16; + } + + return 0; +} + +static int dmub_trace_mask_show(void *data, u64 *val) +{ + enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0; + struct 
amdgpu_device *adev = data; + struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; + u8 shift = 0; + u64 raw = 0; + u64 res = 0; + int i = 0; + + if (!srv->fw_version) + return -EINVAL; + + while (i < 4) { + uint32_t response; + + if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + return -EIO; + + raw = response; + usleep_range(100, 1000); + + cmd++; + res |= (raw << shift); + shift += 16; + i++; + } + + *val = res; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show, + dmub_trace_mask_set, "0x%llx\n"); + /* * Set dmcub trace event IRQ enable or disable. * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en @@ -3647,12 +3726,16 @@ static int capabilities_show(struct seq_file *m, void *unused) bool mall_supported = dc->caps.mall_size_total; bool subvp_supported = dc->caps.subvp_fw_processing_delay_us; unsigned int mall_in_use = false; - unsigned int subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state); + unsigned int subvp_in_use = false; + struct hubbub *hubbub = dc->res_pool->hubbub; if (hubbub->funcs->get_mall_en) hubbub->funcs->get_mall_en(hubbub, &mall_in_use); + if (dc->cap_funcs.get_subvp_en) + subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state); + seq_printf(m, "mall supported: %s, enabled: %s\n", mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no"); seq_printf(m, "sub-viewport supported: %s, enabled: %s\n", @@ -3880,6 +3963,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev) debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root, adev, &force_timing_sync_ops); + debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root, + adev, &dmub_trace_mask_fops); + debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root, adev, &dmcub_trace_event_state_fops); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index aac98f93545a..eaf8d9f48244 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -31,6 +31,7 @@ #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include "dm_services.h" #include "amdgpu.h" @@ -216,7 +217,7 @@ static void dm_helpers_construct_old_payload( struct drm_dp_mst_atomic_payload *old_payload) { struct drm_dp_mst_atomic_payload *pos; - int pbn_per_slot = mst_state->pbn_div; + int pbn_per_slot = dfixed_trunc(mst_state->pbn_div); u8 next_payload_vc_start = mgr->next_start_slot; u8 payload_vc_start = new_payload->vc_start_slot; u8 allocated_time_slots; @@ -339,15 +340,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( return ACT_SUCCESS; } -bool dm_helpers_dp_mst_send_payload_allocation( +void dm_helpers_dp_mst_send_payload_allocation( struct dc_context *ctx, - const struct dc_stream_state *stream, - bool enable) + const struct dc_stream_state *stream) { struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state; struct drm_dp_mst_topology_mgr *mst_mgr; - struct drm_dp_mst_atomic_payload *new_payload, old_payload; + struct drm_dp_mst_atomic_payload *new_payload; enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; int ret = 0; @@ -355,25 +355,13 @@ bool dm_helpers_dp_mst_send_payload_allocation( aconnector = 
(struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_root) - return false; + return; mst_mgr = &aconnector->mst_root->mst_mgr; mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); - new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - if (!enable) { - set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; - clr_flag = MST_ALLOCATE_NEW_PAYLOAD; - } - - if (enable) { - ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); - } else { - dm_helpers_construct_old_payload(mst_mgr, mst_state, - new_payload, &old_payload); - drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload); - } + ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); if (ret) { amdgpu_dm_set_mst_status(&aconnector->mst_status, @@ -384,10 +372,36 @@ bool dm_helpers_dp_mst_send_payload_allocation( amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false); } - - return true; } +void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + struct dc_context *ctx, + const struct dc_stream_state *stream) +{ + struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_state *mst_state; + struct drm_dp_mst_topology_mgr *mst_mgr; + struct drm_dp_mst_atomic_payload *new_payload, old_payload; + enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; + enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD; + + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (!aconnector || !aconnector->mst_root) + return; + + mst_mgr = &aconnector->mst_root->mst_mgr; + mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); + new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); + dm_helpers_construct_old_payload(mst_mgr, mst_state, + new_payload, &old_payload); + + drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload); + + amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true); + amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false); + } + void dm_dtn_log_begin(struct dc_context *ctx, struct dc_log_buffer_ctx *log_ctx) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 51467f132c26..58b880acb087 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,10 +894,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *amdgpu_dm_connector; + const struct dc_link *dc_link; - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + + dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { dc_interrupt_set(adev->dm.dc, @@ -930,9 +935,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + struct amdgpu_dm_connector *amdgpu_dm_connector; + const struct dc_link *dc_link; + + if 
(connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { dc_interrupt_set(adev->dm.dc, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 11da0eebee6c..941e96f100f4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -27,6 +27,8 @@ #include <drm/display/drm_dp_mst_helper.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fixed.h> +#include <drm/drm_edid.h> #include "dm_services.h" #include "amdgpu.h" #include "amdgpu_dm.h" @@ -44,7 +46,7 @@ #include "amdgpu_dm_debugfs.h" #endif -#include "dc/dcn20/dcn20_resource.h" +#include "dc/resource/dcn20/dcn20_resource.h" #define PEAK_FACTOR_X1000 1006 @@ -424,8 +426,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector, { struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state, connector); - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc); return &adev->dm.mst_encoders[acrtc->crtc_id].base; @@ -941,10 +942,10 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, link_timeslots_used = 0; for (i = 0; i < count; i++) - link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, mst_state->pbn_div); + link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div)); fair_pbn_alloc = - (63 - link_timeslots_used) / remaining_to_increase * mst_state->pbn_div; + (63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div); if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; @@ -1500,14 +1501,16 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ind = find_crtc_index_in_state_by_stream(state, stream); if (ind >= 0) { + struct drm_connector *connector; struct amdgpu_dm_connector *aconnector; struct drm_connector_state *drm_new_conn_state; struct dm_connector_state *dm_new_conn_state; struct dm_crtc_state *dm_old_crtc_state; - aconnector = + connector = amdgpu_dm_find_first_crtc_matching_connector(state, state->crtcs[ind].ptr); + aconnector = to_amdgpu_dm_connector(connector); drm_new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); @@ -1602,9 +1605,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct dc_link_settings cur_link_settings; unsigned int end_to_end_bw_in_kbps = 0; unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; - unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - uint16_t full_pbn = aconnector->mst_output_port->full_pbn; + struct dc_dsc_config_options dsc_options = {0}; /* * Consider the case with the depth of the mst topology tree is equal or less than 2 @@ -1620,30 +1622,39 @@ enum dc_status dm_dp_mst_is_port_support_mode( (aconnector->mst_output_port->passthrough_aux || aconnector->dsc_aux == &aconnector->mst_output_port->aux)) { cur_link_settings = stream->link->verified_link_cap; + upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings); + down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn); - upper_link_bw_in_kbps = 
dc_link_bandwidth_kbps(aconnector->dc_link, - &cur_link_settings); - down_link_bw_in_kbps = kbps_from_pbn(full_pbn); - - /* pick the bottleneck */ - end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, - down_link_bw_in_kbps); - - /* - * use the maximum dsc compression bandwidth as the required - * bandwidth for the mode - */ - max_compressed_bw_in_kbps = bw_range.min_kbps; + /* pick the end to end bw bottleneck */ + end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) { - DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n"); + if (end_to_end_bw_in_kbps < bw_range.min_kbps) { + DRM_DEBUG_DRIVER("maximum dsc compression cannot fit into end-to-end bandwidth\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } + + if (end_to_end_bw_in_kbps < bw_range.stream_kbps) { + dc_dsc_get_default_config_option(stream->link->dc, &dsc_options); + dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16; + if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0], + &stream->sink->dsc_caps.dsc_dec_caps, + &dsc_options, + end_to_end_bw_in_kbps, + &stream->timing, + dc_link_get_highest_encoding_format(stream->link), + &stream->timing.dsc_cfg)) { + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc and dsc config found\n"); + } else { + DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc but dsc config not found\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } + } } else { /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; - pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - if (pbn > full_pbn) + pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp << 4); + if (pbn > aconnector->mst_output_port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 116121e647ca..8a4c40b4c27e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1337,8 +1337,14 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); WARN_ON(amdgpu_state == NULL); - if (amdgpu_state) - __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); + if (!amdgpu_state) + return; + + __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); + amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; + amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; } static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1357,6 +1363,27 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct dc_plane_state_retain(dm_plane_state->dc_state); } + if (old_dm_plane_state->degamma_lut) + dm_plane_state->degamma_lut = + drm_property_blob_get(old_dm_plane_state->degamma_lut); + if (old_dm_plane_state->ctm) + dm_plane_state->ctm = + drm_property_blob_get(old_dm_plane_state->ctm); + if (old_dm_plane_state->shaper_lut) + dm_plane_state->shaper_lut = + drm_property_blob_get(old_dm_plane_state->shaper_lut); + if (old_dm_plane_state->lut3d) + dm_plane_state->lut3d = + 
drm_property_blob_get(old_dm_plane_state->lut3d); + if (old_dm_plane_state->blend_lut) + dm_plane_state->blend_lut = + drm_property_blob_get(old_dm_plane_state->blend_lut); + + dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; + dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; + dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf; + dm_plane_state->blend_tf = old_dm_plane_state->blend_tf; + return &dm_plane_state->base; } @@ -1424,12 +1451,206 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, { struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + if (dm_plane_state->degamma_lut) + drm_property_blob_put(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_put(dm_plane_state->ctm); + if (dm_plane_state->lut3d) + drm_property_blob_put(dm_plane_state->lut3d); + if (dm_plane_state->shaper_lut) + drm_property_blob_put(dm_plane_state->shaper_lut); + if (dm_plane_state->blend_lut) + drm_property_blob_put(dm_plane_state->blend_lut); + if (dm_plane_state->dc_state) dc_plane_state_release(dm_plane_state->dc_state); drm_atomic_helper_plane_destroy_state(plane, state); } +#ifdef AMD_PRIVATE_COLOR +static void +dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, + struct drm_plane *plane) +{ + struct amdgpu_mode_info mode_info = dm->adev->mode_info; + struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp; + + /* Check HW color pipeline capabilities on DPP block (pre-blending) + * before exposing related properties. + */ + if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) { + drm_object_attach_property(&plane->base, + mode_info.plane_degamma_lut_property, + 0); + drm_object_attach_property(&plane->base, + mode_info.plane_degamma_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_degamma_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); + } + /* HDR MULT is always available */ + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_hdr_mult_property, + AMDGPU_HDR_MULT_DEFAULT); + + /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
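The duplicate/destroy pair above is the standard blob lifetime dance: every duplicated plane state takes one reference per attached blob, and destroy drops exactly the references that state held. Missing a get means the blob can be freed while a state still points at it; missing a put leaks it. The pairing, distilled into a sketch:

/* Sketch of the refcount pairing the duplicate/destroy hooks rely on. */
static void dup_blob_ref(struct drm_property_blob **dst,
                         struct drm_property_blob *src)
{
        *dst = src ? drm_property_blob_get(src) : NULL; /* +1 for the copy */
}

static void put_blob_ref(struct drm_property_blob **blob)
{
        if (*blob) {
                drm_property_blob_put(*blob); /* -1 when the state dies */
                *blob = NULL;
        }
}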
*/ + if (dm->dc->caps.color.mpc.gamut_remap) + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_ctm_property, 0); + + if (dpp_color_caps.hw_3d_lut) { + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_lut_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); + drm_object_attach_property(&plane->base, + mode_info.plane_lut3d_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_lut3d_size_property, + MAX_COLOR_3DLUT_SIZE); + } + + if (dpp_color_caps.ogam_ram) { + drm_object_attach_property(&plane->base, + mode_info.plane_blend_lut_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_blend_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + mode_info.plane_blend_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); + } +} + +static int +dm_atomic_plane_set_property(struct drm_plane *plane, + struct drm_plane_state *state, + struct drm_property *property, + uint64_t val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + bool replaced = false; + int ret; + + if (property == adev->mode_info.plane_degamma_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->degamma_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_degamma_tf_property) { + if (dm_plane_state->degamma_tf != val) { + dm_plane_state->degamma_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } + } else if (property == adev->mode_info.plane_hdr_mult_property) { + if (dm_plane_state->hdr_mult != val) { + dm_plane_state->hdr_mult = val; + dm_plane_state->base.color_mgmt_changed = 1; + } + } else if (property == adev->mode_info.plane_ctm_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->ctm, + val, + sizeof(struct drm_color_ctm_3x4), -1, + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_shaper_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->shaper_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_shaper_tf_property) { + if (dm_plane_state->shaper_tf != val) { + dm_plane_state->shaper_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } + } else if (property == adev->mode_info.plane_lut3d_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->lut3d, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_blend_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->blend_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_blend_tf_property) { + if (dm_plane_state->blend_tf != val) { + dm_plane_state->blend_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } + } else { + drm_dbg_atomic(plane->dev, 
+ "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", + plane->base.id, plane->name, + property->base.id, property->name); + return -EINVAL; + } + + return 0; +} + +static int +dm_atomic_plane_get_property(struct drm_plane *plane, + const struct drm_plane_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + + if (property == adev->mode_info.plane_degamma_lut_property) { + *val = (dm_plane_state->degamma_lut) ? + dm_plane_state->degamma_lut->base.id : 0; + } else if (property == adev->mode_info.plane_degamma_tf_property) { + *val = dm_plane_state->degamma_tf; + } else if (property == adev->mode_info.plane_hdr_mult_property) { + *val = dm_plane_state->hdr_mult; + } else if (property == adev->mode_info.plane_ctm_property) { + *val = (dm_plane_state->ctm) ? + dm_plane_state->ctm->base.id : 0; + } else if (property == adev->mode_info.plane_shaper_lut_property) { + *val = (dm_plane_state->shaper_lut) ? + dm_plane_state->shaper_lut->base.id : 0; + } else if (property == adev->mode_info.plane_shaper_tf_property) { + *val = dm_plane_state->shaper_tf; + } else if (property == adev->mode_info.plane_lut3d_property) { + *val = (dm_plane_state->lut3d) ? + dm_plane_state->lut3d->base.id : 0; + } else if (property == adev->mode_info.plane_blend_lut_property) { + *val = (dm_plane_state->blend_lut) ? + dm_plane_state->blend_lut->base.id : 0; + } else if (property == adev->mode_info.plane_blend_tf_property) { + *val = dm_plane_state->blend_tf; + + } else { + return -EINVAL; + } + + return 0; +} +#endif + static const struct drm_plane_funcs dm_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -1438,6 +1659,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, +#ifdef AMD_PRIVATE_COLOR + .atomic_set_property = dm_atomic_plane_set_property, + .atomic_get_property = dm_atomic_plane_get_property, +#endif }; int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, @@ -1517,6 +1742,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_helper_add(plane, &dm_plane_helper_funcs); +#ifdef AMD_PRIVATE_COLOR + dm_atomic_plane_attach_color_mgmt_properties(dm, plane); +#endif /* Create (reset) the plane state */ if (plane->funcs->reset) plane->funcs->reset(plane); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 08ce3bb8f640..1f08c6564c3b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -51,6 +51,9 @@ static bool link_supports_psrsu(struct dc_link *link) !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap) return false; + if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) + return false; + return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c new file mode 100644 index 000000000000..16e72d623630 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services_types.h" + +#include "amdgpu.h" +#include "amdgpu_dm.h" +#include "amdgpu_dm_wb.h" +#include "amdgpu_display.h" +#include "dc.h" + +#include <drm/drm_edid.h> +#include <drm/drm_atomic_state_helper.h> +#include <drm/drm_modeset_helper_vtables.h> + +static const u32 amdgpu_dm_wb_formats[] = { + DRM_FORMAT_XRGB2101010, +}; + +static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_framebuffer *fb; + const struct drm_display_mode *mode = &crtc_state->mode; + bool found = false; + uint8_t i; + + if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + return 0; + + fb = conn_state->writeback_job->fb; + if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) { + DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n", + fb->width, fb->height); + return -EINVAL; + } + + for (i = 0; i < sizeof(amdgpu_dm_wb_formats) / sizeof(u32); i++) { + if (fb->format->format == amdgpu_dm_wb_formats[i]) + found = true; + } + + if (!found) { + DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", + &fb->format->format); + return -EINVAL; + } + + return 0; +} + + +static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + + return drm_add_modes_noedid(connector, dev->mode_config.max_width, + dev->mode_config.max_height); +} + +static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector, + struct drm_writeback_job *job) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + uint32_t domain; + int r; + + if (!job->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + + afb = to_amdgpu_framebuffer(job->fb); + obj = job->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + + r = amdgpu_bo_reserve(rbo, true); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + goto error_unlock; + } + + domain = amdgpu_display_supported_domains(adev, rbo->flags); + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + goto error_unlock; + } + + r = 
amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", rbo); + goto error_unpin; + } + + amdgpu_bo_unreserve(rbo); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; +} + +static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector, + struct drm_writeback_job *job) +{ + struct amdgpu_bo *rbo; + int r; + + if (!job->fb) + return; + + rbo = gem_to_amdgpu_bo(job->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = { + .atomic_check = amdgpu_dm_wb_encoder_atomic_check, +}; + +static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = amdgpu_dm_connector_funcs_reset, + .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = { + .get_modes = amdgpu_dm_wb_connector_get_modes, + .prepare_writeback_job = amdgpu_dm_wb_prepare_job, + .cleanup_writeback_job = amdgpu_dm_wb_cleanup_job, +}; + +int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, + struct amdgpu_dm_wb_connector *wbcon, + uint32_t link_index) +{ + struct dc *dc = dm->dc; + struct dc_link *link = dc_get_link_at_index(dc, link_index); + int res = 0; + + wbcon->link = link; + + drm_connector_helper_add(&wbcon->base.base, &amdgpu_dm_wb_conn_helper_funcs); + + res = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base, + &amdgpu_dm_wb_connector_funcs, + &amdgpu_dm_wb_encoder_helper_funcs, + amdgpu_dm_wb_formats, + ARRAY_SIZE(amdgpu_dm_wb_formats), + amdgpu_dm_get_encoder_crtc_mask(dm->adev)); + + if (res) + return res; + /* + * Some of the properties below require access to state, like bpc. + * Allocate some default initial connector state with our reset helper. + */ + if (wbcon->base.base.funcs->reset) + wbcon->base.base.funcs->reset(&wbcon->base.base); + + return 0; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h new file mode 100644 index 000000000000..13d31c857dee --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
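Once registered through drm_writeback_connector_init(), the connector is driven entirely with the core DRM writeback properties, nothing AMD-specific: WRITEBACK_FB_ID carries the destination framebuffer and WRITEBACK_OUT_FENCE_PTR optionally returns a completion fence. A libdrm sketch with the property-ID lookup elided; per the atomic_check above, the framebuffer must be XRGB2101010 and match the CRTC mode:

#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int queue_writeback(int fd, uint32_t conn_id,
                           uint32_t prop_wb_fb_id, uint32_t prop_out_fence,
                           uint32_t fb_id, int32_t *out_fence_fd)
{
        drmModeAtomicReq *req = drmModeAtomicAlloc();
        int ret;

        if (!req)
                return -1;

        drmModeAtomicAddProperty(req, conn_id, prop_wb_fb_id, fb_id);
        /* The fence property takes a user pointer to an s32 fd. */
        drmModeAtomicAddProperty(req, conn_id, prop_out_fence,
                                 (uint64_t)(uintptr_t)out_fence_fd);

        ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_NONBLOCK, NULL);
        drmModeAtomicFree(req);
        return ret;
}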
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_WB_H__ +#define __AMDGPU_DM_WB_H__ + +#include <drm/drm_writeback.h> + +int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, + struct amdgpu_dm_wb_connector *dm_wbcon, + uint32_t link_index); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3a169b78e7e4..7991ae468f75 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -22,7 +22,7 @@ # # Makefile for Display Core (dc) component. -DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc +DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc ifdef CONFIG_DRM_AMD_DC_FP @@ -34,12 +34,8 @@ DC_LIBS += dcn21 DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 -DC_LIBS += dcn302 -DC_LIBS += dcn303 DC_LIBS += dcn31 DC_LIBS += dcn314 -DC_LIBS += dcn315 -DC_LIBS += dcn316 DC_LIBS += dcn32 DC_LIBS += dcn321 DC_LIBS += dcn35 @@ -51,7 +47,6 @@ DC_LIBS += dce120 DC_LIBS += dce112 DC_LIBS += dce110 -DC_LIBS += dce100 DC_LIBS += dce80 ifdef CONFIG_DRM_AMD_DC_SI @@ -65,7 +60,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI include $(AMD_DC) DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o +dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o DISPLAY_CORE += dc_vm_helper.o diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index e295a839ab47..1090d235086a 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -103,7 +103,8 @@ void convert_float_matrix( static uint32_t find_gcd(uint32_t a, uint32_t b) { - uint32_t remainder = 0; + uint32_t remainder; + while (b != 0) { remainder = a % b; a = b; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 2d1f5efa9091..960c4b4f6ddf 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1698,7 +1698,7 @@ static enum bp_result bios_parser_enable_disp_power_gating( static enum bp_result bios_parser_enable_lvtma_control( struct dc_bios *dcb, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { struct bios_parser *bp = BP_FROM_DCB(dcb); @@ -1706,7 +1706,7 @@ static enum bp_result bios_parser_enable_lvtma_control( if (!bp->cmd_tbl.enable_lvtma_control) return BP_RESULT_FAILURE; - return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait); + return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait); } static bool bios_parser_is_accelerated_mode( @@ -2221,22 +2221,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info( switch 
(bp->object_info_tbl.revision.minor) { case 4: - default: - object = get_bios_object(bp, object_id); - - if (!object) - return BP_RESULT_BADINPUT; - - record = get_disp_connector_caps_record(bp, object); - if (!record) - return BP_RESULT_NORECORD; - - info->INTERNAL_DISPLAY = - (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0; - info->INTERNAL_DISPLAY_BL = - (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0; - break; - case 5: + default: + object = get_bios_object(bp, object_id); + + if (!object) + return BP_RESULT_BADINPUT; + + record = get_disp_connector_caps_record(bp, object); + if (!record) + return BP_RESULT_NORECORD; + + info->INTERNAL_DISPLAY = + (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0; + info->INTERNAL_DISPLAY_BL = + (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0; + break; + case 5: object_path_v3 = get_bios_object_from_path_v3(bp, object_id); if (!object_path_v3) @@ -2398,7 +2398,6 @@ static enum bp_result get_vram_info_v30( return result; } - /* * get_integrated_info_v11 * @@ -3332,27 +3331,28 @@ static enum bp_result get_bracket_layout_record( DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n"); return BP_RESULT_BADINPUT; } + tbl = &bp->object_info_tbl; v1_4 = tbl->v1_4; v1_5 = tbl->v1_5; result = BP_RESULT_NORECORD; switch (bp->object_info_tbl.revision.minor) { - case 4: - default: - for (i = 0; i < v1_4->number_of_path; ++i) { - if (bracket_layout_id == - v1_4->display_path[i].display_objid) { - result = update_slot_layout_info(dcb, i, slot_layout_info); - break; - } + case 4: + default: + for (i = 0; i < v1_4->number_of_path; ++i) { + if (bracket_layout_id == v1_4->display_path[i].display_objid) { + result = update_slot_layout_info(dcb, i, slot_layout_info); + break; } - break; - case 5: - for (i = 0; i < v1_5->number_of_path; ++i) - result = update_slot_layout_info_v2(dcb, i, slot_layout_info); - break; + } + break; + case 5: + for (i = 0; i < v1_5->number_of_path; ++i) + result = update_slot_layout_info_v2(dcb, i, slot_layout_info); + break; } + return result; } @@ -3361,9 +3361,7 @@ static enum bp_result bios_get_board_layout_info( struct board_layout_info *board_layout_info) { unsigned int i; - struct bios_parser *bp; - static enum bp_result record_result; unsigned int max_slots; @@ -3373,7 +3371,6 @@ static enum bp_result bios_get_board_layout_info( 0, 0 }; - bp = BP_FROM_DCB(dcb); if (board_layout_info == NULL) { @@ -3554,7 +3551,6 @@ static const struct dc_vbios_funcs vbios_funcs = { .bios_parser_destroy = firmware_parser_destroy, .get_board_layout_info = bios_get_board_layout_info, - /* TODO: use this fn in hw init?*/ .pack_data_tables = bios_parser_pack_data_tables, .get_atom_dc_golden_table = bios_get_atom_dc_golden_table, diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 90a02d7bd3da..293a919d605d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -123,7 +123,7 @@ static void encoder_control_dmcub( sizeof(cmd.digx_encoder_control.header); cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result encoder_control_digx_v1_5( @@ -259,7 +259,7 @@ static void transmitter_control_dmcub( sizeof(cmd.dig1_transmitter_control.header); 
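The recurring dm_execute_dmub_cmd() -> dc_wake_and_execute_dmub_cmd() substitution in these hunks is about DMCUB idle power states: once the firmware may enter IPS, every command has to be bracketed by an explicit wake. The shape of the wrapper, as an illustrative sketch (the real plumbing lives in dc_dmub_srv.c, outside this section):

/* Sketch only: the helper names exist in DC, but the exact sequencing
 * of the wake variant is an assumption here.
 */
static bool wake_and_execute(struct dc_context *ctx, union dmub_rb_cmd *cmd,
                             enum dm_dmub_wait_type wait)
{
        bool result;

        /* Leave idle/IPS before touching the command ring... */
        dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
        result = dm_execute_dmub_cmd(ctx, cmd, wait);
        /* ...and allow idle again once the command has completed. */
        dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);

        return result;
}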
cmd.dig1_transmitter_control.transmitter_control.dig = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result transmitter_control_v1_6( @@ -321,7 +321,7 @@ static void transmitter_control_dmcub_v1_7( sizeof(cmd.dig1_transmitter_control.header); cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result transmitter_control_v1_7( @@ -429,7 +429,7 @@ static void set_pixel_clock_dmcub( sizeof(cmd.set_pixel_clock.header); cmd.set_pixel_clock.pixel_clock.clk = *clk; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result set_pixel_clock_v7( @@ -796,7 +796,7 @@ static void enable_disp_power_gating_dmcub( sizeof(cmd.enable_disp_power_gating.header); cmd.enable_disp_power_gating.power_gating.pwr = *pwr; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result enable_disp_power_gating_v2_1( @@ -976,7 +976,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id) static enum bp_result enable_lvtma_control( struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); static void init_enable_lvtma_control(struct bios_parser *bp) @@ -989,7 +989,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp) static void enable_lvtma_control_dmcub( struct dc_dmub_srv *dmcub, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { @@ -1002,17 +1002,17 @@ static void enable_lvtma_control_dmcub( DMUB_CMD__VBIOS_LVTMA_CONTROL; cmd.lvtma_control.data.uc_pwr_action = uc_pwr_on; - cmd.lvtma_control.data.panel_inst = - panel_instance; + cmd.lvtma_control.data.pwrseq_inst = + pwrseq_instance; cmd.lvtma_control.data.bypass_panel_control_wait = bypass_panel_control_wait; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result enable_lvtma_control( struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { enum bp_result result = BP_RESULT_FAILURE; @@ -1021,7 +1021,7 @@ static enum bp_result enable_lvtma_control( bp->base.ctx->dc->debug.dmub_command_table) { enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv, uc_pwr_on, - panel_instance, + pwrseq_instance, bypass_panel_control_wait); return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h index b6d09bf6cf72..41c8c014397f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h @@ -96,7 +96,7 @@ struct cmd_tbl { struct bios_parser *bp, uint8_t id); enum bp_result (*enable_lvtma_control)(struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); }; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 3e73c4e59d40..28a2a837d2f0 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -29,6 +29,7 @@ #include "dc_types.h" #include "dccg.h" #include "clk_mgr_internal.h" +#include "dc_state_priv.h" #include "link.h" #include "dce100/dce_clk_mgr.h" @@ -63,7 +64,7 @@ int clk_mgr_helper_get_active_display_cnt( /* Don't count SubVP phantom pipes as part of active * display count */ - if (stream->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; /* @@ -368,7 +369,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p } break; -#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */ +#endif /* CONFIG_DRM_AMD_DC_FP */ default: ASSERT(0); /* Unknown Asic */ break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 3db4ef564b99..ce1386e22576 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -253,7 +253,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7326b7565846..757528256326 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -284,7 +284,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 8776055bbeaa..644da4637320 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -232,7 +232,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 09151cc56ce4..12f3e8aa46d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -239,7 +239,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = 
clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index a496930b1f9c..aadd07bc68c5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -25,7 +25,6 @@ #include "dccg.h" #include "clk_mgr_internal.h" - #include "dcn32/dcn32_clk_mgr_smu_msg.h" #include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" @@ -34,7 +33,7 @@ #include "core_types.h" #include "dm_helpers.h" #include "link.h" - +#include "dc_state_priv.h" #include "atomfirmware.h" #include "smu13_driver_if.h" @@ -458,20 +457,56 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) return 0; } -static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr) +static bool dcn32_check_native_scaling(struct pipe_ctx *pipe) { - unsigned int dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK - unsigned int dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK - unsigned int dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK - unsigned int dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK - unsigned int dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK - unsigned int fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn32_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg, + fclk_khz_reg, mall_ss_size_bytes; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; + + dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK + dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK + dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK + dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK + dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK + fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK // Overrides for these clocks in case there is no p_state change support - int dramclk_khz_override = new_clocks->dramclk_khz; - int fclk_khz_override = new_clocks->fclk_khz; + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; - int num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + num_fclk_levels 
= clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; if (!new_clocks->p_state_change_support) { dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; @@ -488,16 +523,49 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr // // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" //////////////////////////////////////////////////////////////////////////// - if (new_clocks && + if (new_clocks && active_pipe_count > 0 && new_clocks->dramclk_khz > 0 && new_clocks->fclk_khz > 0 && new_clocks->dcfclk_khz > 0 && new_clocks->dppclk_khz > 0) { + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " - "dtbclk_hw:%d - fclk_hw:%d\n", + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n", dramclk_khz_override, fclk_khz_override, new_clocks->dcfclk_khz, @@ -507,7 +575,14 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg, - fclk_khz_reg); + fclk_khz_reg, + pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2], + mall_ss_size_bytes, + p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3], + disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0], + disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1], + disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2], + disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]); } } @@ -680,6 +755,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, /* DCCG requires KHz precision for DTBCLK */ clk_mgr_base->clks.ref_dtbclk_khz = dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); + 
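/*
 * A standalone sketch, not part of the patch, of the rounded-up refresh
 * computation in dcn32_auto_dpm_test_log() above.  The driver routes the
 * divisions through div_u64() because 64-bit division is not native on
 * 32-bit builds; plain C division is used here for clarity.
 */
#include <stdint.h>

static uint64_t sketch_refresh_hz(uint32_t pix_clk_100hz,
				  uint32_t h_total, uint32_t v_total)
{
	/*
	 * pix_clk_100hz * 100 is the pixel clock in Hz; adding
	 * (v_total * h_total - 1) before dividing by the frame size
	 * rounds the quotient up instead of down.
	 */
	uint64_t rate = (uint64_t)pix_clk_100hz * 100 +
			(uint64_t)v_total * h_total - 1;

	rate /= v_total;	/* div_u64(rate, v_total) in the kernel */
	rate /= h_total;	/* div_u64(rate, h_total) in the kernel */

	return rate;
}

/*
 * Example: a 148.5 MHz pixel clock with 2200 x 1125 totals (a common
 * 1080p timing) gives sketch_refresh_hz(1485000, 2200, 1125) == 60.
 */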
dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); } @@ -708,7 +784,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.dispclk_khz / 1000 / 7); if (dc->config.enable_auto_dpm_test_logs) { - dcn32_auto_dpm_test_log(new_clocks, clk_mgr); + dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index d5fde7d23fbf..9c660d1facc7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -50,6 +50,7 @@ #include "dc_dmub_srv.h" #include "link.h" #include "logger_types.h" + #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger @@ -80,12 +81,12 @@ static int dcn35_get_active_display_cnt_wa( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + int *all_active_disps) { - int i, display_count; + int i, display_count = 0; bool tmds_present = false; - display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; @@ -103,7 +104,8 @@ static int dcn35_get_active_display_cnt_wa( link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } - + if (all_active_disps != NULL) + *all_active_disps = display_count; /* WA for hang on HDMI after display off back on*/ if (display_count == 0 && tmds_present) display_count = 1; @@ -126,21 +128,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe->stream->link_enc)) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } @@ -224,15 +218,16 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; + int all_active_disps = 0; if (dc->work_arounds.skip_clock_update) return; - /* DTBCLK is fixed, so set a default if unspecified. 
*/ + display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps); if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; @@ -254,7 +249,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn35_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ if (display_count == 0) clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; @@ -349,7 +343,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) @@ -424,9 +418,8 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, } static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, - struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) + struct clk_mgr_dcn35 *clk_mgr) { - } static struct clk_bw_params dcn35_bw_params = { @@ -826,7 +819,7 @@ static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base) struct dc_state *context = dc->current_state; if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn35_get_active_display_cnt_wa(dc, context); + display_count = dcn35_get_active_display_cnt_wa(dc, context, NULL); /* if we can go lower, go lower */ if (display_count == 0) clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; @@ -993,7 +986,6 @@ void dcn35_clk_mgr_construct( struct dccg *dccg) { struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 }; - struct clk_log_info log_info = {0}; clk_mgr->base.base.ctx = ctx; clk_mgr->base.base.funcs = &dcn35_funcs; @@ -1046,7 +1038,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); + dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index b6b8c3ca1572..6d4a1ffab5ed 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -116,6 +116,9 @@ static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un msleep(delay_us/1000); else if (delay_us > 0) udelay(delay_us); + + if (clk_mgr->base.ctx->dc->debug.disable_timeout) + max_retries++; } while (max_retries--); return res_val; @@ -276,7 +279,7 @@ void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, u clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info); - smu_print("VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %d\n", idle_info); + smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %x\n", __func__, idle_info); } void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable) @@ -295,7 +298,7 @@ void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool e clk_mgr, 
VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info.data); - smu_print("dcn35_smu_enable_phy_refclk_pwrdwn = %d\n", enable ? 1 : 0); + smu_print("%s smu_enable_phy_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0); } void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) @@ -307,6 +310,7 @@ void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) clk_mgr, VBIOSSMC_MSG_UpdatePmeRestore, 0); + smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__); } void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) @@ -347,7 +351,7 @@ void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support) { - unsigned int msg_id, param; + unsigned int msg_id, param, retv; if (!clk_mgr->smu_present) return; @@ -357,27 +361,32 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst case DCN_ZSTATE_SUPPORT_ALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 9) | (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_DISALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = 0; + smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = %d\n", __func__, param); break; default: //DCN_ZSTATE_SUPPORT_UNKNOWN @@ -387,11 +396,11 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst } - dcn35_smu_send_msg_with_param( + retv = dcn35_smu_send_msg_with_param( clk_mgr, msg_id, param); - smu_print("dcn35_smu_set_zstate_support msg_id = %d, param = %d\n", msg_id, param); + smu_print("%s: msg_id = %d, param = 0x%x, return = %d\n", __func__, msg_id, param, retv); } int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr) @@ -405,7 +414,7 @@ int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_GetDprefclkFreq, 0); - smu_print("dcn35_smu_get_DPREF clk = %d mhz\n", dprefclk); + smu_print("%s: SMU DPREF clk = %d mhz\n", __func__, dprefclk); return dprefclk * 1000; } @@ -420,7 +429,7 @@ int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_GetDtbclkFreq, 0); - smu_print("dcn35_smu_get_dtbclk = %d mhz\n", dtbclk); + smu_print("%s: get_dtbclk = %dmhz\n", __func__, dtbclk); return dtbclk * 1000; } /* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */ @@ -433,7 +442,7 @@ void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) clk_mgr, VBIOSSMC_MSG_SetDtbClk, enable); - smu_print("dcn35_smu_set_dtbclk = %d \n", enable ? 1 : 0); + smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 
1 : 0); } void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable) @@ -442,30 +451,45 @@ void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *cl clk_mgr, VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown, enable); + smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0); } int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr) { - return dcn35_smu_send_msg_with_param( + int retv; + + retv = dcn35_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_DispPsrExit, 0); + smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv); + return retv; } int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr) { - return dcn35_smu_send_msg_with_param( + int retv; + + retv = dcn35_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_QueryIPS2Support, 0); + + //smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv); + return retv; } void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param) { REG_WRITE(MP1_SMN_C2PMSG_71, param); + //smu_print("%s: write_ips_scratch = %x\n", __func__, param); } uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr) { - return REG_READ(MP1_SMN_C2PMSG_71); + uint32_t retv; + + retv = REG_READ(MP1_SMN_C2PMSG_71); + //smu_print("%s: dcn35_smu_read_ips_scratch = %x\n", __func__, retv); + return retv; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5c1185206645..2d7205058c64 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -34,6 +34,8 @@ #include "dce/dce_hwseq.h" #include "resource.h" +#include "dc_state.h" +#include "dc_state_priv.h" #include "gpio_service_interface.h" #include "clk_mgr.h" @@ -409,9 +411,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->optimized_required) return false; + if (!memcmp(&stream->adjust, adjust, sizeof(*adjust))) + return true; + stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; @@ -519,7 +524,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv, cmd.secure_display.roi_info.y_end = rect->y + rect->height; } - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } static inline void @@ -808,7 +813,7 @@ static void dc_destruct(struct dc *dc) link_enc_cfg_init(dc, dc->current_state); if (dc->current_state) { - dc_release_state(dc->current_state); + dc_state_release(dc->current_state); dc->current_state = NULL; } @@ -1020,29 +1025,27 @@ static bool dc_construct(struct dc *dc, } #endif + if (!create_links(dc, init_params->num_virtual_links)) + goto fail; + + /* Create additional DIG link encoder objects if fewer than the platform + * supports were created during link construction. 
+ */ + if (!create_link_encoders(dc)) + goto fail; + /* Creation of current_state must occur after dc->dml * is initialized in dc_create_resource_pool because * on creation it copies the contents of dc->dml */ - dc->current_state = dc_create_state(dc); + dc->current_state = dc_state_create(dc); if (!dc->current_state) { dm_error("%s: failed to create validate ctx\n", __func__); goto fail; } - if (!create_links(dc, init_params->num_virtual_links)) - goto fail; - - /* Create additional DIG link encoder objects if fewer than the platform - * supports were created during link construction. - */ - if (!create_link_encoders(dc)) - goto fail; - - dc_resource_state_construct(dc, dc->current_state); - return true; fail: @@ -1085,7 +1088,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc, } } -static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) +static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { if (dc->ctx->dce_version >= DCN_VERSION_1_0) { memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color)); @@ -1105,9 +1108,9 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE) get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP) - get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH) - get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); } } } @@ -1115,7 +1118,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; - struct dc_state *dangling_context = dc_create_state(dc); + struct dc_state *dangling_context = dc_state_create_current_copy(dc); struct dc_state *current_ctx; struct pipe_ctx *pipe; struct timing_generator *tg; @@ -1123,8 +1126,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) if (dangling_context == NULL) return; - dc_resource_state_copy_construct(dc->current_state, dangling_context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *old_stream = dc->current_state->res_ctx.pipe_ctx[i].stream; @@ -1161,6 +1162,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) } if (should_disable && old_stream) { + bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM; pipe = &dc->current_state->res_ctx.pipe_ctx[i]; tg = pipe->stream_res.tg; /* When disabling plane for a phantom pipe, we must turn on the @@ -1169,22 +1171,29 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) * state that can result in underflow or hang when enabling it * again for different use. 
*/ - if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (is_phantom) { if (tg->funcs->enable_crtc) { int main_pipe_width, main_pipe_height; + struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream); - main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width; - main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height; + main_pipe_width = old_paired_stream->dst.width; + main_pipe_height = old_paired_stream->dst.height; if (dc->hwss.blank_phantom) dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); tg->funcs->enable_crtc(tg); } } - dc_rem_all_planes_for_stream(dc, old_stream, dangling_context); + + if (is_phantom) + dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true); + else + dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, context, pipe); + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); + dc_update_visual_confirm_color(dc, context, pipe); + } if (dc->hwss.apply_ctx_for_surface) { apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true); @@ -1203,7 +1212,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) * The OTG is set to disable on falling edge of VUPDATE so the plane disable * will still get it's double buffer update. */ - if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (is_phantom) { if (tg->funcs->disable_phantom_crtc) tg->funcs->disable_phantom_crtc(tg); } @@ -1212,7 +1221,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) current_ctx = dc->current_state; dc->current_state = dangling_context; - dc_release_state(current_ctx); + dc_state_release(current_ctx); } static void disable_vbios_mode_if_required( @@ -1284,7 +1293,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) int count = 0; struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) continue; /* Timeout 100 ms */ @@ -1510,7 +1519,7 @@ static void program_timing_sync( } for (k = 0; k < group_size; k++) { - struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream); + struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream); status->timing_sync_info.group_id = num_group; status->timing_sync_info.group_size = group_size; @@ -1521,7 +1530,7 @@ static void program_timing_sync( } - /* remove any other pipes that are already been synced */ + /* remove any other unblanked pipes as they have already been synced */ if (dc->config.use_pipe_ctx_sync_logic) { /* check pipe's syncd to decide which pipe to be removed */ for (j = 1; j < group_size; j++) { @@ -1534,6 +1543,7 @@ static void program_timing_sync( pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; } } else { + /* remove any other pipes by checking valid plane */ for (j = j + 1; j < group_size; j++) { bool is_blanked; @@ -1554,7 +1564,7 @@ static void program_timing_sync( if (group_size > 1) { if (sync_type == TIMING_SYNCHRONIZABLE) { dc->hwss.enable_timing_synchronization( - dc, group_index, group_size, pipe_set); + dc, ctx, group_index, group_size, pipe_set); } 
else if (sync_type == VBLANK_SYNCHRONIZABLE) { dc->hwss.enable_vblanks_synchronization( @@ -1836,7 +1846,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; /* Check old context for SubVP */ - subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM); + subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM); if (subvp_prev_use) break; } @@ -1964,6 +1974,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c wait_for_no_pipes_pending(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); + /* Need to do otg sync again as otg could be out of sync due to otg + * workaround applied during clock update + */ + dc_trigger_sync(dc, context); } if (dc->hwss.update_dsc_pg) @@ -1990,9 +2004,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c old_state = dc->current_state; dc->current_state = context; - dc_release_state(old_state); + dc_state_release(old_state); - dc_retain_state(dc->current_state); + dc_state_retain(dc->current_state); return result; } @@ -2063,12 +2077,10 @@ enum dc_status dc_commit_streams(struct dc *dc, if (handle_exit_odm2to1) res = commit_minimal_transition_state(dc, dc->current_state); - context = dc_create_state(dc); + context = dc_state_create_current_copy(dc); if (!context) goto context_alloc_fail; - dc_resource_state_copy_construct_current(dc, context); - res = dc_validate_with_context(dc, set, stream_count, context, false); if (res != DC_OK) { BREAK_TO_DEBUGGER(); @@ -2083,7 +2095,7 @@ enum dc_status dc_commit_streams(struct dc *dc, streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst; if (dc_is_embedded_signal(streams[i]->signal)) { - struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]); + struct dc_stream_status *status = dc_state_get_stream_status(context, streams[i]); if (dc->hwss.is_abm_supported) status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]); @@ -2094,7 +2106,7 @@ enum dc_status dc_commit_streams(struct dc *dc, } fail: - dc_release_state(context); + dc_state_release(context); context_alloc_fail: @@ -2148,7 +2160,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context) pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes - if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)) + if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)) continue; /* Must set to false to start with, due to OR in update function */ @@ -2206,7 +2218,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) if (context->res_ctx.pipe_ctx[i].stream == NULL || context->res_ctx.pipe_ctx[i].plane_state == NULL) { context->res_ctx.pipe_ctx[i].pipe_idx = i; - dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]); } process_deferred_updates(dc); @@ -2218,111 +2230,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; - dc->wm_optimized_required = false; -} - -static void init_state(struct dc *dc, struct dc_state *context) -{ - /* Each context must have their own instance of VBA and in order to - * initialize and obtain IP and SOC the base DML instance from DC is - * initially copied 
into every context - */ - memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); -} - -struct dc_state *dc_create_state(struct dc *dc) -{ - struct dc_state *context = kvzalloc(sizeof(struct dc_state), - GFP_KERNEL); - - if (!context) - return NULL; - - init_state(dc, context); - -#ifdef CONFIG_DRM_AMD_DC_FP - if (dc->debug.using_dml2) { - dml2_create(dc, &dc->dml2_options, &context->bw_ctx.dml2); - } -#endif - kref_init(&context->refcount); - - return context; -} - -struct dc_state *dc_copy_state(struct dc_state *src_ctx) -{ - int i, j; - struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); -#ifdef CONFIG_DRM_AMD_DC_FP - struct dml2_context *dml2 = NULL; -#endif - - if (!new_ctx) - return NULL; - memcpy(new_ctx, src_ctx, sizeof(struct dc_state)); - -#ifdef CONFIG_DRM_AMD_DC_FP - if (new_ctx->bw_ctx.dml2) { - dml2 = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); - if (!dml2) - return NULL; - - memcpy(dml2, src_ctx->bw_ctx.dml2, sizeof(struct dml2_context)); - new_ctx->bw_ctx.dml2 = dml2; - } -#endif - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i]; - - if (cur_pipe->top_pipe) - cur_pipe->top_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; - - if (cur_pipe->bottom_pipe) - cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - - if (cur_pipe->prev_odm_pipe) - cur_pipe->prev_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; - - if (cur_pipe->next_odm_pipe) - cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; - - } - - for (i = 0; i < new_ctx->stream_count; i++) { - dc_stream_retain(new_ctx->streams[i]); - for (j = 0; j < new_ctx->stream_status[i].plane_count; j++) - dc_plane_state_retain( - new_ctx->stream_status[i].plane_states[j]); - } - - kref_init(&new_ctx->refcount); - - return new_ctx; -} - -void dc_retain_state(struct dc_state *context) -{ - kref_get(&context->refcount); -} - -static void dc_state_free(struct kref *kref) -{ - struct dc_state *context = container_of(kref, struct dc_state, refcount); - dc_resource_state_destruct(context); - -#ifdef CONFIG_DRM_AMD_DC_FP - dml2_destroy(context->bw_ctx.dml2); - context->bw_ctx.dml2 = 0; -#endif - - kvfree(context); -} - -void dc_release_state(struct dc_state *context) -{ - kref_put(&context->refcount, dc_state_free); } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -2745,8 +2652,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } - - dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -2954,9 +2859,6 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; - if (update->crtc_timing_adjust) - stream->adjust = *update->crtc_timing_adjust; - if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -2997,11 +2899,9 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config->num_slices_v != 0); /* Use temporarry context for validating new DSC config */ - struct dc_state *dsc_validate_context = dc_create_state(dc); + struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state); if (dsc_validate_context) { - dc_resource_state_copy_construct(dc->current_state, dsc_validate_context); - stream->timing.dsc_cfg = 
*update->dsc_config; stream->timing.flags.DSC = enable_dsc; if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) { @@ -3010,7 +2910,7 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config = NULL; } - dc_release_state(dsc_validate_context); + dc_state_release(dsc_validate_context); } else { DC_ERROR("Failed to allocate new validate context for DSC change\n"); update->dsc_config = NULL; @@ -3109,30 +3009,27 @@ static bool update_planes_and_stream_state(struct dc *dc, new_planes[i] = srf_updates[i].surface; /* initialize scratch memory for building context */ - context = dc_create_state(dc); + context = dc_state_create_copy(dc->current_state); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return false; } - dc_resource_state_copy_construct( - dc->current_state, context); - /* For each full update, remove all existing phantom pipes first. * Ensures that we have enough pipes for newly added MPO planes */ - if (dc->res_pool->funcs->remove_phantom_pipes) - dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); /*remove old surfaces from context */ - if (!dc_rem_all_planes_for_stream(dc, stream, context)) { + if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) { BREAK_TO_DEBUGGER(); goto fail; } /* add surface to context */ - if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { + if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { BREAK_TO_DEBUGGER(); goto fail; @@ -3157,19 +3054,6 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { - /* For phantom pipes we remove and create a new set of phantom pipes - * for each full update (because we don't know if we'll need phantom - * pipes until after the first round of validation). However, if validation - * fails we need to keep the existing phantom pipes (because we don't update - * the dc->current_state). - * - * The phantom stream/plane refcount is decremented for validation because - * we assume it'll be removed (the free comes when the dc_state is freed), - * but if validation fails we have to increment back the refcount so it's - * consistent. 
- */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state); BREAK_TO_DEBUGGER(); goto fail; } @@ -3190,7 +3074,7 @@ static bool update_planes_and_stream_state(struct dc *dc, return true; fail: - dc_release_state(context); + dc_state_release(context); return false; @@ -3386,7 +3270,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc, update_dirty_rect->panel_inst = panel_inst; update_dirty_rect->pipe_idx = j; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } } } @@ -3488,18 +3372,24 @@ static void commit_planes_for_stream_fast(struct dc *dc, { int i, j; struct pipe_ctx *top_pipe_to_program = NULL; + struct dc_stream_status *stream_status = NULL; dc_z10_restore(dc); top_pipe_to_program = resource_get_otg_master_for_stream( &context->res_ctx, stream); - if (dc->debug.visual_confirm) { - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + if (!top_pipe_to_program) + return; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, context, pipe); + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); + + if (dc->debug.visual_confirm) + dc_update_visual_confirm_color(dc, context, pipe); } } @@ -3523,6 +3413,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, } } + stream_status = dc_state_get_stream_status(context, stream); + build_dmub_cmd_list(dc, srf_updates, surface_count, @@ -3535,7 +3427,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, context->dmub_cmd_count, context->block_sequence, &(context->block_sequence_steps), - top_pipe_to_program); + top_pipe_to_program, + stream_status); hwss_execute_sequence(dc, context->block_sequence, context->block_sequence_steps); @@ -3631,7 +3524,7 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; // Check old context for SubVP - subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM); + subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM); if (subvp_prev_use) break; } @@ -3639,19 +3532,22 @@ static void commit_planes_for_stream(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { subvp_curr_use = true; break; } } - if (dc->debug.visual_confirm) - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, context, pipe); + if (dc->debug.visual_confirm) + dc_update_visual_confirm_color(dc, context, pipe); } + } if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) { struct pipe_ctx *mpcc_pipe; @@ -4024,7 +3920,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = 
&dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) { subvp_active = true; break; } @@ -4061,7 +3957,7 @@ struct pipe_split_policy_backup { static void release_minimal_transition_state(struct dc *dc, struct dc_state *context, struct pipe_split_policy_backup *policy) { - dc_release_state(context); + dc_state_release(context); /* restore previous pipe split and odm policy */ if (!dc->config.is_vmin_only_asic) dc->debug.pipe_split_policy = policy->mpc_policy; @@ -4072,7 +3968,7 @@ static void release_minimal_transition_state(struct dc *dc, static struct dc_state *create_minimal_transition_state(struct dc *dc, struct dc_state *base_context, struct pipe_split_policy_backup *policy) { - struct dc_state *minimal_transition_context = dc_create_state(dc); + struct dc_state *minimal_transition_context = NULL; unsigned int i, j; if (!dc->config.is_vmin_only_asic) { @@ -4084,7 +3980,9 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, policy->subvp_policy = dc->debug.force_disable_subvp; dc->debug.force_disable_subvp = true; - dc_resource_state_copy_construct(base_context, minimal_transition_context); + minimal_transition_context = dc_state_create_copy(base_context); + if (!minimal_transition_context) + return NULL; /* commit minimal state */ if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) { @@ -4116,7 +4014,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, bool success = false; struct dc_state *minimal_transition_context; struct pipe_split_policy_backup policy; - struct mall_temp_config mall_temp_config; /* commit based on new context */ /* Since all phantom pipes are removed in full validation, @@ -4125,8 +4022,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, * pipe as subvp/phantom will be cleared (dc copy constructor * creates a shallow copy). */ - if (dc->res_pool->funcs->save_mall_state) - dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); minimal_transition_context = create_minimal_transition_state(dc, context, &policy); if (minimal_transition_context) { @@ -4139,16 +4034,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK; } release_minimal_transition_state(dc, minimal_transition_context, &policy); - if (dc->res_pool->funcs->restore_mall_state) - dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); - /* If we do a minimal transition with plane removal and the context - * has subvp we also have to retain back the phantom stream / planes - * since the refcount is decremented as part of the min transition - * (we commit a state with no subvp, so the phantom streams / planes - * had to be removed). 
- */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, context); } if (!success) { @@ -4216,7 +4101,7 @@ static bool commit_minimal_transition_state(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { subvp_in_use = true; break; } @@ -4403,8 +4288,7 @@ static bool full_update_required(struct dc *dc, stream_update->mst_bw_update || stream_update->func_shaper || stream_update->lut3d_func || - stream_update->pending_test_pattern || - stream_update->crtc_timing_adjust)) + stream_update->pending_test_pattern)) return true; if (stream) { @@ -4482,7 +4366,6 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *context; enum surface_update_type update_type; int i; - struct mall_temp_config mall_temp_config; struct dc_fast_update fast_update[MAX_SURFACES] = {0}; /* In cases where MPO and split or ODM are used transitions can @@ -4526,23 +4409,10 @@ bool dc_update_planes_and_stream(struct dc *dc, * pipe as subvp/phantom will be cleared (dc copy constructor * creates a shallow copy). */ - if (dc->res_pool->funcs->save_mall_state) - dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); if (!commit_minimal_transition_state(dc, context)) { - dc_release_state(context); + dc_state_release(context); return false; } - if (dc->res_pool->funcs->restore_mall_state) - dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); - - /* If we do a minimal transition with plane removal and the context - * has subvp we also have to retain back the phantom stream / planes - * since the refcount is decremented as part of the min transition - * (we commit a state with no subvp, so the phantom streams / planes - * had to be removed). 
- */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, context); update_type = UPDATE_TYPE_FULL; } @@ -4599,7 +4469,7 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *old = dc->current_state; dc->current_state = context; - dc_release_state(old); + dc_state_release(old); // clear any forced full updates for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -4658,14 +4528,12 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { /* initialize scratch memory for building context */ - context = dc_create_state(dc); + context = dc_state_create_copy(state); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return; } - dc_resource_state_copy_construct(state, context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4704,7 +4572,7 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { DC_ERROR("Mode validation failed for stream update!\n"); - dc_release_state(context); + dc_state_release(context); return; } } @@ -4737,7 +4605,7 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_state *old = dc->current_state; dc->current_state = context; - dc_release_state(old); + dc_state_release(old); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -4810,7 +4678,7 @@ void dc_set_power_state( switch (power_state) { case DC_ACPI_CM_POWER_STATE_D0: - dc_resource_state_construct(dc, dc->current_state); + dc_state_construct(dc, dc->current_state); dc_z10_restore(dc); @@ -4825,7 +4693,7 @@ void dc_set_power_state( default: ASSERT(dc->current_state->stream_count == 0); - dc_resource_state_destruct(dc->current_state); + dc_state_destruct(dc->current_state); break; } @@ -4902,6 +4770,38 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable) return true; } +/* enable/disable eDP Replay without specify stream for eDP */ +bool dc_set_replay_allow_active(struct dc *dc, bool active) +{ + int i; + bool allow_active; + + for (i = 0; i < dc->current_state->stream_count; i++) { + struct dc_link *link; + struct dc_stream_state *stream = dc->current_state->streams[i]; + + link = stream->link; + if (!link) + continue; + + if (link->replay_settings.replay_feature_enabled) { + if (active && !link->replay_settings.replay_allow_active) { + allow_active = true; + if (!dc_link_set_replay_allow_active(link, &allow_active, + false, false, NULL)) + return false; + } else if (!active && link->replay_settings.replay_allow_active) { + allow_active = false; + if (!dc_link_set_replay_allow_active(link, &allow_active, + true, false, NULL)) + return false; + } + } + } + + return true; +} + void dc_allow_idle_optimizations(struct dc *dc, bool allow) { if (dc->debug.disable_idle_power_optimizations) @@ -5213,7 +5113,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, ); } - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -5267,7 +5167,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc, cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type; cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data; - if (!dm_execute_dmub_cmd(dc->ctx, &cmd, 
DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { + if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { /* command is not processed by dmub */ notify->sc_status = SET_CONFIG_UNKNOWN_ERROR; return is_cmd_complete; @@ -5310,7 +5210,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst; cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots; - if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) /* command is not processed by dmub */ return DC_ERROR_UNEXPECTED; @@ -5348,7 +5248,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE; cmd.dpia_hpd_int_enable.enable = hpd_int_enable; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable); } @@ -5447,6 +5347,8 @@ bool dc_abm_save_restore( struct dc_link *link = stream->sink->link; struct dc_link *edp_links[MAX_NUM_EDP]; + if (link->replay_settings.replay_feature_enabled) + return false; /*find primary pipe associated with stream*/ for (i = 0; i < MAX_PIPES; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index fe07160932d6..9c05b1a07142 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -31,6 +31,7 @@ #include "basics/dc_common.h" #include "resource.h" #include "dc_dmub_srv.h" +#include "dc_state_priv.h" #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) @@ -425,45 +426,130 @@ void get_hdr_visual_confirm_color( } void get_subvp_visual_confirm_color( - struct dc *dc, - struct dc_state *context, struct pipe_ctx *pipe_ctx, struct tg_color *color) { uint32_t color_value = MAX_TG_COLOR_VALUE; - bool enable_subvp = false; - int i; - - if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context) - return; + if (pipe_ctx) { + switch (pipe_ctx->p_state_type) { + case P_STATE_SUB_VP: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case P_STATE_DRR_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case P_STATE_V_BLANK_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + default: + break; + } + } +} - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; +void get_mclk_switch_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; - if (pipe->stream && pipe->stream->mall_stream_config.paired_stream && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - /* SubVP enable - red */ - color->color_g_y = 0; + if (pipe_ctx) { + switch (pipe_ctx->p_state_type) { + case P_STATE_V_BLANK: + color->color_r_cr = color_value; + color->color_g_y = color_value; color->color_b_cb = 0; + break; + case P_STATE_FPO: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = color_value; + break; + case P_STATE_V_ACTIVE: color->color_r_cr = color_value; - enable_subvp = true; - - if (pipe_ctx->stream == pipe->stream) - return; + color->color_g_y = 0; 
+ color->color_b_cb = color_value; + break; + case P_STATE_SUB_VP: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case P_STATE_DRR_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case P_STATE_V_BLANK_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + default: break; } } +} - if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) { - color->color_r_cr = 0; - if (pipe_ctx->stream->allow_freesync == 1) { - /* SubVP enable and DRR on - green */ - color->color_b_cb = 0; - color->color_g_y = color_value; +void set_p_state_switch_method( + struct dc *dc, + struct dc_state *context, + struct pipe_ctx *pipe_ctx) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + bool enable_subvp; + + if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context) + return; + + if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] != + dm_dram_clock_change_unsupported) { + /* MCLK switching is supported */ + if (!pipe_ctx->has_vactive_margin) { + /* In Vblank - yellow */ + pipe_ctx->p_state_type = P_STATE_V_BLANK; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* FPO + Vblank - cyan */ + pipe_ctx->p_state_type = P_STATE_FPO; + } } else { - /* SubVP enable and No DRR - blue */ - color->color_g_y = 0; - color->color_b_cb = color_value; + /* In Vactive - pink */ + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + } + + /* SubVP */ + enable_subvp = false; + + for (int i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) && + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + /* SubVP enable - red */ + pipe_ctx->p_state_type = P_STATE_SUB_VP; + enable_subvp = true; + + if (pipe_ctx->stream == pipe->stream) + return; + break; + } + } + + if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) { + if (pipe_ctx->stream->allow_freesync == 1) { + /* SubVP enable and DRR on - green */ + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + } else { + /* SubVP enable and No DRR - blue */ + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + } } } } @@ -473,7 +559,8 @@ void hwss_build_fast_sequence(struct dc *dc, unsigned int dmub_cmd_count, struct block_sequence block_sequence[], int *num_steps, - struct pipe_ctx *pipe_ctx) + struct pipe_ctx *pipe_ctx, + struct dc_stream_status *stream_status) { struct dc_plane_state *plane = pipe_ctx->plane_state; struct dc_stream_state *stream = pipe_ctx->stream; @@ -490,7 +577,8 @@ void hwss_build_fast_sequence(struct dc *dc, if (dc->hwss.subvp_pipe_control_lock_fast) { block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc; block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true; - block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx; + block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip = + plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN; block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST; (*num_steps)++; } @@ -529,7 +617,7 @@ void hwss_build_fast_sequence(struct dc *dc, } if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - 
current_mpc_pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+				stream_status->mall_stream_config.type == SUBVP_MAIN) {
 				block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
 				block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
 				block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
@@ -612,7 +700,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -724,7 +813,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params)
 	union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd;
 	enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type;
 
-	dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type);
 }
 
 void hwss_program_manual_trigger(union block_sequence_params *params)
@@ -812,42 +901,6 @@ void hwss_subvp_save_surf_addr(union block_sequence_params *params)
 	dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index);
 }
 
-void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
-		struct pipe_ctx *pipe_ctx,
-		struct tg_color *color)
-{
-	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
-		return;
-
-	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
-			dm_dram_clock_change_unsupported) {
-		/* MCLK switching is supported */
-		if (!pipe_ctx->has_vactive_margin) {
-			/* In Vblank - yellow */
-			color->color_r_cr = color_value;
-			color->color_g_y = color_value;
-
-			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
-				/* FPO + Vblank - cyan */
-				color->color_r_cr = 0;
-				color->color_g_y = color_value;
-				color->color_b_cb = color_value;
-			}
-		} else {
-			/* In Vactive - pink */
-			color->color_r_cr = color_value;
-			color->color_b_cb = color_value;
-		}
-		/* SubVP */
-		get_subvp_visual_confirm_color(dc, context, pipe_ctx, color);
-	}
-}
-
 void get_surface_tile_visual_confirm_color(
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index ed94187c2afa..c6c35037bdb8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -467,6 +467,13 @@ bool dc_link_setup_psr(struct dc_link *link,
 	return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
 }
 
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+		bool wait, bool force_static, const unsigned int *power_opts)
+{
+	return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+			force_static, power_opts);
+}
+
 bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
 {
 	return link->dc->link_srv->edp_get_replay_state(link, state);
@@ -497,7 +504,7 @@ void
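
The hunks above convert direct dm_execute_dmub_cmd() calls to dc_wake_and_execute_dmub_cmd(), which brings the DMCUB firmware out of idle before queuing work and lets it re-enter idle afterwards. A minimal standalone sketch of that wrap-around control flow follows; dmub_exit_idle(), dmub_allow_idle() and dmub_execute() are placeholder names, not the real dc_dmub_srv API.

#include <stdbool.h>

struct dmub_ctx {
	bool idle_allowed;
};

static void dmub_exit_idle(struct dmub_ctx *ctx)
{
	ctx->idle_allowed = false;	/* firmware must be awake to take commands */
}

static void dmub_allow_idle(struct dmub_ctx *ctx)
{
	ctx->idle_allowed = true;	/* safe to power down again */
}

static bool dmub_execute(struct dmub_ctx *ctx, int cmd)
{
	(void)ctx;
	(void)cmd;
	return true;			/* stand-in for ringing the doorbell */
}

static bool wake_and_execute(struct dmub_ctx *ctx, int cmd)
{
	bool res;

	dmub_exit_idle(ctx);		/* wake before sending */
	res = dmub_execute(ctx, cmd);
	dmub_allow_idle(ctx);		/* re-allow idle after the reply */
	return res;
}
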
dc_link_enable_hpd_filter(struct dc_link *link, bool enable) link->dc->link_srv->enable_hpd_filter(link, enable); } -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) { return dc->link_srv->validate_dpia_bandwidth(streams, count); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a1f1d1003992..57f0ddd15923 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -42,6 +42,7 @@ #include "link_enc_cfg.h" #include "link.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #include "virtual/virtual_link_hwss.h" #include "link/hwss/link_hwss_dio.h" #include "link/hwss/link_hwss_dpia.h" @@ -69,8 +70,8 @@ #include "dcn314/dcn314_resource.h" #include "dcn315/dcn315_resource.h" #include "dcn316/dcn316_resource.h" -#include "../dcn32/dcn32_resource.h" -#include "../dcn321/dcn321_resource.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" #include "dcn35/dcn35_resource.h" #define VISUAL_CONFIRM_BASE_DEFAULT 3 @@ -1764,6 +1765,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx( return free_pipe_idx; } +int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( + const struct resource_context *cur_res_ctx, + struct resource_context *new_res_ctx, + const struct resource_pool *pool) +{ + int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND; + const struct pipe_ctx *new_pipe, *cur_pipe; + int i; + + for (i = 0; i < pool->pipe_count; i++) { + cur_pipe = &cur_res_ctx->pipe_ctx[i]; + new_pipe = &new_res_ctx->pipe_ctx[i]; + + if (resource_is_pipe_type(cur_pipe, OTG_MASTER) && + resource_is_pipe_type(new_pipe, FREE_PIPE)) { + free_pipe_idx = i; + break; + } + } + + return free_pipe_idx; +} + int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine( const struct resource_context *cur_res_ctx, struct resource_context *new_res_ctx, @@ -2233,7 +2257,7 @@ static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine( } static bool update_pipe_params_after_odm_slice_count_change( - const struct dc_stream_state *stream, + struct pipe_ctx *otg_master, struct dc_state *context, const struct resource_pool *pool) { @@ -2243,9 +2267,12 @@ static bool update_pipe_params_after_odm_slice_count_change( for (i = 0; i < pool->pipe_count && result; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream == stream && pipe->plane_state) + if (pipe->stream == otg_master->stream && pipe->plane_state) result = resource_build_scaling_params(pipe); } + + if (pool->funcs->build_pipe_pix_clk_params) + pool->funcs->build_pipe_pix_clk_params(otg_master); return result; } @@ -2433,6 +2460,9 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context, struct pipe_ctx *otg_master = resource_get_otg_master_for_stream( &context->res_ctx, stream); + if (!otg_master) + return; + ASSERT(resource_get_odm_slice_count(otg_master) == 1); ASSERT(otg_master->plane_state == NULL); ASSERT(otg_master->stream_res.stream_enc); @@ -2928,7 +2958,7 @@ bool resource_update_pipes_for_stream_with_slice_count( otg_master, new_ctx, pool); if (result) result = update_pipe_params_after_odm_slice_count_change( - otg_master->stream, new_ctx, pool); + otg_master, new_ctx, pool); return result; } @@ -2967,189 +2997,6 @@ bool resource_update_pipes_for_plane_with_slice_count( return result; } -bool dc_add_plane_to_context( 
- const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context) -{ - struct resource_pool *pool = dc->res_pool; - struct pipe_ctx *otg_master_pipe; - struct dc_stream_status *stream_status = NULL; - bool added = false; - - stream_status = dc_stream_get_status_from_state(context, stream); - if (stream_status == NULL) { - dm_error("Existing stream not found; failed to attach surface!\n"); - goto out; - } else if (stream_status->plane_count == MAX_SURFACE_NUM) { - dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", - plane_state, MAX_SURFACE_NUM); - goto out; - } - - otg_master_pipe = resource_get_otg_master_for_stream( - &context->res_ctx, stream); - added = resource_append_dpp_pipes_for_plane_composition(context, - dc->current_state, pool, otg_master_pipe, plane_state); - - if (added) { - stream_status->plane_states[stream_status->plane_count] = - plane_state; - stream_status->plane_count++; - dc_plane_state_retain(plane_state); - } - -out: - return added; -} - -bool dc_remove_plane_from_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context) -{ - int i; - struct dc_stream_status *stream_status = NULL; - struct resource_pool *pool = dc->res_pool; - - if (!plane_state) - return true; - - for (i = 0; i < context->stream_count; i++) - if (context->streams[i] == stream) { - stream_status = &context->stream_status[i]; - break; - } - - if (stream_status == NULL) { - dm_error("Existing stream not found; failed to remove plane.\n"); - return false; - } - - resource_remove_dpp_pipes_for_plane_composition( - context, pool, plane_state); - - for (i = 0; i < stream_status->plane_count; i++) { - if (stream_status->plane_states[i] == plane_state) { - dc_plane_state_release(stream_status->plane_states[i]); - break; - } - } - - if (i == stream_status->plane_count) { - dm_error("Existing plane_state not found; failed to detach it!\n"); - return false; - } - - stream_status->plane_count--; - - /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */ - for (; i < stream_status->plane_count; i++) - stream_status->plane_states[i] = stream_status->plane_states[i + 1]; - - stream_status->plane_states[stream_status->plane_count] = NULL; - - if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm) - /* ODM combine could prevent us from supporting more planes - * we will reset ODM slice count back to 1 when all planes have - * been removed to maximize the amount of planes supported when - * new planes are added. - */ - resource_update_pipes_for_stream_with_slice_count( - context, dc->current_state, dc->res_pool, stream, 1); - - return true; -} - -/** - * dc_rem_all_planes_for_stream - Remove planes attached to the target stream. - * - * @dc: Current dc state. - * @stream: Target stream, which we want to remove the attached plans. - * @context: New context. - * - * Return: - * Return true if DC was able to remove all planes from the target - * stream, otherwise, return false. 
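
dc_rem_all_planes_for_stream() (and its dc_state_rem_all_planes_for_stream() replacement later in this series) snapshots the plane pointers before releasing them, because each removal compacts the very array being iterated. A self-contained sketch of that snapshot-then-remove pattern, using a toy packed-pointer array rather than the real stream_status type:

#include <stdbool.h>
#include <stddef.h>

#define MAX_ITEMS 6

struct status {
	void *items[MAX_ITEMS];
	int count;
};

static bool remove_item(struct status *s, void *item)
{
	int i;

	for (i = 0; i < s->count; i++)
		if (s->items[i] == item)
			break;
	if (i == s->count)
		return false;

	s->count--;
	for (; i < s->count; i++)	/* compact: shift the tail left */
		s->items[i] = s->items[i + 1];
	s->items[s->count] = NULL;
	return true;
}

static bool remove_all(struct status *s)
{
	void *snapshot[MAX_ITEMS];
	int i, old_count = s->count;

	for (i = 0; i < old_count; i++)	/* snapshot before mutating */
		snapshot[i] = s->items[i];

	for (i = 0; i < old_count; i++)
		if (!remove_item(s, snapshot[i]))
			return false;
	return true;
}
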
- */ -bool dc_rem_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context) -{ - int i, old_plane_count; - struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; - - for (i = 0; i < context->stream_count; i++) - if (context->streams[i] == stream) { - stream_status = &context->stream_status[i]; - break; - } - - if (stream_status == NULL) { - dm_error("Existing stream %p not found!\n", stream); - return false; - } - - old_plane_count = stream_status->plane_count; - - for (i = 0; i < old_plane_count; i++) - del_planes[i] = stream_status->plane_states[i]; - - for (i = 0; i < old_plane_count; i++) - if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context)) - return false; - - return true; -} - -static bool add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - const struct dc_validation_set set[], - int set_count, - struct dc_state *context) -{ - int i, j; - - for (i = 0; i < set_count; i++) - if (set[i].stream == stream) - break; - - if (i == set_count) { - dm_error("Stream %p not found in set!\n", stream); - return false; - } - - for (j = 0; j < set[i].plane_count; j++) - if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context)) - return false; - - return true; -} - -bool dc_add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state * const *plane_states, - int plane_count, - struct dc_state *context) -{ - struct dc_validation_set set; - int i; - - set.stream = stream; - set.plane_count = plane_count; - - for (i = 0; i < plane_count; i++) - set.plane_states[i] = plane_states[i]; - - return add_all_planes_for_stream(dc, stream, &set, 1, context); -} - bool dc_is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream) { @@ -3301,84 +3148,6 @@ static struct audio *find_first_free_audio( return NULL; } -/* - * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state. - */ -enum dc_status dc_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream) -{ - enum dc_status res; - DC_LOGGER_INIT(dc->ctx->logger); - - if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) { - DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream); - return DC_ERROR_UNEXPECTED; - } - - new_ctx->streams[new_ctx->stream_count] = stream; - dc_stream_retain(stream); - new_ctx->stream_count++; - - res = resource_add_otg_master_for_stream_output( - new_ctx, dc->res_pool, stream); - if (res != DC_OK) - DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res); - - return res; -} - -/* - * dc_remove_stream_from_ctx() - Remove a stream from a dc_state. 
- */
-enum dc_status dc_remove_stream_from_ctx(
-		struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream)
-{
-	int i;
-	struct dc_context *dc_ctx = dc->ctx;
-	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
-			&new_ctx->res_ctx, stream);
-
-	if (!del_pipe) {
-		DC_ERROR("Pipe not found for stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	resource_update_pipes_for_stream_with_slice_count(new_ctx,
-			dc->current_state, dc->res_pool, stream, 1);
-	resource_remove_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-
-	for (i = 0; i < new_ctx->stream_count; i++)
-		if (new_ctx->streams[i] == stream)
-			break;
-
-	if (new_ctx->streams[i] != stream) {
-		DC_ERROR("Context doesn't have stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	dc_stream_release(new_ctx->streams[i]);
-	new_ctx->stream_count--;
-
-	/* Trim back arrays */
-	for (; i < new_ctx->stream_count; i++) {
-		new_ctx->streams[i] = new_ctx->streams[i + 1];
-		new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = NULL;
-	memset(
-			&new_ctx->stream_status[new_ctx->stream_count],
-			0,
-			sizeof(new_ctx->stream_status[0]));
-
-	return DC_OK;
-}
-
 static struct dc_stream_state *find_pll_sharable_stream(
 		struct dc_stream_state *stream_needs_pll,
 		struct dc_state *context)
@@ -3586,6 +3355,7 @@ static void mark_seamless_boot_stream(
  * |________|_______________|___________|_____________|
  */
 static bool acquire_otg_master_pipe_for_stream(
+		const struct dc_state *cur_ctx,
 		struct dc_state *new_ctx,
 		const struct resource_pool *pool,
 		struct dc_stream_state *stream)
@@ -3599,7 +3369,22 @@ static bool acquire_otg_master_pipe_for_stream(
 	int pipe_idx;
 	struct pipe_ctx *pipe_ctx = NULL;
 
-	pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+	/*
+	 * Upper level code is responsible for optimizing away unnecessary
+	 * addition and removal of unchanged streams, so an unchanged stream
+	 * will keep the same OTG master instance allocated. When the current
+	 * stream is removed and a new stream is added, we want to reuse the
+	 * OTG instance made available by the removed stream first. If not
+	 * found, we try to avoid using any free pipes already used in the
+	 * current context, as this could tear down existing ODM/MPC/MPO
+	 * configuration unnecessarily.
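
The comment above describes a three-tier search order; the helpers that implement it all share the same linear scan with a FREE_PIPE_INDEX_NOT_FOUND sentinel. A condensed, standalone sketch of the tiered fallback; the three predicate functions are placeholders for the real OTG-master/free-pipe checks, not DC code:

#include <stdbool.h>

#define NOT_FOUND (-1)	/* plays the role of FREE_PIPE_INDEX_NOT_FOUND */

static int find_first(int count, bool (*is_candidate)(int idx))
{
	int i;

	for (i = 0; i < count; i++)
		if (is_candidate(i))
			return i;
	return NOT_FOUND;
}

/* Placeholder predicates standing in for the recource_find_free_pipe_*
 * helpers: prefer a pipe that was an OTG master and is now free, then a
 * pipe untouched in the current context, then any free pipe at all. */
static bool was_otg_master_now_free(int idx) { (void)idx; return false; }
static bool unused_in_cur_ctx(int idx)       { (void)idx; return false; }
static bool any_free(int idx)                { (void)idx; return true;  }

static int acquire_pipe(int pipe_count)
{
	int idx = find_first(pipe_count, was_otg_master_now_free);

	if (idx == NOT_FOUND)
		idx = find_first(pipe_count, unused_in_cur_ctx);
	if (idx == NOT_FOUND)
		idx = find_first(pipe_count, any_free);
	return idx;
}
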
+ */ + pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( + &cur_ctx->res_ctx, &new_ctx->res_ctx, pool); + if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( + &cur_ctx->res_ctx, &new_ctx->res_ctx, pool); + if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool); if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) { pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx]; memset(pipe_ctx, 0, sizeof(*pipe_ctx)); @@ -3659,7 +3444,7 @@ enum dc_status resource_map_pool_resources( if (!acquired) /* acquire new resources */ - acquired = acquire_otg_master_pipe_for_stream( + acquired = acquire_otg_master_pipe_for_stream(dc->current_state, context, pool, stream); pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); @@ -3742,34 +3527,6 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } -/** - * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state - * - * @dc: copy out of dc->current_state - * @dst_ctx: copy into this - * - * This function makes a shallow copy of the current DC state and increments - * refcounts on existing streams and planes. - */ -void dc_resource_state_copy_construct_current( - const struct dc *dc, - struct dc_state *dst_ctx) -{ - dc_resource_state_copy_construct(dc->current_state, dst_ctx); -} - - -void dc_resource_state_construct( - const struct dc *dc, - struct dc_state *dst_ctx) -{ - dst_ctx->clk_mgr = dc->clk_mgr; - - /* Initialise DIG link encoder resource tracking variables. */ - link_enc_cfg_init(dc, dst_ctx); -} - - bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) { if (dc->res_pool == NULL) @@ -3813,6 +3570,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, return false; } +static bool add_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + const struct dc_validation_set set[], + int set_count, + struct dc_state *state) +{ + int i, j; + + for (i = 0; i < set_count; i++) + if (set[i].stream == stream) + break; + + if (i == set_count) { + dm_error("Stream %p not found in set!\n", stream); + return false; + } + + for (j = 0; j < set[i].plane_count; j++) + if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state)) + return false; + + return true; +} + /** * dc_validate_with_context - Validate and update the potential new stream in the context object * @@ -3918,7 +3700,8 @@ enum dc_status dc_validate_with_context(struct dc *dc, unchanged_streams[i], set, set_count)) { - if (!dc_rem_all_planes_for_stream(dc, + + if (!dc_state_rem_all_planes_for_stream(dc, unchanged_streams[i], context)) { res = DC_FAIL_DETACH_SURFACES; @@ -3940,12 +3723,24 @@ enum dc_status dc_validate_with_context(struct dc *dc, } } - if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { - res = DC_FAIL_DETACH_SURFACES; - goto fail; + if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) { + /* remove phantoms specifically */ + if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + + res = dc_state_remove_phantom_stream(dc, context, del_streams[i]); + dc_state_release_phantom_stream(dc, context, del_streams[i]); + } else { + if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + + res = dc_state_remove_stream(dc, context, del_streams[i]); } - res = 
dc_remove_stream_from_ctx(dc, context, del_streams[i]); if (res != DC_OK) goto fail; } @@ -3968,7 +3763,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, /* Add new streams and then add all planes for the new stream */ for (i = 0; i < add_streams_count; i++) { calculate_phy_pix_clks(add_streams[i]); - res = dc_add_stream_to_ctx(dc, context, add_streams[i]); + res = dc_state_add_stream(dc, context, add_streams[i]); if (res != DC_OK) goto fail; @@ -4474,84 +4269,6 @@ static void set_vtem_info_packet( *info_packet = stream->vtem_infopacket; } -void dc_resource_state_destruct(struct dc_state *context) -{ - int i, j; - - for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) - dc_plane_state_release( - context->stream_status[i].plane_states[j]); - - context->stream_status[i].plane_count = 0; - dc_stream_release(context->streams[i]); - context->streams[i] = NULL; - } - context->stream_count = 0; - context->stream_mask = 0; - memset(&context->res_ctx, 0, sizeof(context->res_ctx)); - memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg)); - memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars)); - context->clk_mgr = NULL; - memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw)); - memset(context->block_sequence, 0, sizeof(context->block_sequence)); - context->block_sequence_steps = 0; - memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd)); - context->dmub_cmd_count = 0; - memset(&context->perf_params, 0, sizeof(context->perf_params)); - memset(&context->scratch, 0, sizeof(context->scratch)); -} - -void dc_resource_state_copy_construct( - const struct dc_state *src_ctx, - struct dc_state *dst_ctx) -{ - int i, j; - struct kref refcount = dst_ctx->refcount; -#ifdef CONFIG_DRM_AMD_DC_FP - struct dml2_context *dml2 = NULL; - - // Need to preserve allocated dml2 context - if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2) - dml2 = dst_ctx->bw_ctx.dml2; -#endif - - *dst_ctx = *src_ctx; - -#ifdef CONFIG_DRM_AMD_DC_FP - // Preserve allocated dml2 context - if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2) - dst_ctx->bw_ctx.dml2 = dml2; -#endif - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i]; - - if (cur_pipe->top_pipe) - cur_pipe->top_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; - - if (cur_pipe->bottom_pipe) - cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - - if (cur_pipe->next_odm_pipe) - cur_pipe->next_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; - - if (cur_pipe->prev_odm_pipe) - cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; - } - - for (i = 0; i < dst_ctx->stream_count; i++) { - dc_stream_retain(dst_ctx->streams[i]); - for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++) - dc_plane_state_retain( - dst_ctx->stream_status[i].plane_states[j]); - } - - /* context refcount should not be overridden */ - dst_ctx->refcount = refcount; - -} - struct clock_source *dc_resource_find_first_free_pll( struct resource_context *res_ctx, const struct resource_pool *pool) @@ -4731,7 +4448,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, option = DITHER_OPTION_SPATIAL8; break; case COLOR_DEPTH_101010: - option = DITHER_OPTION_SPATIAL10; + option = DITHER_OPTION_TRUN10; break; default: option = DITHER_OPTION_DISABLE; @@ -4757,6 +4474,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state 
*stream, option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + if (option == DITHER_OPTION_TRUN10) + fmt_bit_depth->flags.TRUNCATE_MODE = 1; } /* special case - Formatter can only reduce by 4 bits at most. @@ -5267,6 +4986,20 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } +bool resource_subvp_in_use(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) + return true; + } + return false; +} + bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) { if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) @@ -5274,7 +5007,7 @@ bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_st if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return true; - else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 && + else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 && ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c new file mode 100644 index 000000000000..460a8010c79f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -0,0 +1,865 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ +#include "core_types.h" +#include "core_status.h" +#include "dc_state.h" +#include "dc_state_priv.h" +#include "dc_stream_priv.h" +#include "dc_plane_priv.h" + +#include "dm_services.h" +#include "resource.h" +#include "link_enc_cfg.h" + +#include "dml2/dml2_wrapper.h" +#include "dml2/dml2_internal_types.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +/* Private dc_state helper functions */ +static bool dc_state_track_phantom_stream(struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + if (state->phantom_stream_count >= MAX_PHANTOM_PIPES) + return false; + + state->phantom_streams[state->phantom_stream_count++] = phantom_stream; + + return true; +} + +static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream) +{ + bool res = false; + int i; + + /* first find phantom stream in the dc_state */ + for (i = 0; i < state->phantom_stream_count; i++) { + if (state->phantom_streams[i] == phantom_stream) { + state->phantom_streams[i] = NULL; + res = true; + break; + } + } + + /* failed to find stream in state */ + if (!res) + return res; + + /* trim back phantom streams */ + state->phantom_stream_count--; + for (; i < state->phantom_stream_count; i++) + state->phantom_streams[i] = state->phantom_streams[i + 1]; + + return res; +} + +static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream) +{ + int i; + + for (i = 0; i < state->phantom_stream_count; i++) { + if (state->phantom_streams[i] == phantom_stream) + return true; + } + + return false; +} + +static bool dc_state_track_phantom_plane(struct dc_state *state, + struct dc_plane_state *phantom_plane) +{ + if (state->phantom_plane_count >= MAX_PHANTOM_PIPES) + return false; + + state->phantom_planes[state->phantom_plane_count++] = phantom_plane; + + return true; +} + +static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane) +{ + bool res = false; + int i; + + /* first find phantom plane in the dc_state */ + for (i = 0; i < state->phantom_plane_count; i++) { + if (state->phantom_planes[i] == phantom_plane) { + state->phantom_planes[i] = NULL; + res = true; + break; + } + } + + /* failed to find plane in state */ + if (!res) + return res; + + /* trim back phantom planes */ + state->phantom_plane_count--; + for (; i < state->phantom_plane_count; i++) + state->phantom_planes[i] = state->phantom_planes[i + 1]; + + return res; +} + +static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane) +{ + int i; + + for (i = 0; i < state->phantom_plane_count; i++) { + if (state->phantom_planes[i] == phantom_plane) + return true; + } + + return false; +} + +static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state) +{ + int i, j; + + memcpy(dst_state, src_state, sizeof(struct dc_state)); + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i]; + + if (cur_pipe->top_pipe) + cur_pipe->top_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; + + if (cur_pipe->bottom_pipe) + cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; + + if (cur_pipe->prev_odm_pipe) + cur_pipe->prev_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; + + if (cur_pipe->next_odm_pipe) + cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; + 
} + + /* retain phantoms */ + for (i = 0; i < dst_state->phantom_stream_count; i++) + dc_stream_retain(dst_state->phantom_streams[i]); + + for (i = 0; i < dst_state->phantom_plane_count; i++) + dc_plane_state_retain(dst_state->phantom_planes[i]); + + /* retain streams and planes */ + for (i = 0; i < dst_state->stream_count; i++) { + dc_stream_retain(dst_state->streams[i]); + for (j = 0; j < dst_state->stream_status[i].plane_count; j++) + dc_plane_state_retain( + dst_state->stream_status[i].plane_states[j]); + } + +} + +static void init_state(struct dc *dc, struct dc_state *state) +{ + /* Each context must have their own instance of VBA and in order to + * initialize and obtain IP and SOC the base DML instance from DC is + * initially copied into every context + */ + memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); +} + +/* Public dc_state functions */ +struct dc_state *dc_state_create(struct dc *dc) +{ + struct dc_state *state = kvzalloc(sizeof(struct dc_state), + GFP_KERNEL); + + if (!state) + return NULL; + + init_state(dc, state); + dc_state_construct(dc, state); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (dc->debug.using_dml2) + dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2); +#endif + + kref_init(&state->refcount); + + return state; +} + +void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state) +{ + struct kref refcount = dst_state->refcount; +#ifdef CONFIG_DRM_AMD_DC_FP + struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2; +#endif + + dc_state_copy_internal(dst_state, src_state); + +#ifdef CONFIG_DRM_AMD_DC_FP + dst_state->bw_ctx.dml2 = dst_dml2; + if (src_state->bw_ctx.dml2) + dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2); +#endif + + /* context refcount should not be overridden */ + dst_state->refcount = refcount; +} + +struct dc_state *dc_state_create_copy(struct dc_state *src_state) +{ + struct dc_state *new_state; + + new_state = kvmalloc(sizeof(struct dc_state), + GFP_KERNEL); + if (!new_state) + return NULL; + + dc_state_copy_internal(new_state, src_state); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (src_state->bw_ctx.dml2 && + !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) { + dc_state_release(new_state); + return NULL; + } +#endif + + kref_init(&new_state->refcount); + + return new_state; +} + +void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state) +{ + dc_state_copy(dst_state, dc->current_state); +} + +struct dc_state *dc_state_create_current_copy(struct dc *dc) +{ + return dc_state_create_copy(dc->current_state); +} + +void dc_state_construct(struct dc *dc, struct dc_state *state) +{ + state->clk_mgr = dc->clk_mgr; + + /* Initialise DIG link encoder resource tracking variables. 
*/ + link_enc_cfg_init(dc, state); +} + +void dc_state_destruct(struct dc_state *state) +{ + int i, j; + + for (i = 0; i < state->stream_count; i++) { + for (j = 0; j < state->stream_status[i].plane_count; j++) + dc_plane_state_release( + state->stream_status[i].plane_states[j]); + + state->stream_status[i].plane_count = 0; + dc_stream_release(state->streams[i]); + state->streams[i] = NULL; + } + state->stream_count = 0; + + /* release tracked phantoms */ + for (i = 0; i < state->phantom_stream_count; i++) { + dc_stream_release(state->phantom_streams[i]); + state->phantom_streams[i] = NULL; + } + + for (i = 0; i < state->phantom_plane_count; i++) { + dc_plane_state_release(state->phantom_planes[i]); + state->phantom_planes[i] = NULL; + } + state->stream_mask = 0; + memset(&state->res_ctx, 0, sizeof(state->res_ctx)); + memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg)); + memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars)); + state->clk_mgr = NULL; + memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw)); + memset(state->block_sequence, 0, sizeof(state->block_sequence)); + state->block_sequence_steps = 0; + memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd)); + state->dmub_cmd_count = 0; + memset(&state->perf_params, 0, sizeof(state->perf_params)); + memset(&state->scratch, 0, sizeof(state->scratch)); +} + +void dc_state_retain(struct dc_state *state) +{ + kref_get(&state->refcount); +} + +static void dc_state_free(struct kref *kref) +{ + struct dc_state *state = container_of(kref, struct dc_state, refcount); + + dc_state_destruct(state); + +#ifdef CONFIG_DRM_AMD_DC_FP + dml2_destroy(state->bw_ctx.dml2); + state->bw_ctx.dml2 = 0; +#endif + + kvfree(state); +} + +void dc_state_release(struct dc_state *state) +{ + kref_put(&state->refcount, dc_state_free); +} +/* + * dc_state_add_stream() - Add a new dc_stream_state to a dc_state. + */ +enum dc_status dc_state_add_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream) +{ + enum dc_status res; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (state->stream_count >= dc->res_pool->timing_generator_count) { + DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + state->streams[state->stream_count] = stream; + dc_stream_retain(stream); + state->stream_count++; + + res = resource_add_otg_master_for_stream_output( + state, dc->res_pool, stream); + if (res != DC_OK) + DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res); + + return res; +} + +/* + * dc_state_remove_stream() - Remove a stream from a dc_state. 
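
dc_state_retain()/dc_state_release() above are thin wrappers over the kernel's kref primitive; the object frees itself from the release callback via container_of(). A minimal kernel-style sketch of the same pattern for a hypothetical object (my_obj is illustrative, not DC code):

#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref refcount;
	/* payload ... */
};

static struct my_obj *my_obj_create(void)
{
	struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->refcount);	/* refcount starts at 1 */
	return obj;
}

static void my_obj_free(struct kref *kref)
{
	/* recover the containing object from its embedded kref */
	struct my_obj *obj = container_of(kref, struct my_obj, refcount);

	kfree(obj);
}

static void my_obj_retain(struct my_obj *obj)
{
	kref_get(&obj->refcount);
}

static void my_obj_release(struct my_obj *obj)
{
	kref_put(&obj->refcount, my_obj_free);	/* frees on the last put */
}
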
+ */ +enum dc_status dc_state_remove_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream) +{ + int i; + struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream( + &state->res_ctx, stream); + + if (!del_pipe) { + dm_error("Pipe not found for stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + resource_update_pipes_for_stream_with_slice_count(state, + dc->current_state, dc->res_pool, stream, 1); + resource_remove_otg_master_for_stream_output( + state, dc->res_pool, stream); + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == stream) + break; + + if (state->streams[i] != stream) { + dm_error("Context doesn't have stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + dc_stream_release(state->streams[i]); + state->stream_count--; + + /* Trim back arrays */ + for (; i < state->stream_count; i++) { + state->streams[i] = state->streams[i + 1]; + state->stream_status[i] = state->stream_status[i + 1]; + } + + state->streams[state->stream_count] = NULL; + memset( + &state->stream_status[state->stream_count], + 0, + sizeof(state->stream_status[0])); + + return DC_OK; +} + +bool dc_state_add_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state) +{ + struct resource_pool *pool = dc->res_pool; + struct pipe_ctx *otg_master_pipe; + struct dc_stream_status *stream_status = NULL; + bool added = false; + + stream_status = dc_state_get_stream_status(state, stream); + if (stream_status == NULL) { + dm_error("Existing stream not found; failed to attach surface!\n"); + goto out; + } else if (stream_status->plane_count == MAX_SURFACE_NUM) { + dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", + plane_state, MAX_SURFACE_NUM); + goto out; + } + + otg_master_pipe = resource_get_otg_master_for_stream( + &state->res_ctx, stream); + added = resource_append_dpp_pipes_for_plane_composition(state, + dc->current_state, pool, otg_master_pipe, plane_state); + + if (added) { + stream_status->plane_states[stream_status->plane_count] = + plane_state; + stream_status->plane_count++; + dc_plane_state_retain(plane_state); + } + +out: + return added; +} + +bool dc_state_remove_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state) +{ + int i; + struct dc_stream_status *stream_status = NULL; + struct resource_pool *pool = dc->res_pool; + + if (!plane_state) + return true; + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == stream) { + stream_status = &state->stream_status[i]; + break; + } + + if (stream_status == NULL) { + dm_error("Existing stream not found; failed to remove plane.\n"); + return false; + } + + resource_remove_dpp_pipes_for_plane_composition( + state, pool, plane_state); + + for (i = 0; i < stream_status->plane_count; i++) { + if (stream_status->plane_states[i] == plane_state) { + dc_plane_state_release(stream_status->plane_states[i]); + break; + } + } + + if (i == stream_status->plane_count) { + dm_error("Existing plane_state not found; failed to detach it!\n"); + return false; + } + + stream_status->plane_count--; + + /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */ + for (; i < stream_status->plane_count; i++) + stream_status->plane_states[i] = stream_status->plane_states[i + 1]; + + stream_status->plane_states[stream_status->plane_count] = NULL; + + if (stream_status->plane_count == 0 && 
dc->config.enable_windowed_mpo_odm)
+		/* ODM combine could prevent us from supporting more planes;
+		 * we will reset ODM slice count back to 1 when all planes have
+		 * been removed to maximize the amount of planes supported when
+		 * new planes are added.
+		 */
+		resource_update_pipes_for_stream_with_slice_count(
+				state, dc->current_state, dc->res_pool, stream, 1);
+
+	return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Current dc instance.
+ * @stream: Target stream, from which we want to remove the attached planes.
+ * @state: context from which the planes are to be removed.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++)
+		if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+			return false;
+
+	return true;
+}
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state)
+{
+	int i;
+	bool result = true;
+
+	for (i = 0; i < plane_count; i++)
+		if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+			result = false;
+			break;
+		}
+
+	return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
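
A short usage sketch for the lookup helpers introduced in this file, mirroring how callers elsewhere in the series query per-stream SubVP metadata. Only the wrapping function is hypothetical; the calls and fields come from this patch, and the state is assumed to have been populated earlier:

/* Hedged sketch: does this state contain a SubVP main stream with planes? */
static bool state_has_subvp_main(struct dc_state *state)
{
	int i;

	for (i = 0; i < state->stream_count; i++) {
		struct dc_stream_state *stream = state->streams[i];
		struct dc_stream_status *status =
				dc_state_get_stream_status(state, stream);

		if (status && status->plane_count > 0 &&
		    dc_state_get_stream_subvp_type(state, stream) == SUBVP_MAIN)
			return true;
	}
	return false;
}
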
+ */ +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream) +{ + uint8_t i; + + if (state == NULL) + return NULL; + + for (i = 0; i < state->stream_count; i++) { + if (stream == state->streams[i]) + return &state->stream_status[i]; + } + + return NULL; +} + +enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state, + const struct pipe_ctx *pipe_ctx) +{ + return dc_state_get_stream_subvp_type(state, pipe_ctx->stream); +} + +enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state, + const struct dc_stream_state *stream) +{ + int i; + + enum mall_stream_type type = SUBVP_NONE; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i] == stream) { + type = state->stream_status[i].mall_stream_config.type; + break; + } + } + + return type; +} + +struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state, + const struct dc_stream_state *stream) +{ + int i; + + struct dc_stream_state *paired_stream = NULL; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i] == stream) { + paired_stream = state->stream_status[i].mall_stream_config.paired_stream; + break; + } + } + + return paired_stream; +} + +struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream) +{ + struct dc_stream_state *phantom_stream; + + DC_LOGGER_INIT(dc->ctx->logger); + + phantom_stream = dc_create_stream_for_sink(main_stream->sink); + + if (!phantom_stream) { + DC_LOG_ERROR("Failed to allocate phantom stream.\n"); + return NULL; + } + + /* track phantom stream in dc_state */ + dc_state_track_phantom_stream(state, phantom_stream); + + phantom_stream->is_phantom = true; + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + + return phantom_stream; +} + +void dc_state_release_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + DC_LOGGER_INIT(dc->ctx->logger); + + if (!dc_state_untrack_phantom_stream(state, phantom_stream)) { + DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state); + return; + } + + dc_stream_release(phantom_stream); +} + +struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane) +{ + struct dc_plane_state *phantom_plane = dc_create_plane_state(dc); + + DC_LOGGER_INIT(dc->ctx->logger); + + if (!phantom_plane) { + DC_LOG_ERROR("Failed to allocate phantom plane.\n"); + return NULL; + } + + /* track phantom inside dc_state */ + dc_state_track_phantom_plane(state, phantom_plane); + + phantom_plane->is_phantom = true; + + return phantom_plane; +} + +void dc_state_release_phantom_plane(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *phantom_plane) +{ + DC_LOGGER_INIT(dc->ctx->logger); + + if (!dc_state_untrack_phantom_plane(state, phantom_plane)) { + DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state); + return; + } + + dc_plane_state_release(phantom_plane); +} + +/* add phantom streams to context and generate correct meta inside dc_state */ +enum dc_status dc_state_add_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream) +{ + struct dc_stream_status *main_stream_status; + struct dc_stream_status *phantom_stream_status; + enum dc_status res = 
dc_state_add_stream(dc, state, phantom_stream); + + /* check if stream is tracked */ + if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream)) { + /* stream must be tracked if added to state */ + dc_state_track_phantom_stream(state, phantom_stream); + } + + /* setup subvp meta */ + main_stream_status = dc_state_get_stream_status(state, main_stream); + phantom_stream_status = dc_state_get_stream_status(state, phantom_stream); + phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream_status->mall_stream_config.paired_stream = main_stream; + main_stream_status->mall_stream_config.type = SUBVP_MAIN; + main_stream_status->mall_stream_config.paired_stream = phantom_stream; + + return res; +} + +enum dc_status dc_state_remove_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + struct dc_stream_status *main_stream_status; + struct dc_stream_status *phantom_stream_status; + + /* reset subvp meta */ + phantom_stream_status = dc_state_get_stream_status(state, phantom_stream); + main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream); + phantom_stream_status->mall_stream_config.type = SUBVP_NONE; + phantom_stream_status->mall_stream_config.paired_stream = NULL; + if (main_stream_status) { + main_stream_status->mall_stream_config.type = SUBVP_NONE; + main_stream_status->mall_stream_config.paired_stream = NULL; + } + + /* remove stream from state */ + return dc_state_remove_stream(dc, state, phantom_stream); +} + +bool dc_state_add_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state) +{ + bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state); + + /* check if stream is tracked */ + if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) { + /* stream must be tracked if added to state */ + dc_state_track_phantom_plane(state, phantom_plane); + } + + return res; +} + +bool dc_state_remove_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state) +{ + return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state); +} + +bool dc_state_rem_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_state *state, + bool should_release_planes) +{ + int i, old_plane_count; + struct dc_stream_status *stream_status = NULL; + struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == phantom_stream) { + stream_status = &state->stream_status[i]; + break; + } + + if (stream_status == NULL) { + dm_error("Existing stream %p not found!\n", phantom_stream); + return false; + } + + old_plane_count = stream_status->plane_count; + + for (i = 0; i < old_plane_count; i++) + del_planes[i] = stream_status->plane_states[i]; + + for (i = 0; i < old_plane_count; i++) { + if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state)) + return false; + if (should_release_planes) + dc_state_release_phantom_plane(dc, state, del_planes[i]); + } + + return true; +} + +bool dc_state_add_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state * const *phantom_planes, + int plane_count, + struct dc_state *state) +{ + return dc_state_add_all_planes_for_stream(dc, phantom_stream, 
phantom_planes, plane_count, state); +} + +bool dc_state_remove_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state) +{ + int i; + bool removed_phantom = false; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { + phantom_stream = pipe->stream; + + dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false); + dc_state_remove_phantom_stream(dc, state, phantom_stream); + removed_phantom = true; + } + } + return removed_phantom; +} + +void dc_state_release_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state) +{ + int i; + + for (i = 0; i < state->phantom_stream_count; i++) + dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]); + + for (i = 0; i < state->phantom_plane_count; i++) + dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 4bdf105d1d71..54670e0b1518 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -31,6 +31,8 @@ #include "ipp.h" #include "timing_generator.h" #include "dc_dmub_srv.h" +#include "dc_state_priv.h" +#include "dc_stream_priv.h" #define DC_LOGGER dc->ctx->logger @@ -54,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) } } -static bool dc_stream_construct(struct dc_stream_state *stream, +bool dc_stream_construct(struct dc_stream_state *stream, struct dc_sink *dc_sink_data) { uint32_t i = 0; @@ -121,13 +123,12 @@ static bool dc_stream_construct(struct dc_stream_state *stream, } stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->stream_id = stream->ctx->dc_stream_id_count; - stream->ctx->dc_stream_id_count++; + dc_stream_assign_stream_id(stream); return true; } -static void dc_stream_destruct(struct dc_stream_state *stream) +void dc_stream_destruct(struct dc_stream_state *stream) { dc_sink_release(stream->sink); if (stream->out_transfer_func != NULL) { @@ -136,6 +137,13 @@ static void dc_stream_destruct(struct dc_stream_state *stream) } } +void dc_stream_assign_stream_id(struct dc_stream_state *stream) +{ + /* MSB is reserved to indicate phantoms */ + stream->stream_id = stream->ctx->dc_stream_id_count; + stream->ctx->dc_stream_id_count++; +} + void dc_stream_retain(struct dc_stream_state *stream) { kref_get(&stream->refcount); @@ -196,8 +204,7 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) if (new_stream->out_transfer_func) dc_transfer_func_retain(new_stream->out_transfer_func); - new_stream->stream_id = new_stream->ctx->dc_stream_id_count; - new_stream->ctx->dc_stream_id_count++; + dc_stream_assign_stream_id(new_stream); /* If using dynamic encoder assignment, wait till stream committed to assign encoder. */ if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign) @@ -209,31 +216,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) } /** - * dc_stream_get_status_from_state - Get stream status from given dc state - * @state: DC state to find the stream status in - * @stream: The stream to get the stream status for - * - * The given stream is expected to exist in the given dc state. Otherwise, NULL - * will be returned. 
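
The phantom helpers above keep main and phantom streams cross-linked through mall_stream_config. A hedged sketch of the expected setup flow, using only functions added in this file; the wrapping function is hypothetical and error handling is trimmed for brevity:

/* Sketch: create a phantom stream for a SubVP main stream and verify the
 * pairing meta that dc_state_add_phantom_stream() establishes. */
static bool setup_subvp_phantom(struct dc *dc, struct dc_state *state,
		struct dc_stream_state *main_stream)
{
	struct dc_stream_state *phantom =
			dc_state_create_phantom_stream(dc, state, main_stream);

	if (!phantom)
		return false;

	if (dc_state_add_phantom_stream(dc, state, phantom, main_stream) != DC_OK)
		return false;

	/* main and phantom now reference each other */
	return dc_state_get_paired_subvp_stream(state, main_stream) == phantom &&
	       dc_state_get_stream_subvp_type(state, phantom) == SUBVP_PHANTOM;
}
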
- */ -struct dc_stream_status *dc_stream_get_status_from_state( - struct dc_state *state, - struct dc_stream_state *stream) -{ - uint8_t i; - - if (state == NULL) - return NULL; - - for (i = 0; i < state->stream_count; i++) { - if (stream == state->streams[i]) - return &state->stream_status[i]; - } - - return NULL; -} - -/** * dc_stream_get_status() - Get current stream status of the given stream state * @stream: The stream to get the stream status for. * @@ -244,7 +226,7 @@ struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *stream) { struct dc *dc = stream->ctx->dc; - return dc_stream_get_status_from_state(dc->current_state, stream); + return dc_state_get_stream_status(dc->current_state, stream); } static void program_cursor_attributes( @@ -465,16 +447,37 @@ bool dc_stream_add_writeback(struct dc *dc, if (dc->hwss.enable_writeback) { struct dc_stream_status *stream_status = dc_stream_get_status(stream); struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; - dwb->otg_inst = stream_status->primary_otg_inst; + if (stream_status) + dwb->otg_inst = stream_status->primary_otg_inst; } + + if (!dc->hwss.update_bandwidth(dc, dc->current_state)) { + dm_error("DC: update_bandwidth failed!\n"); + return false; + } + + /* enable writeback */ + if (dc->hwss.enable_writeback) { + struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + + if (dwb->funcs->is_enabled(dwb)) { + /* writeback pipe already enabled, only need to update */ + dc->hwss.update_writeback(dc, wb_info, dc->current_state); + } else { + /* Enable writeback pipe from scratch*/ + dc->hwss.enable_writeback(dc, wb_info, dc->current_state); + } + } + return true; } -bool dc_stream_remove_writeback(struct dc *dc, +bool dc_stream_fc_disable_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst) { - int i = 0, j = 0; + struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst]; + if (stream == NULL) { dm_error("DC: dc_stream is NULL!\n"); return false; @@ -490,27 +493,63 @@ bool dc_stream_remove_writeback(struct dc *dc, return false; } -// stream->writeback_info[dwb_pipe_inst].wb_enabled = false; - for (i = 0; i < stream->num_wb_info; i++) { - /*dynamic update*/ - if (stream->writeback_info[i].wb_enabled && - stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) { - stream->writeback_info[i].wb_enabled = false; - } + if (dwb->funcs->set_fc_enable) + dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE); + + return true; +} + +bool dc_stream_remove_writeback(struct dc *dc, + struct dc_stream_state *stream, + uint32_t dwb_pipe_inst) +{ + int i = 0, j = 0; + if (stream == NULL) { + dm_error("DC: dc_stream is NULL!\n"); + return false; + } + + if (dwb_pipe_inst >= MAX_DWB_PIPES) { + dm_error("DC: writeback pipe is invalid!\n"); + return false; + } + + if (stream->num_wb_info > MAX_DWB_PIPES) { + dm_error("DC: num_wb_info is invalid!\n"); + return false; } /* remove writeback info for disabled writeback pipes from stream */ for (i = 0, j = 0; i < stream->num_wb_info; i++) { if (stream->writeback_info[i].wb_enabled) { - if (j < i) - /* trim the array */ + + if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) + stream->writeback_info[i].wb_enabled = false; + + /* trim the array */ + if (j < i) { memcpy(&stream->writeback_info[j], &stream->writeback_info[i], sizeof(struct dc_writeback_info)); - j++; + j++; + } } } stream->num_wb_info = j; + /* recalculate and apply DML parameters */ + if (!dc->hwss.update_bandwidth(dc, dc->current_state)) { + dm_error("DC: update_bandwidth 
failed!\n"); + return false; + } + + /* disable writeback */ + if (dc->hwss.disable_writeback) { + struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst]; + + if (dwb->funcs->is_enabled(dwb)) + dc->hwss.disable_writeback(dc, dwb_pipe_inst); + } + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index a80e45300783..19a2c7140ae8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -32,10 +32,12 @@ #include "transform.h" #include "dpp.h" +#include "dc_plane_priv.h" + /******************************************************************************* * Private functions ******************************************************************************/ -static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state) +void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state) { plane_state->ctx = ctx; @@ -63,7 +65,7 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl } -static void dc_plane_destruct(struct dc_plane_state *plane_state) +void dc_plane_destruct(struct dc_plane_state *plane_state) { if (plane_state->gamma_correction != NULL) { dc_gamma_release(&plane_state->gamma_correction); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 2cafd644baff..f30a341bc090 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -27,6 +27,8 @@ #define DC_INTERFACE_H_ #include "dc_types.h" +#include "dc_state.h" +#include "dc_plane.h" #include "grph_object_defs.h" #include "logger_types.h" #include "hdcp_msg_types.h" @@ -49,7 +51,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.259" +#define DC_VER "3.2.265" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -461,6 +463,12 @@ enum dml_hostvm_override_opts { DML_HOSTVM_OVERRIDE_TRUE = 0x2, }; +enum dc_replay_power_opts { + replay_power_opt_invalid = 0x0, + replay_power_opt_smu_opt_static_screen = 0x1, + replay_power_opt_z10_static_screen = 0x10, +}; + enum dcc_option { DCC_ENABLE = 0, DCC_DISABLE = 1, @@ -956,7 +964,6 @@ struct dc_debug_options { unsigned int min_prefetch_in_strobe_ns; bool disable_unbounded_requesting; bool dig_fifo_off_in_blank; - bool temp_mst_deallocation_sequence; bool override_dispclk_programming; bool otg_crc_db; bool disallow_dispclk_dppclk_ds; @@ -979,6 +986,9 @@ struct dc_debug_options { bool psp_disabled_wa; unsigned int ips2_eval_delay_us; unsigned int ips2_entry_delay_us; + bool disable_timeout; + bool disable_extblankadj; + unsigned int static_screen_wait_frames; }; struct gpu_info_soc_bounding_box_v1_0; @@ -1026,7 +1036,6 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; - bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; @@ -1389,13 +1398,6 @@ struct dc_surface_update { /* * Create a new surface with default parameters; */ -struct dc_plane_state *dc_create_plane_state(struct dc *dc); -const struct dc_plane_status *dc_plane_get_status( - const struct dc_plane_state *plane_state); - -void dc_plane_state_retain(struct dc_plane_state *plane_state); -void dc_plane_state_release(struct dc_plane_state *plane_state); - void dc_gamma_retain(struct dc_gamma *dc_gamma); void dc_gamma_release(struct dc_gamma **dc_gamma); struct dc_gamma *dc_create_gamma(void); @@ -1459,37 +1461,20 @@ enum 
dc_status dc_validate_global_state( struct dc_state *new_ctx, bool fast_validate); - -void dc_resource_state_construct( - const struct dc *dc, - struct dc_state *dst_ctx); - bool dc_acquire_release_mpc_3dlut( struct dc *dc, bool acquire, struct dc_stream_state *stream, struct dc_3dlut **lut, struct dc_transfer_func **shaper); -void dc_resource_state_copy_construct( - const struct dc_state *src_ctx, - struct dc_state *dst_ctx); - -void dc_resource_state_copy_construct_current( - const struct dc *dc, - struct dc_state *dst_ctx); - -void dc_resource_state_destruct(struct dc_state *context); - bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); +void get_audio_check(struct audio_info *aud_modes, + struct audio_check *aud_chk); enum dc_status dc_commit_streams(struct dc *dc, struct dc_stream_state *streams[], uint8_t stream_count); -struct dc_state *dc_create_state(struct dc *dc); -struct dc_state *dc_copy_state(struct dc_state *src_ctx); -void dc_retain_state(struct dc_state *context); -void dc_release_state(struct dc_state *context); struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc, struct dc_stream_state *stream, @@ -1541,7 +1526,13 @@ struct dc_link { bool is_dig_mapping_flexible; bool hpd_status; /* HPD status of link without physical HPD pin. */ bool is_hpd_pending; /* Indicates a new received hpd */ - bool is_automated; /* Indicates automated testing */ + + /* USB4 DPIA links skip verifying link cap, instead performing the fallback method + * for every link training. This is incompatible with DP LL compliance automation, + * which expects the same link settings to be used every retry on a link loss. + * This flag is used to skip the fallback when link loss occurs during automation. + */ + bool skip_fallback_on_link_loss; bool edp_sink_present; @@ -2092,6 +2083,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link, const struct dc_stream_state *stream, struct psr_config *psr_config, struct psr_context *psr_context); +/* + * Communicate with DMUB to allow or disallow Panel Replay on the specified link: + * + * @link: pointer to the dc_link struct instance + * @enable: enable (active) or disable (inactive) Replay + * @wait: whether to wait for the transition to the active state to complete + * @force_static: force disable (inactive) Replay + * @power_opts: power optimization parameters to pass to DMUB + * + * return: true if Replay is successfully allowed to go active, false otherwise. + */ +bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable, + bool wait, bool force_static, const unsigned int *power_opts); + bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state); /* On eDP links this function call will stall until T12 has elapsed.
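/*
 * A hedged usage sketch for dc_link_set_replay_allow_active() as documented
 * above; the caller, the no-wait policy and the chosen power option are
 * illustrative assumptions, not taken from this patch.
 */
static bool example_enable_replay(struct dc_link *link)
{
	const bool enable = true;
	const unsigned int power_opts = replay_power_opt_smu_opt_static_screen;

	/* Allow Replay to go active without blocking, requesting the
	 * SMU static-screen optimization; no force-static disable. */
	return dc_link_set_replay_allow_active(link, &enable,
					       false, false, &power_opts);
}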
@@ -2187,11 +2192,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( * * @dc: pointer to dc struct * @stream: pointer to all possible streams - * @num_streams: number of valid DPIA streams + * @count: number of valid DPIA streams * * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE */ -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count); /* Sink Interfaces - A sink corresponds to a display output device */ @@ -2336,6 +2341,9 @@ void dc_hardware_release(struct dc *dc); void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc); bool dc_set_psr_allow_active(struct dc *dc, bool enable); + +bool dc_set_replay_allow_active(struct dc *dc, bool active); + void dc_z10_restore(const struct dc *dc); void dc_z10_save_init(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index be9aa1a71847..26940d94d8fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -140,7 +140,7 @@ struct dc_vbios_funcs { enum bp_result (*enable_lvtma_control)( struct dc_bios *bios, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); enum bp_result (*get_soc_bb_info)( diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 0e07699c1e83..2b79a0e5638e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -33,6 +33,7 @@ #include "cursor_reg_cache.h" #include "resource.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger @@ -140,7 +141,10 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - dmub_srv_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); + if (status == DMUB_STATUS_POWER_STATE_D3) + return false; + dmub_srv_wait_for_idle(dmub, 100000); /* Requeue the command. */ @@ -148,16 +152,20 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } if (status != DMUB_STATUS_OK) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } } status = dmub_srv_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } @@ -218,7 +226,10 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - dmub_srv_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); + if (status == DMUB_STATUS_POWER_STATE_D3) + return false; + dmub_srv_wait_for_idle(dmub, 100000); /* Requeue the command. 
*/ @@ -226,22 +237,31 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun } if (status != DMUB_STATUS_OK) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } } status = dmub_srv_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } // Wait for DMUB to process command if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - status = dmub_srv_wait_for_idle(dmub, 100000); + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (status != DMUB_STATUS_OK); + } else + status = dmub_srv_wait_for_idle(dmub, 100000); if (status != DMUB_STATUS_OK) { DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); @@ -282,17 +302,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv, unsigned int stream_mask) { - struct dmub_srv *dmub; - const uint32_t timeout = 30; - if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; - dmub = dc_dmub_srv->dmub; - - return dmub_srv_send_gpint_command( - dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK, - stream_mask, timeout) == DMUB_STATUS_OK; + return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK, + stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT); } bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv) @@ -341,7 +355,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); // Send the command to the DMCUB. - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) @@ -355,7 +369,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); // Send the command to the DMCUB. - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream) @@ -448,7 +462,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header); // Send the command to the DMCUB. 
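/*
 * Condensed sketch (illustrative, not part of the patch) of the submission
 * policy the hunks above implement: a full ring is flushed and the command
 * requeued, a DMUB in D3 aborts quietly, and debug.disable_timeout turns
 * the bounded idle wait into an indefinite retry. Names local to this
 * example are assumptions.
 */
static bool example_submit_one(struct dmub_srv *dmub, union dmub_rb_cmd *cmd,
			       bool disable_timeout)
{
	enum dmub_status status = dmub_srv_cmd_queue(dmub, cmd);

	if (status == DMUB_STATUS_QUEUE_FULL) {
		/* Flush the ring, then retry the enqueue once. */
		status = dmub_srv_cmd_execute(dmub);
		if (status == DMUB_STATUS_POWER_STATE_D3)
			return false; /* asleep: drop without logging */

		dmub_srv_wait_for_idle(dmub, 100000);
		status = dmub_srv_cmd_queue(dmub, cmd);
	}

	if (status != DMUB_STATUS_OK || dmub_srv_cmd_execute(dmub) != DMUB_STATUS_OK)
		return false;

	/* disable_timeout converts the bounded wait into poll-until-idle. */
	do {
		status = dmub_srv_wait_for_idle(dmub, 100000);
	} while (disable_timeout && status != DMUB_STATUS_OK);

	return status == DMUB_STATUS_OK;
}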
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -469,7 +483,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv) cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data); /* If command was processed, copy feature caps to dmub srv */ - if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_feature_caps.header.ret_status == 0) { memcpy(&dc_dmub_srv->dmub->feature_caps, &cmd.query_feature_caps.query_feature_caps_data, @@ -494,7 +508,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst; // If command was processed, copy feature caps to dmub srv - if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.visual_confirm_color.header.ret_status == 0) { memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color, &cmd.visual_confirm_color.visual_confirm_color_data, @@ -505,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi /** * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command * - * @dc: [in] current dc state + * @dc: [in] pointer to dc object * @subvp_pipe: [in] pipe_ctx for the SubVP pipe * @vblank_pipe: [in] pipe_ctx for the DRR pipe * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info + * @context: [in] DC state for access to phantom stream * * Populate the DMCUB SubVP command with DRR pipe info. All the information * required for calculating the SubVP + DRR microschedule is populated here. @@ -519,12 +534,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi * 3. 
Populate the drr_info with the min and max supported vtotal values */ static void populate_subvp_cmd_drr_info(struct dc *dc, + struct dc_state *context, struct pipe_ctx *subvp_pipe, struct pipe_ctx *vblank_pipe, struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data) { + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; - struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + struct dc_crtc_timing *phantom_timing = &phantom_stream->timing; struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing; uint16_t drr_frame_us = 0; uint16_t min_drr_supported_us = 0; @@ -612,7 +629,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) break; } @@ -629,7 +646,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, if (vblank_pipe->stream->ignore_msa_timing_param && (vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed)) - populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data); + populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data); } /** @@ -654,10 +671,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, uint32_t subvp0_prefetch_us = 0; uint32_t subvp1_prefetch_us = 0; uint32_t prefetch_delta_us = 0; - struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing; - struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing; + struct dc_stream_state *phantom_stream0 = NULL; + struct dc_stream_state *phantom_stream1 = NULL; + struct dc_crtc_timing *phantom_timing0 = NULL; + struct dc_crtc_timing *phantom_timing1 = NULL; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; + phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream); + phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream); + phantom_timing0 = &phantom_stream0->timing; + phantom_timing1 = &phantom_stream1->timing; + subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) * (uint64_t)phantom_timing0->h_total * 1000000), (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); @@ -707,8 +731,9 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, uint32_t j; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; - struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + struct dc_crtc_timing *phantom_timing = &phantom_stream->timing; uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den; pipe_data->mode = SUBVP; @@ -762,7 +787,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j]; - if (phantom_pipe->stream == 
subvp_pipe->stream->mall_stream_config.paired_stream) { + if (phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) { pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst; if (phantom_pipe->bottom_pipe) { pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst; @@ -796,6 +821,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, union dmub_rb_cmd cmd; struct pipe_ctx *subvp_pipes[2]; uint32_t wm_val_refclk = 0; + enum mall_stream_type pipe_mall_type; memset(&cmd, 0, sizeof(cmd)); // FW command for SUBVP @@ -811,7 +837,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, */ if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) subvp_pipes[subvp_count++] = pipe; } @@ -819,6 +845,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, // For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; @@ -829,12 +856,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, */ if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.paired_stream && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + pipe_mall_type == SUBVP_MAIN) { populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); } else if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { // Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where // we run through DML without calculating "natural" P-state support populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); @@ -856,7 +882,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? 
wm_val_refclk : 0xFFFF; } - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data) @@ -1093,7 +1119,7 @@ void dc_send_update_cursor_info_to_dmu( pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp); /* Combine 2nd cmds update_curosr_info to DMU */ - dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); } } @@ -1107,25 +1133,20 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv) void dc_dmub_srv_enable_dpia_trace(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; - struct dmub_srv *dmub; - enum dmub_status status; - static const uint32_t timeout_us = 30; if (!dc_dmub_srv || !dc_dmub_srv->dmub) { DC_LOG_ERROR("%s: invalid parameters.", __func__); return; } - dmub = dc_dmub_srv->dmub; - - status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us); - if (status != DMUB_STATUS_OK) { + if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, + 0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) { DC_LOG_ERROR("timeout updating trace buffer mask word\n"); return; } - status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us); - if (status != DMUB_STATUS_OK) { + if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, + 0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) { DC_LOG_ERROR("timeout updating trace buffer mask word\n"); return; } @@ -1143,14 +1164,23 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) struct dc_context *dc_ctx = dc_dmub_srv->ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return true; + if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation) return true; if (wait) { - status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); - if (status != DMUB_STATUS_OK) { - DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status); - return false; + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { + do { + status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); + } while (status != DMUB_STATUS_OK); + } else { + status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status); + return false; + } } } else return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub); @@ -1158,7 +1188,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) return true; } -void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) +static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) { union dmub_rb_cmd cmd = {0}; @@ -1179,20 +1209,20 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) dc->hwss.set_idle_state(dc, true); } - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + /* NOTE: This does not use the "wake" interface since this is part of the wake path. */ + /* We also do not perform a wait since DMCUB could enter idle after the notification. 
*/ + dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } -void dc_dmub_srv_exit_low_power_state(const struct dc *dc) +static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) { - const uint32_t max_num_polls = 10000; uint32_t allow_state = 0; uint32_t commit_state = 0; - uint32_t i; if (dc->debug.dmcub_emulation) return; - if (!dc->idle_optimizations_allowed) + if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub) return; if (dc->hwss.get_idle_state && @@ -1204,8 +1234,16 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) { // Wait for evaluation time - udelay(dc->debug.ips2_eval_delay_us); - commit_state = dc->hwss.get_idle_state(dc); + for (;;) { + udelay(dc->debug.ips2_eval_delay_us); + commit_state = dc->hwss.get_idle_state(dc); + if (commit_state & DMUB_IPS2_ALLOW_MASK) + break; + + /* allow was still set, retry eval delay */ + dc->hwss.set_idle_state(dc, false); + } + if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) { // Tell PMFW to exit low power state dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); @@ -1214,14 +1252,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); - for (i = 0; i < max_num_polls; ++i) { + for (;;) { commit_state = dc->hwss.get_idle_state(dc); if (commit_state & DMUB_IPS2_COMMIT_MASK) break; udelay(1); } - ASSERT(i < max_num_polls); if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); @@ -1236,14 +1273,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) { - for (i = 0; i < max_num_polls; ++i) { + for (;;) { commit_state = dc->hwss.get_idle_state(dc); if (commit_state & DMUB_IPS1_COMMIT_MASK) break; udelay(1); } - ASSERT(i < max_num_polls); } } @@ -1251,3 +1287,131 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); } +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState) +{ + struct dmub_srv *dmub; + + if (!dc_dmub_srv) + return; + + dmub = dc_dmub_srv->dmub; + + if (powerState == DC_ACPI_CM_POWER_STATE_D0) + dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0); + else + dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3); +} + +void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle) +{ + struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return; + + if (dc_dmub_srv->idle_allowed == allow_idle) + return; + + /* + * Entering a low power state requires a driver notification. + * Powering up the hardware requires notifying PMFW and DMCUB. + * Clearing the driver idle allow requires a DMCUB command. + * DMCUB commands requires the DMCUB to be powered up and restored. + * + * Exit out early to prevent an infinite loop of DMCUB commands + * triggering exit low power - use software state to track this. 
+ */ + dc_dmub_srv->idle_allowed = allow_idle; + + if (!allow_idle) + dc_dmub_srv_exit_low_power_state(dc); + else + dc_dmub_srv_notify_idle(dc, allow_idle); +} + +bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, + enum dm_dmub_wait_type wait_type) +{ + return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type); +} + +bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, + union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + bool result = false, reallow_idle = false; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + if (count == 0) + return true; + + if (dc_dmub_srv->idle_allowed) { + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false); + reallow_idle = true; + } + + /* + * These may have different implementations in DM, so ensure + * that we guide it to the expected helper. + */ + if (count > 1) + result = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type); + else + result = dm_execute_dmub_cmd(ctx, cmd, wait_type); + + if (result && reallow_idle) + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true); + + return result; +} + +static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + const uint32_t wait_us = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT ? 0 : 30; + enum dmub_status status; + + if (response) + *response = 0; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + status = dmub_srv_send_gpint_command(dc_dmub_srv->dmub, command_code, param, wait_us); + if (status != DMUB_STATUS_OK) { + if (status == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT) + return true; + + return false; + } + + if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_srv_get_gpint_response(dc_dmub_srv->dmub, response); + + return true; +} + +bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + bool result = false, reallow_idle = false; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + if (dc_dmub_srv->idle_allowed) { + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false); + reallow_idle = true; + } + + result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type); + + if (result && reallow_idle) + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true); + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index d4a60f53faab..952bfb368886 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -50,6 +50,8 @@ struct dc_dmub_srv { struct dc_context *ctx; void *dm; + + bool idle_allowed; }; void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); @@ -100,6 +102,59 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc); void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index); bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); -void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle); -void dc_dmub_srv_exit_low_power_state(const struct dc *dc); + +void 
dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle); + +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); + +/** + * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution. + * + * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations. + * This function is a convenience wrapper for a single command execution. + * + * @ctx: DC context + * @cmd: The command to send/receive + * @wait_type: The wait behavior for the execution + * + * Return: true on command submission success, false otherwise + */ +bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, + enum dm_dmub_wait_type wait_type); + +/** + * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution. + * + * If the DMCUB hardware was asleep then it wakes the DMUB before + * executing the command and attempts to re-enter idle if the command + * submission was successful. + * + * This should be the preferred command submission interface provided + * the DC lock is acquired. + * + * Otherwise, entry/exit of idle power optimizations needs to be performed + * manually through dc_allow_idle_optimizations(). + * + * @ctx: DC context + * @count: Number of commands to send/receive + * @cmd: Array of commands to send + * @wait_type: The wait behavior for the execution + * + * Return: true on command submission success, false otherwise + */ +bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, + union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); + +/** + * dc_wake_and_execute_gpint() - Wrapper for DMUB GPINT command execution. + * + * @ctx: DC context + * @command_code: The command ID to send to DMCUB + * @param: The parameter to message DMCUB + * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution + */ +bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type); + #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index eeeeeef4d717..1cb7765f593a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1377,6 +1377,12 @@ struct dp_trace { #ifndef DP_TUNNELING_STATUS #define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */ #endif +#ifndef DP_TUNNELING_MAX_LINK_RATE +#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */ +#endif +#ifndef DP_TUNNELING_MAX_LANE_COUNT +#define DP_TUNNELING_MAX_LANE_COUNT 0xE0029 /* 1.4a */ +#endif #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL #define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */ #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index cb6eaddab720..8f9a67825615 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write( cmd_buf->header.payload_bytes = sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); @@ -67,7 +67,7 @@ static inline void submit_dmub_burst_write( cmd_buf->header.payload_bytes = sizeof(uint32_t) * offload->reg_seq_count; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); @@ -80,7 +80,7 @@ static inline void submit_dmub_reg_wait( { struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); offload->reg_seq_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index e2a3aa8812df..811474f4419b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -244,7 +244,7 @@ enum pixel_format { #define DC_MAX_DIRTY_RECTS 3 struct dc_flip_addrs { struct dc_plane_address address; - unsigned int flip_timestamp_in_us; + unsigned long long flip_timestamp_in_us; bool flip_immediate; /* TODO: add flip duration for FreeSync */ bool triplebuffer_flips; diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h new file mode 100644 index 000000000000..ef380cae816a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -0,0 +1,38 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_PLANE_H_ +#define _DC_PLANE_H_ + +#include "dc.h" +#include "dc_hw_types.h" + +struct dc_plane_state *dc_create_plane_state(struct dc *dc); +const struct dc_plane_status *dc_plane_get_status( + const struct dc_plane_state *plane_state); +void dc_plane_state_retain(struct dc_plane_state *plane_state); +void dc_plane_state_release(struct dc_plane_state *plane_state); + +#endif /* _DC_PLANE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h new file mode 100644 index 000000000000..9ee184c1df00 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h @@ -0,0 +1,34 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_PLANE_PRIV_H_ +#define _DC_PLANE_PRIV_H_ + +#include "dc_plane.h" + +void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state); +void dc_plane_destruct(struct dc_plane_state *plane_state); + +#endif /* _DC_PLANE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h new file mode 100644 index 000000000000..d167fdbfa8a9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_state.h @@ -0,0 +1,78 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_STATE_H_ +#define _DC_STATE_H_ + +#include "dc.h" +#include "inc/core_status.h" + +struct dc_state *dc_state_create(struct dc *dc); +void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state); +struct dc_state *dc_state_create_copy(struct dc_state *src_state); +void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state); +struct dc_state *dc_state_create_current_copy(struct dc *dc); +void dc_state_construct(struct dc *dc, struct dc_state *state); +void dc_state_destruct(struct dc_state *state); +void dc_state_retain(struct dc_state *state); +void dc_state_release(struct dc_state *state); + +enum dc_status dc_state_add_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + +enum dc_status dc_state_remove_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + +bool dc_state_add_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state); + +bool dc_state_remove_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state); + +bool dc_state_rem_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_add_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state * const *plane_states, + int plane_count, + struct dc_state *state); + +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream); +#endif /* _DC_STATE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h new file mode 100644 index 000000000000..c1f44e09a6c1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h @@ -0,0 +1,102 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_STATE_PRIV_H_ +#define _DC_STATE_PRIV_H_ + +#include "dc_state.h" +#include "dc_stream.h" + +/* Get the type of the provided resource (none, phantom, main) based on the provided + * context. If the context is unavailable, determine only if phantom or not. + */ +enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state, + const struct pipe_ctx *pipe_ctx); +enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state, + const struct dc_stream_state *stream); + +/* Gets the phantom stream if main is provided, gets the main if phantom is provided. */ +struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state, + const struct dc_stream_state *stream); + +/* Allocates a phantom stream or plane and returns a pointer to the object */ +struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); +struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + +/* Deallocates a phantom stream or plane */ +void dc_state_release_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream); +void dc_state_release_phantom_plane(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *phantom_plane); + +/* Add/remove a phantom stream to/from the context and generate SubVP metadata */ +enum dc_status dc_state_add_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); +enum dc_status dc_state_remove_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream); + +bool dc_state_add_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state); + +bool dc_state_remove_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state); + +bool dc_state_rem_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_state *state, + bool should_release_planes); + +bool dc_state_add_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state * const
*phantom_planes, + int plane_count, + struct dc_state *state); + +bool dc_state_remove_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state); + +void dc_state_release_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state); + +#endif /* _DC_STATE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e61eea6db29c..a23eebd9933b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -38,6 +38,14 @@ struct timing_sync_info { bool master; }; +struct mall_stream_config { + /* MALL stream config to indicate if the stream is phantom or not. + * We will use a phantom stream to indicate that the pipe is phantom. + */ + enum mall_stream_type type; + struct dc_stream_state *paired_stream; // master / slave stream +}; + struct dc_stream_status { int primary_otg_inst; int stream_enc_inst; @@ -50,6 +58,7 @@ struct dc_stream_status { struct timing_sync_info timing_sync_info; struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; bool is_abm_supported; + struct mall_stream_config mall_stream_config; }; enum hubp_dmdata_mode { @@ -130,7 +139,6 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; - uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; } bits; @@ -147,31 +155,6 @@ struct test_pattern { #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR) -enum mall_stream_type { - SUBVP_NONE, // subvp not in use - SUBVP_MAIN, // subvp in use, this stream is main stream - SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream -}; - -struct mall_stream_config { - /* MALL stream config to indicate if the stream is phantom or not. - * We will use a phantom stream to indicate that the pipe is phantom. - */ - enum mall_stream_type type; - struct dc_stream_state *paired_stream; // master / slave stream -}; - -/* Temp struct used to save and restore MALL config - * during validation. - * - * TODO: Move MALL config into dc_state instead of stream struct - * to avoid needing to save/restore. 
- */ -struct mall_temp_config { - struct mall_stream_config mall_stream_config[MAX_PIPES]; - bool is_phantom_plane[MAX_PIPES]; -}; - struct dc_stream_debug_options { char force_odm_combine_segments; }; @@ -301,7 +284,7 @@ struct dc_stream_state { bool has_non_synchronizable_pclk; bool vblank_synchronized; bool fpo_in_use; - struct mall_stream_config mall_stream_config; + bool is_phantom; }; #define ABM_LEVEL_IMMEDIATE_DISABLE 255 @@ -342,7 +325,6 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; - struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( @@ -415,45 +397,14 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, uint32_t *h_position, uint32_t *v_position); -enum dc_status dc_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream); - -enum dc_status dc_remove_stream_from_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream); - - -bool dc_add_plane_to_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context); - -bool dc_remove_plane_from_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context); - -bool dc_rem_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context); - -bool dc_add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state * const *plane_states, - int plane_count, - struct dc_state *context); - bool dc_stream_add_writeback(struct dc *dc, struct dc_stream_state *stream, struct dc_writeback_info *wb_info); +bool dc_stream_fc_disable_writeback(struct dc *dc, + struct dc_stream_state *stream, + uint32_t dwb_pipe_inst); + bool dc_stream_remove_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst); @@ -514,9 +465,6 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); -struct dc_stream_status *dc_stream_get_status_from_state( - struct dc_state *state, - struct dc_stream_state *stream); struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *dc_stream); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h new file mode 100644 index 000000000000..7476fd52ce2b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_STREAM_PRIV_H_ +#define _DC_STREAM_PRIV_H_ + +#include "dc_stream.h" + +bool dc_stream_construct(struct dc_stream_state *stream, + struct dc_sink *dc_sink_data); +void dc_stream_destruct(struct dc_stream_state *stream); + +void dc_stream_assign_stream_id(struct dc_stream_state *stream); + +#endif // _DC_STREAM_PRIV_H_ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 35d146217aef..4f276169e05a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1018,6 +1018,24 @@ enum replay_coasting_vtotal_type { PR_COASTING_TYPE_NUM, }; +enum replay_link_off_frame_count_level { + PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0, + PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2, + PR_LINK_OFF_FRAME_COUNT_BEST = 0x6, +}; + +/* + * This is general Interface for Replay to + * set an 32 bit variable to dmub + * The Message_type indicates which variable + * passed to DMUB. + */ +enum replay_FW_Message_type { + Replay_Msg_Not_Support = -1, + Replay_Set_Timing_Sync_Supported, + Replay_Set_Residency_Frameupdate_Timer, +}; + union replay_error_status { struct { unsigned char STATE_TRANSITION_ERROR :1; @@ -1029,26 +1047,48 @@ union replay_error_status { }; struct replay_config { - bool replay_supported; // Replay feature is supported - unsigned int replay_power_opt_supported; // Power opt flags that are supported - bool replay_smu_opt_supported; // SMU optimization is supported - unsigned int replay_enable_option; // Replay enablement option - uint32_t debug_flags; // Replay debug flags - bool replay_timing_sync_supported; // Replay desync is supported - bool force_disable_desync_error_check; // Replay desync is supported - bool received_desync_error_hpd; //Replay Received Desync Error HPD. - union replay_error_status replay_error_status; // Replay error status -}; - -/* Replay feature flags */ + /* Replay feature is supported */ + bool replay_supported; + /* Power opt flags that are supported */ + unsigned int replay_power_opt_supported; + /* SMU optimization is supported */ + bool replay_smu_opt_supported; + /* Replay enablement option */ + unsigned int replay_enable_option; + /* Replay debug flags */ + uint32_t debug_flags; + /* Replay sync is supported */ + bool replay_timing_sync_supported; + /* Replay Disable desync error check. */ + bool force_disable_desync_error_check; + /* Replay Received Desync Error HPD. 
*/ + bool received_desync_error_hpd; + /* Replay supports fast resync in ultra sleep (long vblank) mode */ + bool replay_support_fast_resync_in_ultra_sleep_mode; + /* Replay error status */ + union replay_error_status replay_error_status; +}; + +/* Replay feature flags */ struct replay_settings { - struct replay_config config; // Replay configuration - bool replay_feature_enabled; // Replay feature is ready for activating - bool replay_allow_active; // Replay is currently active - unsigned int replay_power_opt_active; // Power opt flags that are activated currently - bool replay_smu_opt_enable; // SMU optimization is enabled - uint16_t coasting_vtotal; // Current Coasting vtotal - uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table + /* Replay configuration */ + struct replay_config config; + /* Replay feature is ready for activating */ + bool replay_feature_enabled; + /* Replay is currently active */ + bool replay_allow_active; + /* Replay is allowed with long vblank */ + bool replay_allow_long_vblank; + /* Power opt flags that are activated currently */ + unsigned int replay_power_opt_active; + /* SMU optimization is enabled */ + bool replay_smu_opt_enable; + /* Current Coasting vtotal */ + uint16_t coasting_vtotal; + /* Coasting vtotal table */ + uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; + /* Maximum link off frame count */ + enum replay_link_off_frame_count_level link_off_frame_count_level; }; /* To split out "global" and "per-panel" config settings. @@ -1111,6 +1151,8 @@ struct dc_dpia_bw_alloc { int bw_granularity; // BW Granularity bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side + uint8_t nrd_max_lane_count; // Non-reduced max lane count + uint8_t nrd_max_link_rate; // Non-reduced max link rate }; #define MAX_SINKS_PER_LINK 4 @@ -1121,4 +1163,9 @@ enum dc_hpd_enable_select { HPD_EN_FOR_SECONDARY_EDP_ONLY, }; +enum mall_stream_type { + SUBVP_NONE, // subvp not in use + SUBVP_MAIN, // subvp in use, this stream is main stream + SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream +}; #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 874b132fe1d7..a6006776333d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -135,7 +135,7 @@ static void dmcu_set_backlight_level( 0, 1, 80000); } -static void dce_abm_init(struct abm *abm, uint32_t backlight) +static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); @@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight) BL1_PWM_TARGET_ABM_LEVEL, backlight); REG_UPDATE(BL1_PWM_USER_LEVEL, - BL1_PWM_USER_LEVEL, backlight); + BL1_PWM_USER_LEVEL, user_level); REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM1_LS_MIN_PIXEL_VALUE_THRES, 0, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index d3e6544022b7..ccc154b0281c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -57,18 +57,22 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst return ret; } -static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight) +static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level) { - dmub_abm_init(abm, backlight); + 
dmub_abm_init(abm, backlight, user_level); } static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm) { + dc_allow_idle_optimizations(abm->ctx->dc, false); + return dmub_abm_get_current_backlight(abm); } static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm) { + dc_allow_idle_optimizations(abm->ctx->dc, false); + return dmub_abm_get_target_backlight(abm); } @@ -145,7 +149,11 @@ static bool dmub_abm_save_restore_ex( return ret; } -static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +static bool dmub_abm_set_pipe_ex(struct abm *abm, + uint32_t otg_inst, + uint32_t option, + uint32_t panel_inst, + uint32_t pwrseq_inst) { bool ret = false; unsigned int feature_support; @@ -153,7 +161,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op feature_support = abm_feature_support(abm, panel_inst); if (feature_support == ABM_LCD_SUPPORT) - ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst); + ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst); return ret; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 592a8f7a1c6d..f9d6a181164a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -76,10 +76,10 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask; cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } -void dmub_abm_init(struct abm *abm, uint32_t backlight) +void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); @@ -106,7 +106,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight) BL1_PWM_TARGET_ABM_LEVEL, backlight); REG_UPDATE(BL1_PWM_USER_LEVEL, - BL1_PWM_USER_LEVEL, backlight); + BL1_PWM_USER_LEVEL, user_level); REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM1_LS_MIN_PIXEL_VALUE_THRES, 0, @@ -155,7 +155,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask) cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -186,7 +186,7 @@ void dmub_abm_init_config(struct abm *abm, cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -203,7 +203,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un cmd.abm_pause.abm_pause_data.panel_mask = panel_mask; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -246,7 +246,7 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); // Copy iramtable data into local 
structure memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes); @@ -254,7 +254,11 @@ bool dmub_abm_save_restore( return true; } -bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +bool dmub_abm_set_pipe(struct abm *abm, + uint32_t otg_inst, + uint32_t option, + uint32_t panel_inst, + uint32_t pwrseq_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; @@ -264,12 +268,13 @@ bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint cmd.abm_set_pipe.header.type = DMUB_CMD__ABM; cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE; cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst; + cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst; cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option; cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst; cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -291,7 +296,7 @@ bool dmub_abm_set_backlight_level(struct abm *abm, cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h index 853564d7f471..761685e5b8c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h @@ -30,7 +30,7 @@ struct abm_save_restore; -void dmub_abm_init(struct abm *abm, uint32_t backlight); +void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level); bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask); unsigned int dmub_abm_get_current_backlight(struct abm *abm); unsigned int dmub_abm_get_target_backlight(struct abm *abm); @@ -44,7 +44,7 @@ bool dmub_abm_save_restore( struct dc_context *dc, unsigned int panel_inst, struct abm_save_restore *pData); -bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst); +bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst); bool dmub_abm_set_backlight_level(struct abm *abm, unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 2aa0e01a6891..ba1fec3016d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv, if (!lock) cmd.lock_hw.lock_hw_data.should_release = 1; - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c index d8009b2dc56a..98a778996e1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c @@ 
-48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv) sizeof(cmd.outbox1_enable.header); cmd.outbox1_enable.enable = true; - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 9d4170a356a2..3e243e407bb8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -105,23 +105,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) */ static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst) { - struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub; uint32_t raw_state = 0; uint32_t retry_count = 0; - enum dmub_status status; do { // Send gpint command and wait for ack - status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30); - - if (status == DMUB_STATUS_OK) { - // GPINT was executed, get response - dmub_srv_get_gpint_response(srv, &raw_state); + if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { *state = convert_psr_state(raw_state); - } else + } else { // Return invalid state when GPINT times out *state = PSR_STATE_INVALID; - + } } while (++retry_count <= 1000 && *state == PSR_STATE_INVALID); // Assert if max retry hit @@ -171,7 +166,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state * cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst; cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -199,7 +194,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8 cmd.psr_enable.header.payload_bytes = 0; // Send header only - dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); /* Below loops 1000 x 500us = 500 ms. * Exit PSR may need to wait 1-2 frames to power up. 
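Every dm_execute_dmub_cmd() call converted in these hunks, and the raw dmub_srv_send_gpint_command()/dmub_srv_get_gpint_response() pair in dmub_psr_get_state(), move to the dc_wake_and_execute_*() helpers, which bring DMUB out of idle before touching the inbox. A minimal sketch of the GPINT form, reusing the parameter packing from dmub_psr_get_residency() further down; the wrapper name is illustrative:

static bool query_psr_residency(struct dc_context *ctx, uint8_t panel_inst,
                                uint32_t *residency)
{
        /* Panel instance rides in the upper byte, as in the hunk below. */
        uint16_t param = (uint16_t)(panel_inst << 8);

        /* One call covers wake, GPINT send, and reply readback. */
        return dc_wake_and_execute_gpint(ctx, DMUB_GPINT__PSR_RESIDENCY,
                                         param, residency,
                                         DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
}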
Timeout after at @@ -248,7 +243,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_ cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -267,7 +262,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub, cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle; cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -286,7 +281,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt; cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -423,7 +418,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->relock_delay_frame_cnt = 2; copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -444,7 +439,7 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst) cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC; cmd.psr_enable.header.payload_bytes = 0; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -452,13 +447,11 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst) */ static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst) { - struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub; uint16_t param = (uint16_t)(panel_inst << 8); /* Send gpint command and wait for ack */ - dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30); - - dmub_srv_get_gpint_response(srv, residency); + dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); } static const struct dmub_psr_funcs psr_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 28149e53c2a6..38e4797e9476 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -258,13 +258,97 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, *residency = 0; } +/** + * Set REPLAY power optimization flags and coasting vtotal. 
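The function documented here folds what were two separate inbox commands (replay_set_power_opt and replay_set_coasting_vtotal) into a single DMUB round-trip. A hedged caller-side sketch through the funcs table; the call site is illustrative:

static void replay_apply_power_opt(struct dmub_replay *replay,
                                   unsigned int power_opt, uint8_t panel_inst,
                                   uint16_t coasting_vtotal)
{
        /* New combined hook; older code would issue two commands here. */
        if (replay->funcs->replay_set_power_opt_and_coasting_vtotal)
                replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay,
                                power_opt, panel_inst, coasting_vtotal);
}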
+ */ +static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub, + unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + + memset(&cmd, 0, sizeof(cmd)); + cmd.replay_set_power_opt_and_coasting_vtotal.header.type = DMUB_CMD__REPLAY; + cmd.replay_set_power_opt_and_coasting_vtotal.header.sub_type = + DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; + cmd.replay_set_power_opt_and_coasting_vtotal.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.power_opt = power_opt; + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.panel_inst = panel_inst; + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal; + + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + +/** + * send Replay general cmd to DMUB. + */ +static void dmub_replay_send_cmd(struct dmub_replay *dmub, + enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element) +{ + union dmub_rb_cmd cmd; + struct dc_context *ctx = NULL; + + if (dmub == NULL || cmd_element == NULL) + return; + + ctx = dmub->ctx; + if (ctx != NULL) { + + if (msg != Replay_Msg_Not_Support) { + memset(&cmd, 0, sizeof(cmd)); + //Header + cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY; + } else + return; + } else + return; + + switch (msg) { + case Replay_Set_Timing_Sync_Supported: + //Header + cmd.replay_set_timing_sync.header.sub_type = + DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; + cmd.replay_set_timing_sync.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + //Cmd Body + cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = + cmd_element->sync_data.panel_inst; + cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported = + cmd_element->sync_data.timing_sync_supported; + break; + case Replay_Set_Residency_Frameupdate_Timer: + //Header + cmd.replay_set_frameupdate_timer.header.sub_type = + DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; + cmd.replay_set_frameupdate_timer.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + //Cmd Body + cmd.replay_set_frameupdate_timer.data.panel_inst = + cmd_element->panel_inst; + cmd.replay_set_frameupdate_timer.data.enable = + cmd_element->timer_data.enable; + cmd.replay_set_frameupdate_timer.data.frameupdate_count = + cmd_element->timer_data.frameupdate_count; + break; + case Replay_Msg_Not_Support: + default: + return; + break; + } + + dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + static const struct dmub_replay_funcs replay_funcs = { - .replay_copy_settings = dmub_replay_copy_settings, - .replay_enable = dmub_replay_enable, - .replay_get_state = dmub_replay_get_state, - .replay_set_power_opt = dmub_replay_set_power_opt, - .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal, - .replay_residency = dmub_replay_residency, + .replay_copy_settings = dmub_replay_copy_settings, + .replay_enable = dmub_replay_enable, + .replay_get_state = dmub_replay_get_state, + .replay_set_power_opt = dmub_replay_set_power_opt, + .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal, + .replay_residency = dmub_replay_residency, + .replay_set_power_opt_and_coasting_vtotal = dmub_replay_set_power_opt_and_coasting_vtotal, + .replay_send_cmd = dmub_replay_send_cmd, }; /* diff --git 
a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index e8385bbf51fc..3613aff994d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -45,10 +45,14 @@ struct dmub_replay_funcs { struct replay_context *replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, uint8_t panel_inst); + void (*replay_send_cmd)(struct dmub_replay *dmub, + enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element); void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal, uint8_t panel_inst); void (*replay_residency)(struct dmub_replay *dmub, uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm); + void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub, + unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal); }; struct dmub_replay *dmub_replay_create(struct dc_context *ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile deleted file mode 100644 index 0d2f6bbf7558..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright 2017 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# -# Makefile for the 'controller' sub-component of DAL. -# It provides the control and status of HW CRTC block. 
- -CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init) - -DCE100 = dce100_resource.o - -AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCE100) - - -############################################################################### -# DCE 10x -############################################################################### -ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0 -TG_DCE100 = dce100_resource.o - -AMD_DAL_TG_DCE100 = $(addprefix \ - $(AMDDALPATH)/dc/dce100/,$(TG_DCE100)) - -AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100) -endif - diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile index 695a50ed5ad2..f0777d61c2cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile @@ -26,8 +26,8 @@ CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init) DCE110 = dce110_timing_generator.o \ -dce110_compressor.o dce110_resource.o \ -dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \ +dce110_compressor.o dce110_opp_regamma_v.o \ +dce110_opp_csc_v.o dce110_timing_generator_v.o \ dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110)) diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile index e846ef58cab3..7e92effec894 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init) -DCE112 = dce112_compressor.o \ -dce112_resource.o +DCE112 = dce112_compressor.o AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112)) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile index 097cf407a15d..1e3ef68a452a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile @@ -26,7 +26,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init) -DCE120 = dce120_resource.o dce120_timing_generator.o \ +DCE120 = dce120_timing_generator.o AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120)) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index 93dd68c31275..7eefffbdc925 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init) -DCE80 = dce80_timing_generator.o \ - dce80_resource.o +DCE80 = dce80_timing_generator.o AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 2d2007c3e2b6..ae6a131be71b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -22,9 +22,9 @@ # # Makefile for DCN. 
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o \ +DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ + dcn10_dpp.o dcn10_opp.o \ dcn10_hubp.o dcn10_mpc.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 92fdab731f4a..9033b39e0e0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -32,7 +32,7 @@ #include "dce/dce_hwseq.h" #include "abm.h" #include "dmcu.h" -#include "dcn10_optc.h" +#include "dcn10/dcn10_optc.h" #include "dcn10/dcn10_dpp.h" #include "dcn10/dcn10_mpc.h" #include "timing_generator.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index d7dc9696a8c8..3dae3943b056 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -2,13 +2,11 @@ # # Makefile for DCN. -DCN20 = dcn20_resource.o dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ - dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \ +DCN20 = dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ + dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \ dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o -DCN20 += dcn20_dsc.o - AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN20) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index ab6d09c6fe34..ef5c22f41563 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -291,7 +291,43 @@ type SYMCLKB_FE_SRC_SEL;\ type SYMCLKC_FE_SRC_SEL;\ type SYMCLKD_FE_SRC_SEL;\ - type SYMCLKE_FE_SRC_SEL; + type SYMCLKE_FE_SRC_SEL;\ + type DTBCLK_P0_GATE_DISABLE;\ + type DTBCLK_P1_GATE_DISABLE;\ + type DTBCLK_P2_GATE_DISABLE;\ + type DTBCLK_P3_GATE_DISABLE;\ + type DSCCLK0_ROOT_GATE_DISABLE;\ + type DSCCLK1_ROOT_GATE_DISABLE;\ + type DSCCLK2_ROOT_GATE_DISABLE;\ + type DSCCLK3_ROOT_GATE_DISABLE;\ + type SYMCLKA_FE_ROOT_GATE_DISABLE;\ + type SYMCLKB_FE_ROOT_GATE_DISABLE;\ + type SYMCLKC_FE_ROOT_GATE_DISABLE;\ + type SYMCLKD_FE_ROOT_GATE_DISABLE;\ + type SYMCLKE_FE_ROOT_GATE_DISABLE;\ + type DPPCLK0_ROOT_GATE_DISABLE;\ + type DPPCLK1_ROOT_GATE_DISABLE;\ + type DPPCLK2_ROOT_GATE_DISABLE;\ + type DPPCLK3_ROOT_GATE_DISABLE;\ + type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\ + type SYMCLKA_ROOT_GATE_DISABLE;\ + type SYMCLKB_ROOT_GATE_DISABLE;\ + type SYMCLKC_ROOT_GATE_DISABLE;\ + type SYMCLKD_ROOT_GATE_DISABLE;\ + type SYMCLKE_ROOT_GATE_DISABLE;\ + type PHYA_REFCLK_ROOT_GATE_DISABLE;\ + type PHYB_REFCLK_ROOT_GATE_DISABLE;\ + type PHYC_REFCLK_ROOT_GATE_DISABLE;\ + type PHYD_REFCLK_ROOT_GATE_DISABLE;\ + type PHYE_REFCLK_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK0_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK1_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK2_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK3_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK0_GATE_DISABLE;\ + type DPSTREAMCLK1_GATE_DISABLE;\ + type DPSTREAMCLK2_GATE_DISABLE;\ + type DPSTREAMCLK3_GATE_DISABLE;\ struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index 
3a41a97b0729..2b0b4f32e13b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -1,9 +1,8 @@ # SPDX-License-Identifier: MIT # # Makefile for DCN. -DCN201 = dcn201_init.o dcn201_resource.o \ - dcn201_hubbub.o\ - dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ +DCN201 = dcn201_hubbub.o\ + dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index ce1be0afae4a..ca92f5c8e7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -2,7 +2,7 @@ # # Makefile for DCN21. -DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \ +DCN21 = dcn21_hubp.o dcn21_hubbub.o \ dcn21_link_encoder.o dcn21_dccg.o AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 68cad55c72ab..e13d69a22c1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -691,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp, cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; PERF_TRACE(); // TODO: remove after performance is stable. - dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); PERF_TRACE(); // TODO: remove after performance is stable. } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index af4d2065d2c1..b5b2aa3b3783 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,12 +23,9 @@ # # -DCN30 := \ - dcn30_init.o \ - dcn30_hubbub.o \ +DCN30 := dcn30_hubbub.o \ dcn30_hubp.o \ dcn30_dpp.o \ - dcn30_optc.o \ dcn30_dccg.o \ dcn30_mpc.o dcn30_vpg.o \ dcn30_afmt.o \ @@ -38,7 +35,6 @@ DCN30 := \ dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ - dcn30_resource.o \ dcn30_dio_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c index 0d98918bf0fc..1b9d9495f76d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c @@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc) return true; } +void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable) +{ + struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); + unsigned int pre_locked; + + REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked); + + /* Lock DWB registers */ + if (pre_locked == 0) + REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1); + + /* Disable FC */ + REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable); + + /* Unlock DWB registers */ + if (pre_locked == 0) + REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0); + + DC_LOG_DWB("%s dwb3_fc_disabled at inst = %d", __func__, dwbc->inst); +} + + bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params) { struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); @@ -226,6 +248,7 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = { .disable = dwb3_disable, .update = dwb3_update, .is_enabled = dwb3_is_enabled, + .set_fc_enable = dwb3_set_fc_enable, .set_stereo = dwb3_set_stereo, .set_new_content = 
dwb3_set_new_content, .dwb_program_output_csc = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h index a5d1b81e768d..332634b76aac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h @@ -877,6 +877,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params); bool dwb3_is_enabled(struct dwbc *dwbc); +void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable); + void dwb3_set_stereo(struct dwbc *dwbc, struct dwb_stereo_params *stereo_params); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c index 701c7d8bc038..03a50c32fcfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c @@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut( return false; } + if (params->hw_points_num == 0) + return false; + REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2); current_mode = dwb3_get_ogam_current(dwbc30); diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 30fbc5e06dca..d241f665e40a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -10,9 +10,8 @@ # # Makefile for dcn30. -DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \ - dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \ - dcn301_optc.o +DCN301 = dcn301_dccg.o \ + dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile deleted file mode 100644 index 95b66baf39e9..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# -# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn302. - -DCN3_02 = dcn302_init.o dcn302_resource.o - -AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile index d7b3ad780e5d..a954e316aca2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile @@ -6,7 +6,7 @@ # # Makefile for dcn303. -DCN3_03 = dcn303_init.o dcn303_resource.o +DCN3_03 = dcn303_init.o AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 96e45c9efb46..5d93ac16c03a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -10,8 +10,8 @@ # # Makefile for dcn31. 
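dwb3_set_fc_enable() above is also exposed through the dwbc vtable (.set_fc_enable), so sequencer-level code can gate DWB frame capture per instance. A usage sketch; the enumerator names of enum dwb_frame_capture_enable are assumed here, since the diff only shows the type name:

static void dwb_toggle_frame_capture(struct dwbc *dwbc, bool enable)
{
        if (dwbc->funcs->set_fc_enable)
                dwbc->funcs->set_fc_enable(dwbc,
                                enable ? DWB_FRAME_CAPTURE_ENABLE    /* assumed name */
                                       : DWB_FRAME_CAPTURE_DISABLE); /* assumed name */
}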
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \ - dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ +DCN31 = dcn31_hubbub.o dcn31_hubp.o \ + dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index 4596f3bac1b4..26be5fee7411 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc, cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data); cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter); - if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) return false; return true; @@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx, cmd.dig1_dpia_control.dpia_control = *dpia_control; - dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 217acd4e292a..03248422d6ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -50,9 +50,9 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL; cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO; cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data); - cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst; + cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst; - return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); + return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); } static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl) @@ -78,14 +78,14 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL; cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT; cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data); - cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst; + cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst; cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL; cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL; cmd.panel_cntl.data.bl_pwm_ref_div1 = panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; cmd.panel_cntl.data.bl_pwm_ref_div2 = panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2; - if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) return 0; panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl; @@ -157,4 +157,5 @@ void dcn31_panel_cntl_construct( dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; 
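        /* Alongside ctx and inst below, pwrseq_inst is now wired from
         * init_data as well: the DMUB panel-control queries above key on
         * the power-sequencer instance rather than the panel instance. */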
dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; + dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index 72456debb99f..b134ab05aa71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -10,8 +10,7 @@ # # Makefile for dcn314. -DCN314 = dcn314_resource.o dcn314_init.o \ - dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o +DCN314 = dcn314_dio_stream_encoder.o dcn314_dccg.o AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile deleted file mode 100644 index 59381d24800b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright © 2021 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Authors: AMD -# -# Makefile for dcn315. - -DCN315 = dcn315_resource.o - -AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN315) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile deleted file mode 100644 index 819d44a9439b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright 2021 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Authors: AMD -# -# Makefile for dcn316. - -DCN316 = dcn316_resource.o - -AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN316) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 8bb251307247..5314770fff1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -10,10 +10,10 @@ # # Makefile for dcn32. -DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \ - dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \ - dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ - dcn32_resource_helpers.o dcn32_mpc.o +DCN32 = dcn32_hubbub.o dcn32_dccg.o \ + dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \ + dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \ + dcn32_hpo_dp_link_encoder.o AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 994b21ed272f..e789e654c387 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut( { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on); + if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) { if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { - ASSERT(false); /* TODO: change to mpc * dpp_base->ctx->dc->optimized_required = true; * dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index bc5f0db23d0c..e4a328b45c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -24,10 +24,11 @@ */ // header file of functions being implemented -#include "dcn32_resource.h" +#include "dcn32/dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" #include "dml/dcn32/dcn32_fpu.h" +#include "dc_state_priv.h" static bool is_dual_plane(enum surface_pixel_format format) { @@ -182,20 +183,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, return true; } -bool dcn32_subvp_in_use(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) - return true; - } - return false; -} - bool dcn32_mpo_in_use(struct dc_state *context) { uint32_t i; @@ -264,18 +251,17 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint // Do not override if a stream has multiple planes for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) { + if 
(context->stream_status[i].plane_count > 1) return; - } - if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) { + + if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM) stream_count++; - } } for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) { if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) { @@ -290,7 +276,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) { if (pipe_segments[i] > 4) pipe_segments[i] = 4; @@ -337,14 +323,14 @@ void dcn32_determine_det_override(struct dc *dc, for (i = 0; i < context->stream_count; i++) { /* Don't count SubVP streams for DET allocation */ - if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) + if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM) stream_count++; } if (stream_count > 0) { stream_segments = 18 / stream_count; for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM) continue; if (context->stream_status[i].plane_count > 0) @@ -430,71 +416,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, dcn32_determine_det_override(dc, context, pipes); } -/** - * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases - * - * This function saves the MALL (SubVP) case for fast validation cases. For fast validation, - * there are situations where a shallow copy of the dc->current_state is created for the - * validation. In this case we want to save and restore the mall config because we always - * teardown subvp at the beginning of validation (and don't attempt to add it back if it's - * fast validation). If we don't restore the subvp config in cases of fast validation + - * shallow copy of the dc->current_state, the dc->current_state will have a partially - * removed subvp state when we did not intend to remove it. - * - * NOTE: This function ONLY works if the streams are not moved to a different pipe in the - * validation. We don't expect this to happen in fast_validation=1 cases. 
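The recurring substitution in these dcn32 hunks moves SubVP classification off dc_stream_state (mall_stream_config) and into dc_state behind accessors, which is also why the save/restore helpers removed here are no longer needed. Side by side, taken from the lines above:

/* before: SubVP type read off the stream itself */
if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
        continue;

/* after: type owned by dc_state, read through an accessor */
if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM)
        continue;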
- * - * @dc: Current DC state - * @context: New DC state to be programmed - * @temp_config: struct used to cache the existing MALL state - * - * Return: void - */ -void dcn32_save_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream) - temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config; - - if (pipe->plane_state) - temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom; - } -} - -/** - * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases - * - * Restore the MALL state based on the previously saved state from dcn32_save_mall_state - * - * @dc: Current DC state - * @context: New DC state to be programmed, restore MALL state into here - * @temp_config: struct that has the cached MALL state - * - * Return: void - */ -void dcn32_restore_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream) - pipe->stream->mall_stream_config = temp_config->mall_stream_config[i]; - - if (pipe->plane_state) - pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i]; - } -} - #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW) /* * Scaling factor for v_blank stretch calculations considering timing in @@ -589,13 +510,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream) * * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL */ -struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context) +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) { int refresh_rate = 0; const int minimum_refreshrate_supported = 120; struct dc_stream_state *fpo_candidate_stream = NULL; bool is_fpo_vactive = false; uint32_t fpo_vactive_margin_us = 0; + struct dc_stream_status *fpo_stream_status = NULL; if (context == NULL) return NULL; @@ -618,16 +540,28 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre DC_FP_START(); dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream); DC_FP_END(); - + if (fpo_candidate_stream) + fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream); DC_FP_START(); is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us); DC_FP_END(); if (!is_fpo_vactive || dc->debug.disable_fpo_vactive) return NULL; - } else + } else { fpo_candidate_stream = context->streams[0]; + if (fpo_candidate_stream) + fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream); + } - if (!fpo_candidate_stream) + /* In DCN32/321, FPO uses per-pipe P-State force. + * If there's no planes, HUBP is power gated and + * therefore programming UCLK_PSTATE_FORCE does + * nothing (P-State will always be asserted naturally + * on a pipe that has HUBP power gated. Therefore we + * only want to enable FPO if the FPO pipe has both + * a stream and a plane. 
+ */ + if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0) return NULL; if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams) @@ -666,6 +600,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt } /** + * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs + * + * @pipe: subvp pipe to be used for the subvp + drr/vblank config + * + * Since subvp is being enabled on more configs (such as 1080p60), we want + * to explicitly block any configs that we don't want to enable. We do not + * want to enable any 1080p60 (SubVP) + drr / vblank configs since these + * are already convered by FPO. + * + * Return: True if disallowed, false otherwise + */ +static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe) +{ + bool disallow = false; + + if (resource_is_pipe_type(pipe, OPP_HEAD) && + resource_is_pipe_type(pipe, DPP_PIPE)) { + if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920) + disallow = true; + } + return disallow; +} + +/** * dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible * * @dc: Current DC state @@ -688,21 +646,24 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) bool drr_pipe_found = false; bool drr_psr_capable = false; uint64_t refresh_rate = 0; + bool subvp_disallow = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (resource_is_pipe_type(pipe, OPP_HEAD) && resource_is_pipe_type(pipe, DPP_PIPE)) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe_mall_type == SUBVP_MAIN) { subvp_count++; + subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) { non_subvp_pipes++; drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe)); if (pipe->stream->ignore_msa_timing_param && @@ -713,7 +674,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) } } - if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable && + if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable && ((uint32_t)refresh_rate < 120)) result = true; @@ -746,21 +707,24 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int struct vba_vars_st *vba = &context->bw_ctx.dml.vba; bool vblank_psr_capable = false; uint64_t refresh_rate = 0; + bool subvp_disallow = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (resource_is_pipe_type(pipe, OPP_HEAD) && resource_is_pipe_type(pipe, DPP_PIPE)) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe_mall_type == SUBVP_MAIN) { subvp_count++; + subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * 
pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) { non_subvp_pipes++; vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe)); if (pipe->stream->ignore_msa_timing_param && @@ -772,9 +736,35 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int } if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable && - ((uint32_t)refresh_rate < 120) && + ((uint32_t)refresh_rate < 120) && !subvp_disallow && vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) result = true; return result; } + +void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe = NULL; + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + int odm_slice_count = 0; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + odm_slice_count = resource_get_odm_slice_count(pipe); + + if (odm_slice_count == 1) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + else if (odm_slice_count == 2) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + else if (odm_slice_count == 4) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1; + + pipe_cnt++; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index 0a199c83bb5b..c195c47f58b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn321. -DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o +DCN321 = dcn321_dio_link_encoder.o AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile index 20d0eef1a13b..0e317e0c36a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile @@ -10,9 +10,9 @@ # # Makefile for DCN35. 
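For reference, the refresh-rate expression used in both admissibility checks above is a ceiling-style division of pixel clock by the raster area, evaluated with two div_u64() steps. A worked example with standard CEA 1080p60 timing (assumed figures: pix_clk_100hz = 1485000, h_total = 2200, v_total = 1125):

/* refresh ~= ceil(pix_clk_hz / (v_total * h_total)), two-step division */
uint64_t refresh_rate = 1485000ull * 100           /* 148,500,000 Hz      */
                        + 1125 * 2200 - 1;         /* + v_total*h_total-1 */
refresh_rate = div_u64(refresh_rate, 1125);        /* / v_total           */
refresh_rate = div_u64(refresh_rate, 2200);        /* / h_total -> 60     */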
-DCN35 = dcn35_resource.o dcn35_init.o dcn35_dio_stream_encoder.o \ - dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \ - dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \ +DCN35 = dcn35_dio_stream_encoder.o \ + dcn35_dio_link_encoder.o dcn35_dccg.o \ + dcn35_hubp.o dcn35_hubbub.o \ dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o AMD_DAL_DCN35 = $(addprefix $(AMDDALPATH)/dc/dcn35/,$(DCN35)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index 479f3683c0b7..f1ba7bb792ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -256,6 +256,21 @@ static void dccg35_set_dtbclk_dto( if (params->ref_dtbclk_khz && req_dtbclk_khz) { uint32_t modulo, phase; + switch (params->otg_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1); + break; + } + // phase / modulo = dtbclk / dtbclk ref modulo = params->ref_dtbclk_khz * 1000; phase = req_dtbclk_khz * 1000; @@ -280,6 +295,21 @@ static void dccg35_set_dtbclk_dto( REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], PIPE_DTO_SRC_SEL[params->otg_inst], 2); } else { + switch (params->otg_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0); + break; + } + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], DTBCLK_DTO_ENABLE[params->otg_inst], 0, PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 
0 : 1); @@ -476,6 +506,64 @@ static void dccg35_dpp_root_clock_control( dccg->dpp_clock_gated[dpp_inst] = !clock_on; } +static void dccg35_disable_symclk32_se( + struct dccg *dccg, + int hpo_se_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* set refclk as the source for symclk32_se */ + switch (hpo_se_inst) { + case 0: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE0_SRC_SEL, 0, + SYMCLK32_SE0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE0_GATE_DISABLE, 0); + } + break; + case 1: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE1_SRC_SEL, 0, + SYMCLK32_SE1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE1_GATE_DISABLE, 0); + } + break; + case 2: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE2_SRC_SEL, 0, + SYMCLK32_SE2_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE2_GATE_DISABLE, 0); + } + break; + case 3: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE3_SRC_SEL, 0, + SYMCLK32_SE3_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE3_GATE_DISABLE, 0); + } + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -484,7 +572,7 @@ void dccg35_init(struct dccg *dccg) * will cause DCN to hang. 
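dccg35_init() and the funcs table in this hunk now route symclk32_se disables through the DCN35-local dccg35_disable_symclk32_se() added above, which extends the DCN31 behavior by also re-gating the per-SE clocks when root_clock_optimization.bits.symclk32_se is set. An abridged sketch of the override pattern, mirroring the funcs-table change below:

static const struct dccg_funcs dccg35_funcs = {
        /* DCN31 hooks are reused unchanged where possible... */
        .enable_symclk32_se  = dccg31_enable_symclk32_se,
        /* ...only the disable path gets a DCN35 override so the SE gate
         * can be dropped together with the clock disable. */
        .disable_symclk32_se = dccg35_disable_symclk32_se,
};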
*/ for (otg_inst = 0; otg_inst < 4; otg_inst++) - dccg31_disable_symclk32_se(dccg, otg_inst); + dccg35_disable_symclk32_se(dccg, otg_inst); if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) for (otg_inst = 0; otg_inst < 2; otg_inst++) @@ -758,7 +846,7 @@ static const struct dccg_funcs dccg35_funcs = { .dccg_init = dccg35_init, .set_dpstreamclk = dccg35_set_dpstreamclk, .enable_symclk32_se = dccg31_enable_symclk32_se, - .disable_symclk32_se = dccg31_disable_symclk32_se, + .disable_symclk32_se = dccg35_disable_symclk32_se, .enable_symclk32_le = dccg31_enable_symclk32_le, .disable_symclk32_le = dccg31_disable_symclk32_le, .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h index 423feb4c2f3f..1586a45ca3bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h @@ -34,6 +34,8 @@ #define DCCG_REG_LIST_DCN35() \ DCCG_REG_LIST_DCN314(),\ SR(DPPCLK_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL4),\ + SR(DCCG_GATE_DISABLE_CNTL5),\ SR(DCCG_GATE_DISABLE_CNTL6),\ SR(DCCG_GLOBAL_FGCG_REP_CNTL),\ SR(SYMCLKA_CLOCK_ENABLE),\ @@ -174,7 +176,61 @@ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh) + DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, 
mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ struct dccg *dccg35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index f91e08895275..da94e5309fba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -256,6 +256,10 @@ void dcn35_link_encoder_construct( enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN; enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN; enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN; + if (bp_cap_info.DP_IS_USB_C) { + /*BIOS not switch to use CONNECTOR_ID_USBC = 24 yet*/ + enc10->base.features.flags.bits.DP_IS_USB_C = 1; + } } else { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", @@ -264,4 +268,5 @@ void dcn35_link_encoder_construct( } if (enc10->base.ctx->dc->debug.hdmi20_disable) enc10->base.features.flags.bits.HDMI_6GB_EN = 0; + } diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index d19db8e9b8a5..53bd0ae4bab5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -342,13 +342,6 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) pg_cntl->pg_res_enable[PG_DCIO] = power_on; } -void 
pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on) -{ - struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); - - REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 1 : 0); -} - static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl) { struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); @@ -518,8 +511,7 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control, - .set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22 + .dwb_pg_control = pg_cntl35_dwb_pg_control }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h index 069dae08e222..3de240884d22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h @@ -183,7 +183,6 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on); void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl); -void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on); struct pg_cntl *pg_cntl35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 7ce9a5b6c33b..6d7a15dcf8a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -103,10 +103,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( /* * Sends ALLOCATE_PAYLOAD message. 
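 * With this change the helper no longer reports status and the bool
 * 'enable' parameter is dropped; an illustrative caller-side sketch of
 * the split (not part of this patch, 'enable' stands in for the removed
 * parameter):
 *
 *	if (enable)
 *		dm_helpers_dp_mst_send_payload_allocation(ctx, stream);
 *	else
 *		dm_helpers_dp_mst_update_mst_mgr_for_deallocation(ctx, stream);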
*/ -bool dm_helpers_dp_mst_send_payload_allocation( +void dm_helpers_dp_mst_send_payload_allocation( struct dc_context *ctx, - const struct dc_stream_state *stream, - bool enable); + const struct dc_stream_state *stream); + +/* + * Update mst manager relevant variables + */ +void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + struct dc_context *ctx, + const struct dc_stream_state *stream); bool dm_helpers_dp_mst_start_top_mgr( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 4440d08743aa..bd7ba0a25198 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -247,6 +247,7 @@ struct pp_smu_funcs_nv { #define PP_SMU_NUM_MEMCLK_DPM_LEVELS 4 #define PP_SMU_NUM_DCLK_DPM_LEVELS 8 #define PP_SMU_NUM_VCLK_DPM_LEVELS 8 +#define PP_SMU_NUM_VPECLK_DPM_LEVELS 8 struct dpm_clock { uint32_t Freq; // In MHz @@ -262,6 +263,7 @@ struct dpm_clocks { struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS]; struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS]; struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS]; + struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS]; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 50b0434354f8..0c4a8fe8e5ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -30,7 +30,7 @@ #include "dcn_calc_auto.h" #include "dal_asic_id.h" #include "resource.h" -#include "dcn10/dcn10_resource.h" +#include "resource/dcn10/dcn10_resource.h" #include "dcn10/dcn10_hubbub.h" #include "dml/dml1_display_rq_dlg_calc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index d2271e308fa0..38ab9ad60ef8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -33,6 +33,7 @@ #include "link.h" #include "dcn20_fpu.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc->ctx->logger @@ -1182,7 +1183,7 @@ void dcn20_calculate_dlg_params(struct dc *dc, pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1532,7 +1533,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc, */ if (res_ctx->pipe_ctx[i].plane_state && (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM)) + dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM)) pipes[pipe_cnt].pipe.src.num_cursors = 0; else pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 3686f1e7de3a..63c48c29ba49 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { struct vba_vars_st *v = &mode_lib->vba; int MinPrefetchMode, MaxPrefetchMode; - int i; + int i, start_state; unsigned int j, k, m; bool EnoughWritebackUnits = true; bool WritebackModeSupport = true; @@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + if (mode_lib->validate_max_state) + start_state = v->soc.num_states - 1; + else + start_state = 0; + CalculateMinAndMaxPrefetchMode( mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &MinPrefetchMode, &MaxPrefetchMode); @@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SingleDPPViewportSizeSupportPerPlane, &v->ViewportSizeSupport[0][0]); - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); @@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Total Available Pipes Support Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { v->TotalAvailablePipesSupport[i][j] = true; @@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->RequiresDSC[i][k] = false; v->RequiresFEC[i][k] = false; @@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->DIOSupport[i] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi) @@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; ++i) { + for (i = start_state; i < v->soc.num_states; ++i) { v->ODMCombine4To1SupportCheckOK[i] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 @@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->NotEnoughDSCUnits[i] = false; v->TotalDSCUnitsRequired = 0.0; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*DSC Delay per state*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (v->OutputBppPerState[i][k] == BPP_INVALID) { v->BPP = 0.0; @@ 
-4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; @@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*PTE Buffer Size Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->PTEBufferSizeNotExceeded[i][j] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { + for (i = v->soc.num_states - 1; i >= start_state; i--) { for (j = 0; j < 2; j++) { if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1 @@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } { unsigned int MaximumMPCCombine = 0; - for (i = v->soc.num_states; i >= 0; i--) { + for (i = v->soc.num_states; i >= start_state; i--) { if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { v->VoltageLevel = i; v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index b46cde525066..aa68d010cbfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -32,6 +32,8 @@ #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" #include "dcn30/dcn30_resource.h" #include "link.h" +#include "dc_state_priv.h" +#include "resource.h" #define DC_LOGGER_INIT(logger) @@ -45,6 +47,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = { {.width = 1920, .height = 1080, }}, }; +static const struct subvp_active_margin_list subvp_active_margin_list = { + .min_refresh = 55, + .max_refresh = 65, + .res = { + {.width = 2560, .height = 1440, }, + {.width = 1920, .height = 1080, }}, +}; + struct _vcs_dpi_ip_params_st dcn3_2_ip = { .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, @@ -282,7 +292,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, /* for subvp + DRR case, if subvp pipes are still present we support pstate */ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && - dcn32_subvp_in_use(dc, context)) + resource_subvp_in_use(dc, context)) vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; if (vlevel < context->bw_ctx.dml.vba.soc.num_states && @@ -333,7 +343,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, if (!pipe->stream) continue; - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, 
pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vupdate_offset = @@ -616,7 +626,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc, if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) && !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) && (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) && - pipe->stream->mall_stream_config.type == SUBVP_NONE && + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE && (refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) && !pipe->plane_state->address.tmz_surface && (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 || @@ -674,7 +684,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context // Find the minimum pipe split count for non SubVP pipes if (resource_is_pipe_type(pipe, OPP_HEAD) && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) { split_cnt = 0; while (pipe) { split_cnt++; @@ -727,8 +737,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. */ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = dc_state_get_paired_subvp_stream(context, pipe->stream); microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + phantom->timing.v_addressable; @@ -796,6 +806,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) int16_t stretched_drr_us = 0; int16_t drr_stretched_vblank_us = 0; int16_t max_vblank_mallregion = 0; + struct dc_stream_state *phantom_stream; + bool subvp_found = false; + bool drr_found = false; // Find SubVP pipe for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -808,8 +821,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + subvp_found = true; break; + } } // Find the DRR pipe @@ -817,32 +832,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) drr_pipe = &context->res_ctx.pipe_ctx[i]; // We check for master pipe only - if (!resource_is_pipe_type(pipe, OTG_MASTER) || - !resource_is_pipe_type(pipe, DPP_PIPE)) + if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) || + !resource_is_pipe_type(drr_pipe, DPP_PIPE)) continue; - if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && - (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) + if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && + (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) { + drr_found = true; break; + } } - main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - 
(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + if (subvp_found && drr_found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream); + main_timing = &pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + drr_timing = &drr_pipe->stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays are already included in phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + } /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
If DML outputs SubVP + VBLANK @@ -896,6 +918,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) */ for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); // We check for master pipe, but it shouldn't matter since we only need // the pipe for timing info (stream should be same for any pipe splits) @@ -903,18 +926,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) !resource_is_pipe_type(pipe, DPP_PIPE)) continue; - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (!found && pipe_mall_type == SUBVP_NONE) { // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). vblank_index = i; found = true; } - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } if (found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe // Also include the prefetch end to mallstart delay time @@ -969,7 +993,7 @@ static bool subvp_subvp_admissable(struct dc *dc, continue; if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); @@ -1018,23 +1042,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc, for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; if (pipe->plane_state && !pipe->top_pipe) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (pipe_mall_type == SUBVP_MAIN) subvp_count++; - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) non_subvp_pipes++; - } } // Count how many planes that aren't SubVP/phantom are capable of VACTIVE // switching (SubVP + VACTIVE unsupported). In situations where we force // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. 
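The refresh-rate tests in subvp_subvp_admissable() above and in dcn32_allow_subvp_with_active_margin() further down share one integer idiom: compute ceil(pixel_clock / (h_total * v_total)) without floating point by biasing the numerator before two div_u64() steps. A self-contained restatement (the helper name is illustrative, not part of this patch):

	#include <linux/math64.h>

	/* Round-up refresh rate: adding (v_total * h_total - 1) to the
	 * numerator makes the subsequent divisions behave as a ceiling;
	 * div_u64() keeps the 64-bit intermediate portable. */
	static u32 example_refresh_hz(u32 pix_clk_100hz, u32 h_total, u32 v_total)
	{
		u64 rate = (u64)pix_clk_100hz * 100 + (u64)v_total * h_total - 1;

		rate = div_u64(rate, v_total);
		return (u32)div_u64(rate, h_total);
	}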
if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { vactive_count++; } pipe_idx++; @@ -1070,7 +1094,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { pipe_ctx->subvp_index = index++; } else { pipe_ctx->subvp_index = 0; @@ -1237,15 +1261,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex { int i; - for (i = 0; i < table->odm_combine_count; i++) { + for (i = 0; i < table->odm_combine_count; i++) resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, table->odm_combines[i].stream, table->odm_combines[i].slice_count); - /* TODO: move this into the function above */ - dcn20_build_mapped_resource(dc, context, - table->odm_combines[i].stream); - } for (i = 0; i < table->mpc_combine_count; i++) resource_update_pipes_for_plane_with_slice_count(context, @@ -1412,6 +1432,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, unsigned int dc_pipe_idx = 0; int i = 0; bool found_supported_config = false; + int vlevel_temp = 0; dc_assert_fp_enabled(); @@ -1444,13 +1465,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, */ if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) && !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || + (*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) || vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || dc->debug.force_subvp_mclk_switch)) { dcn32_merge_pipes_for_subvp(dc, context); memset(merge, 0, MAX_PIPES * sizeof(bool)); + vlevel_temp = *vlevel; /* to re-initialize viewport after the pipe merge */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -1519,10 +1542,14 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } + if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp]) + found_supported_config = false; + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove phantom pipes and repopulate dml pipes if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); @@ -1674,7 +1701,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom 
pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1706,7 +1733,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) && context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { /* SS: all active surfaces stored in MALL */ - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) { context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes; if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) { @@ -1920,7 +1947,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, return false; // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); dc->res_pool->funcs->update_soc_for_wm_a(dc, context); @@ -2182,6 +2210,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); /* repopulate_pipes = 1 means the pipes were either split or merged. In this case * we have to re-calculate the DET allocation and run through DML once more to @@ -2190,7 +2219,9 @@ bool dcn32_internal_validate_bw(struct dc *dc, * */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel == context->bw_ctx.dml.soc.num_states) { /* failed after DET size changes */ goto validate_fail; @@ -2242,7 +2273,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool subvp_in_use = dcn32_subvp_in_use(dc, context); + bool subvp_active = resource_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; @@ -2251,7 +2282,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc_assert_fp_enabled(); /* need to find dummy latency index for subvp */ - if (subvp_in_use) { + if (subvp_active) { /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ if (!pstate_en) { context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; @@ -2437,7 +2468,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -3299,25 +3330,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe) { bool allow = false; uint32_t refresh_rate = 0; + 
uint32_t min_refresh = subvp_active_margin_list.min_refresh; + uint32_t max_refresh = subvp_active_margin_list.max_refresh; + uint32_t i; - /* Allow subvp on displays that have active margin for 2560x1440@60hz displays - * only for now. There must be no scaling as well. - * - * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs - * for p-state switching. - */ - if (pipe->stream && pipe->plane_state) { - refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) - / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); - if (pipe->stream->timing.v_addressable == 1440 && - pipe->stream->timing.h_addressable == 2560 && - refresh_rate >= 55 && refresh_rate <= 65 && - pipe->plane_state->src_rect.height == 1440 && - pipe->plane_state->src_rect.width == 2560 && - pipe->plane_state->dst_rect.height == 1440 && - pipe->plane_state->dst_rect.width == 2560) + for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) { + uint32_t width = subvp_active_margin_list.res[i].width; + uint32_t height = subvp_active_margin_list.res[i].height; + + refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); + + if (refresh_rate >= min_refresh && refresh_rate <= max_refresh && + dcn32_check_native_scaling_for_res(pipe, width, height)) { allow = true; + break; + } } return allow; } @@ -3436,7 +3466,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) + /* In DCN32/321, FPO uses per-pipe P-State force. + * If there are no planes, HUBP is power gated and + * therefore programming UCLK_PSTATE_FORCE does + * nothing (P-State will always be asserted naturally + * on a pipe that has HUBP power gated). Therefore we + * only want to enable FPO if the FPO pipe has both + * a stream and a plane. + */ + if (!pipe->stream || !pipe->plane_state) continue; if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { @@ -3490,7 +3528,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb) void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context) { // WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue) - if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) && + if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) && dc->dml.soc.num_chans <= 8) { int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index cbdfb762c10c..6c84b0fa40f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, + /* Output */ &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], @@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightCThisState[k], v->TWait, (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index d940dfa5ae43..80fccd4999a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, @@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule( /* Clamp to oto for bandwidth calculation */ LinesForPrefetchBandwidth = dst_y_prefetch_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; - /* Clamp to equ for bandwidth calculation */ - LinesForPrefetchBandwidth = dst_y_prefetch_equ; + /* For mode programming we want to extend the prefetch as much as possible + * (up to oto, or as long as we can for equ) if we're not already applying + * the 60us prefetch requirement. This is to avoid intermittent underflow + * issues during prefetch. + * + * The prefetch extension is applied under the following scenarios: + * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) + * 2. We're using subvp or drr methods of p-state switch, in which case + * we don't care if prefetch takes up more of the blanking time + * + * Mode programming typically chooses the smallest prefetch time possible + * (i.e. highest bandwidth during prefetch) presumably to create margin between + * p-states / c-states that happen in vblank and prefetch. Therefore we only + * apply this prefetch extension when p-state in vblank is not required (UCLK + * p-states take up the most vblank time).
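+ * Illustrative restatement (not part of this patch): at both call
+ * sites above, the new argument is derived as
+ *	ExtendPrefetchIfPossible =
+ *		PrefetchModePerState[i][j] > 0 ||
+ *		DRAMClockChangeRequirementFinal == false;
+ * i.e. extension is requested exactly when an MCLK switch during
+ * blank is not a hard requirement for the selected state.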
+ */ + if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { + MyError = true; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 592d174df6c6..5d34735df83d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index f154a3eb1d1a..3d12dabd39e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -326,6 +326,25 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dcn3_5_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000.0; } + + if (dc->bb_overrides.dram_clock_change_latency_ns > 0) + dcn3_5_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_time_ns > 0) + dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_z8_time_ns > 0) + dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_z8_time_us = + dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0; + /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 1a2b24cc6b61..0baf39d64a2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -772,18 +772,29 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, - const struct dc_stream_status *status, unsigned int stream_id, + const struct dc_stream_status *status, + const struct dc_stream_state *stream, int plane_idx) { unsigned int plane_id; unsigned int cfg_idx; + unsigned int mpc_factor; - get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id); + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); - if (ctx->architecture == dml2_architecture_20) - return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; - ASSERT(false); - return 1; + if (ctx->architecture == dml2_architecture_20) { + mpc_factor = (unsigned 
int)disp_cfg->hw.DPPPerSurface[cfg_idx]; + } else { + mpc_factor = 1; + ASSERT(false); + } + + /* For stereo timings, we need to pipe split */ + if (dml2_is_stereo_timing(stream)) + mpc_factor = 2; + + return mpc_factor; } static unsigned int get_odm_factor( @@ -820,14 +831,13 @@ static void populate_mpc_factors_for_stream( unsigned int mpc_factors[MAX_PIPES]) { const struct dc_stream_status *status = &state->stream_status[stream_idx]; - unsigned int stream_id = state->streams[stream_idx]->stream_id; int i; for (i = 0; i < status->plane_count; i++) if (odm_factor == 1) mpc_factors[i] = get_mpc_factor( ctx, state, disp_cfg, mapping, status, - stream_id, i); + state->streams[stream_idx], i); else mpc_factors[i] = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h index e85866db80ff..7ca7f2a743c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h @@ -38,5 +38,6 @@ #include "core_types.h" #include "dsc.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #endif //__DML2_DC_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index 32f8a43af3d6..282d70e2b18a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -51,7 +51,7 @@ unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, // Find the phantom pipes if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; mblk_width = ctx->config.mall_cfg.mblk_width_pixels; mblk_height = bytes_per_pixel == 4 ? mblk_width = ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels; @@ -253,7 +253,7 @@ static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context * to combine this with SubVP can cause issues with the scheduling). */ if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 && vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { while (pipe) { num_pipes++; @@ -317,7 +317,7 @@ static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *st // Find the minimum pipe split count for non SubVP pipes if (pipe->stream && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) { split_cnt = 0; while (pipe) { split_cnt++; @@ -372,8 +372,8 @@ static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *c * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
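 * Illustrative note (not part of this patch): the recurring refactor in
 * this series replaces direct reads of pipe->stream->mall_stream_config
 * with state-based lookups, e.g.
 *	type    = callbacks.get_pipe_subvp_type(context, pipe);
 *	phantom = callbacks.get_paired_subvp_stream(context, pipe->stream);
 * so SubVP main/phantom pairing is owned by the dc_state rather than
 * by the stream object itself.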
*/ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + phantom->timing.v_addressable; @@ -435,6 +435,7 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context struct pipe_ctx *pipe = NULL; struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; + struct dc_stream_state *phantom_stream; int16_t prefetch_us = 0; int16_t mall_region_us = 0; int16_t drr_frame_us = 0; // nominal frame time @@ -453,12 +454,13 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) break; } + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; @@ -519,6 +521,8 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; /* For SubVP + VBLANK/DRR cases, we assume there can only be * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK @@ -528,19 +532,20 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * */ for (i = 0; i < ctx->config.dcn_pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); // We check for master pipe, but it shouldn't matter since we only need // the pipe for timing info (stream should be same for any pipe splits) if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (!found && pipe_mall_type == SUBVP_NONE) { // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
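Every schedulability check here converts line counts into microseconds with the same expression; a minimal sketch of that arithmetic (the helper is illustrative, field names follow struct dc_crtc_timing):

	/* One scanline of h_total pixels lasts h_total / pixel_clock
	 * seconds; pix_clk_100hz stores the pixel clock in 100 Hz units,
	 * hence the * 100 before converting to microseconds. */
	static double lines_to_us(const struct dc_crtc_timing *t, int lines)
	{
		return lines * t->h_total /
			(double)(t->pix_clk_100hz * 100) * 1000000;
	}

For example, the phantom prefetch window computed above is lines_to_us(phantom_timing, phantom_timing->v_total - phantom_timing->v_front_porch) plus subvp_prefetch_end_to_mall_start_us.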
vblank_index = i; found = true; } - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } // Use ignore_msa_timing_param flag to identify as DRR @@ -548,8 +553,9 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * // SUBVP + DRR case schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing); } else if (found) { + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe // Also include the prefetch end to mallstart delay time @@ -602,19 +608,20 @@ bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) + pipe_mall_type == SUBVP_MAIN) subvp_count++; // Count how many planes that aren't SubVP/phantom are capable of VACTIVE // switching (SubVP + VACTIVE unsupported). In situations where we force // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { vactive_count++; } pipe_idx++; @@ -708,14 +715,10 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup) { struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; - struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_stream_for_sink(ref_pipe->stream->sink); - - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream( + ctx->config.svp_pstate.callbacks.dc, + state, + ref_pipe->stream); /* stream has limited viewport and small timing */ memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); @@ -723,7 +726,10 @@ static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, s memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup); - ctx->config.svp_pstate.callbacks.add_stream_to_ctx(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_stream, + 
ref_pipe->stream); return phantom_stream; } @@ -740,7 +746,10 @@ static void enable_phantom_plane(struct dml2_context *ctx, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { phantom_plane = prev_phantom_plane; } else { - phantom_plane = ctx->config.svp_pstate.callbacks.create_plane(ctx->config.svp_pstate.callbacks.dc); + phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane( + ctx->config.svp_pstate.callbacks.dc, + state, + curr_pipe->plane_state); } memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); @@ -763,9 +772,7 @@ static void enable_phantom_plane(struct dml2_context *ctx, phantom_plane->clip_rect.y = 0; phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - phantom_plane->is_phantom = true; - - ctx->config.svp_pstate.callbacks.add_plane_to_context(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); + ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); curr_pipe = curr_pipe->bottom_pipe; prev_phantom_plane = phantom_plane; @@ -790,7 +797,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_ // We determine which phantom pipes were added by comparing with // the phantom stream. if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { pipe->stream->use_dynamic_meta = false; pipe->plane_state->flip_immediate = false; if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) { @@ -800,7 +807,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_ } } -static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) +static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) { int i, old_plane_count; struct dc_stream_status *stream_status = NULL; @@ -821,9 +828,11 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str for (i = 0; i < old_plane_count; i++) del_planes[i] = stream_status->plane_states[i]; - for (i = 0; i < old_plane_count; i++) - if (!ctx->config.svp_pstate.callbacks.remove_plane_from_context(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) + for (i = 0; i < old_plane_count; i++) { + if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) return false; + ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]); + } return true; } @@ -832,35 +841,21 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state { int i; bool removed_pipe = false; - struct dc_plane_state *phantom_plane = NULL; struct dc_stream_state *phantom_stream = NULL; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; + if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { phantom_stream = pipe->stream; - 
remove_all_planes_for_stream(ctx, pipe->stream, state); - ctx->config.svp_pstate.callbacks.remove_stream_from_ctx(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream); - - /* Ref count is incremented on allocation and also when added to the context. - * Therefore we must call release for the phantom plane and stream once - * they are removed from the ctx to finally decrement the refcount to 0 to free. - */ - ctx->config.svp_pstate.callbacks.plane_state_release(phantom_plane); - ctx->config.svp_pstate.callbacks.stream_release(phantom_stream); + remove_all_phantom_planes_for_stream(ctx, phantom_stream, state); + ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); removed_pipe = true; } - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - if (pipe->plane_state) { pipe->plane_state->is_phantom = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index db06a5b749b4..fa6a93dd9629 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,6 +341,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } + if (dml2->config.bbox_overrides.clks_table.num_states) + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; + /* Override from passed values, if available */ for (i = 0; i < p->in_states->num_states; i++) { if (dml2->config.bbox_overrides.sr_exit_latency_us) { @@ -397,7 +400,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; @@ -1047,8 +1049,10 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { - int i = 0, j = 0; + int i = 0, j = 0, k = 0; int disp_cfg_stream_location, disp_cfg_plane_location; + enum mall_stream_type stream_mall_type; + struct pipe_ctx *current_pipe_context; for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false; @@ -1068,7 +1072,17 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { + current_pipe_context = NULL; + for (k = 0; k < MAX_PIPES; k++) { + /* find one pipe allocated to this stream for the purpose of getting + info about the link later */ + if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) { + current_pipe_context = &context->res_ctx.pipe_ctx[k]; + break; + } + } disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); + stream_mall_type =
dc_state_get_stream_subvp_type(context, context->streams[i]); if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; @@ -1076,7 +1090,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], &context->res_ctx.pipe_ctx[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1; @@ -1113,10 +1127,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context); - if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) { + if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize; - } else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) { + } else if (stream_mall_type == SUBVP_PHANTOM) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable; dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required; @@ -1133,7 +1147,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], &context->res_ctx.pipe_ctx[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; @@ -1145,9 +1159,9 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat break; } - if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) + if (stream_mall_type == SUBVP_MAIN) dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; - else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) + else if (stream_mall_type == SUBVP_PHANTOM) dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 2498b8341199..1068b962d1c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -155,8 +155,20 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) { + if (pipe_ctx == NULL || pipe_ctx->stream == NULL) + return false; + /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + + /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal) && + (pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id)); + } + return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); @@ -275,6 +287,7 @@ static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, str void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt) { unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id; + enum mall_stream_type pipe_mall_type; bool unbounded_req_enabled = false; struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch; @@ -322,7 +335,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont */ populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); - if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]); + if (pipe_mall_type == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false; @@ -349,7 +363,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) { /* SS: all active surfaces stored in MALL */ - if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_mall_type != SUBVP_PHANTOM) { context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; } else { /* SUBVP: phantom surfaces only stored in MALL */ @@ -468,7 +482,7 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc return need_recalculation; } -bool dml2_is_stereo_timing(struct dc_stream_state *stream) +bool dml2_is_stereo_timing(const struct dc_stream_state *stream) { bool is_stereo = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h index 23b9028337d4..5842d6d3c4b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h @@ -42,7 +42,7 @@ void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_st void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx); int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); bool is_dtbclk_required(const struct dc *dc, struct dc_state *context); -bool dml2_is_stereo_timing(struct dc_stream_state *stream); +bool dml2_is_stereo_timing(const struct dc_stream_state *stream); /* * dml2_dc_construct_pipes - This function will determine if we need additional pipes based diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 8f231418870f..26307e599614 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -418,7 +418,7 @@ static int find_drr_eligible_stream(struct dc_state *display_state) int i; for (i = 0; i < display_state->stream_count; i++) { - if (display_state->streams[i]->mall_stream_config.type == SUBVP_NONE + if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE && display_state->streams[i]->ignore_msa_timing_param) { // Use ignore_msa_timing_param flag to identify as DRR return i; @@ -634,6 +634,8 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); + //copy for deciding zstate use + context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod; } return result; @@ -691,10 +693,15 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, bool fast_v return out; } +static inline struct dml2_context *dml2_allocate_memory(void) +{ + return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); +} + bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { // Allocate Mode Lib Ctx - *dml2 = (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml2 = dml2_allocate_memory(); if (!(*dml2)) return false; @@ -745,3 +752,25 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, *fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0]; *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; } + +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2) +{ + /* copy Mode Lib Ctx */ + memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context)); +} + +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2) +{ + /* Allocate Mode Lib Ctx */ + *dst_dml2 = dml2_allocate_memory(); + + if (!(*dst_dml2)) + return false; + + /* copy Mode Lib Ctx */ + dml2_copy(*dst_dml2, src_dml2); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index fe15baa4bf09..ee0eb184eb6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -93,15 +93,34 @@ struct dml2_dc_callbacks { struct 
dml2_dc_svp_callbacks { struct dc *dc; bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); - struct dc_stream_state* (*create_stream_for_sink)(struct dc_sink *dc_sink_data); - struct dc_plane_state* (*create_plane)(struct dc *dc); - enum dc_status (*add_stream_to_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); - bool (*add_plane_to_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - bool (*remove_plane_from_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - enum dc_status (*remove_stream_from_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream); - void (*plane_state_release)(struct dc_plane_state *plane_state); - void (*stream_release)(struct dc_stream_state *stream); + struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); + struct dc_plane_state* (*create_phantom_plane)(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + enum dc_status (*add_phantom_stream)(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); + bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + bool (*remove_phantom_plane)(const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *context); + enum dc_status (*remove_phantom_stream)(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *plane); + void (*release_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); + enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx); + enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream); + struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream); }; struct dml2_clks_table_entry { @@ -191,6 +210,10 @@ bool dml2_create(const struct dc *in_dc, struct dml2_context **dml2); void dml2_destroy(struct dml2_context *dml2); +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2); +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2); /* * dml2_validate - Determines if a display configuration is supported or not. diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index a2537229ee88..b183ba5a692e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,8 +1,34 @@ # SPDX-License-Identifier: MIT # # Makefile for the 'dsc' sub-component of DAL. 
+ +ifdef CONFIG_DRM_AMD_DC_FP + +############################################################################### +# DCN20 +############################################################################### +DSC_DCN20 = dcn20_dsc.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20)) + + + + +############################################################################### +# DCN35 +############################################################################### + +DSC_DCN35 = dcn35_dsc.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35)) + + + +endif + DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC)) AMD_DISPLAY_FILES += $(AMD_DAL_DSC) + diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index e8b5f17beb96..0df6c55eb326 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -331,8 +331,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, int buff_block_size; int buff_size; - if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT], - &buff_block_size)) + if (!dsc_buff_block_size_from_dpcd( + dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03, + &buff_block_size)) return false; buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1; @@ -357,10 +358,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, { int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT]; + int dsc_throughput_granular_delta; + + dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3; + dsc_throughput_granular_delta *= 2; if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK, &dsc_sink_caps->throughput_mode_0_mps)) return false; + dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta; dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT; if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index c9ae2d8f0096..c9ae2d8f0096 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h index ba869387c3c5..ba869387c3c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c index 71d2dff9986d..71d2dff9986d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h index 133ad38842cc..133ad38842cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h index 4b27f29d0d80..4b27f29d0d80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile 
b/drivers/gpu/drm/amd/display/dc/hwss/Makefile index bccd46bd1815..254136f8e3f9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile @@ -78,7 +78,7 @@ ifdef CONFIG_DRM_AMD_DC_FP # DCN ############################################################################### -HWSS_DCN10 = dcn10_hwseq.o +HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10)) @@ -86,7 +86,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10) ############################################################################### -HWSS_DCN20 = dcn20_hwseq.o +HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20)) @@ -94,7 +94,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20) ############################################################################### -HWSS_DCN201 = dcn201_hwseq.o +HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201)) @@ -102,7 +102,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201) ############################################################################### -HWSS_DCN21 = dcn21_hwseq.o +HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21)) @@ -114,7 +114,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21) ############################################################################### -HWSS_DCN30 = dcn30_hwseq.o +HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30)) @@ -122,7 +122,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30) ############################################################################### -HWSS_DCN301 = dcn301_hwseq.o +HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301)) @@ -130,15 +130,17 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301) ############################################################################### -HWSS_DCN302 = dcn302_hwseq.o +HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o AMD_DAL_HWSS_DCN302 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302)) AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302) + + ############################################################################### -HWSS_DCN303 = dcn303_hwseq.o +HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303)) @@ -146,7 +148,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303) ############################################################################### -HWSS_DCN31 = dcn31_hwseq.o +HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31)) @@ -154,7 +156,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31) ############################################################################### -HWSS_DCN314 = dcn314_hwseq.o +HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314)) @@ -162,7 +164,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314) ############################################################################### -HWSS_DCN32 = dcn32_hwseq.o +HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32)) @@ -170,7 +172,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32) ############################################################################### -HWSS_DCN35 = dcn35_hwseq.o 
+HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35)) @@ -180,4 +182,4 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35) ############################################################################### -endif
\ No newline at end of file +endif diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h index 44b4df6469d1..52f045cfd52a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h @@ -682,6 +682,7 @@ struct dce_hwseq_registers { uint32_t DCHUBBUB_ARB_HOSTVM_CNTL; uint32_t HPO_TOP_HW_CONTROL; uint32_t DMU_CLK_CNTL; + uint32_t DCCG_GATE_DISABLE_CNTL4; uint32_t DCCG_GATE_DISABLE_CNTL5; }; /* set field name */ @@ -1199,7 +1200,19 @@ struct dce_hwseq_registers { type PHYBSYMCLK_ROOT_GATE_DISABLE;\ type PHYCSYMCLK_ROOT_GATE_DISABLE;\ type PHYDSYMCLK_ROOT_GATE_DISABLE;\ - type PHYESYMCLK_ROOT_GATE_DISABLE; + type PHYESYMCLK_ROOT_GATE_DISABLE;\ + type DTBCLK_P0_GATE_DISABLE;\ + type DTBCLK_P1_GATE_DISABLE;\ + type DTBCLK_P2_GATE_DISABLE;\ + type DTBCLK_P3_GATE_DISABLE;\ + type DPSTREAMCLK0_GATE_DISABLE;\ + type DPSTREAMCLK1_GATE_DISABLE;\ + type DPSTREAMCLK2_GATE_DISABLE;\ + type DPSTREAMCLK3_GATE_DISABLE;\ + type DPIASYMCLK0_GATE_DISABLE;\ + type DPIASYMCLK1_GATE_DISABLE;\ + type DPIASYMCLK2_GATE_DISABLE;\ + type DPIASYMCLK3_GATE_DISABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 960a55e06375..fb328cd06cea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -55,6 +55,7 @@ #include "audio.h" #include "reg_helper.h" #include "panel_cntl.h" +#include "dc_state_priv.h" #include "dpcd_defs.h" /* include DCE11 register header files */ #include "dce/dce_11_0_d.h" @@ -790,7 +791,7 @@ void dce110_edp_power_control( struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; enum bp_result bp_result; - uint8_t panel_instance; + uint8_t pwrseq_instance; if (dal_graphics_object_id_get_connector_id(link->link_enc->connector) @@ -873,7 +874,7 @@ void dce110_edp_power_control( cntl.coherent = false; cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; - panel_instance = link->panel_cntl->inst; + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (ctx->dc->ctx->dmub_srv && ctx->dc->debug.dmub_command_table) { @@ -881,11 +882,11 @@ void dce110_edp_power_control( if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) { bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_POWER_ON, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } else { bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_POWER_OFF, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } } @@ -956,7 +957,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t panel_instance; + uint8_t pwrseq_instance; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1009,7 +1010,7 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - panel_instance = link->panel_cntl->inst; + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) @@ -1034,11 +1035,11 @@ void dce110_edp_backlight_control( if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) 
ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_LCD_BLON, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); else ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_LCD_BLOFF, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } link_transmitter_control(ctx->dc_bios, &cntl); @@ -1596,7 +1597,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( * is constructed with the same sink). Make sure not to override * and link programming on the main. */ - if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false; } @@ -1684,7 +1685,7 @@ static void disable_vga_and_power_gate_all_controllers( true); dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i; - dc->hwss.disable_plane(dc, + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); } } @@ -2124,7 +2125,8 @@ static void dce110_reset_hw_ctx_wrap( BREAK_TO_DEBUGGER(); } pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg); - pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal)) + pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0; pipe_ctx_old->plane_res.mi->funcs->free_mem_input( pipe_ctx_old->plane_res.mi, dc->current_state->stream_count); @@ -2133,7 +2135,7 @@ static void dce110_reset_hw_ctx_wrap( old_clk)) old_clk->funcs->cs_power_down(old_clk); - dc->hwss.disable_plane(dc, pipe_ctx_old); + dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old); pipe_ctx_old->stream = NULL; } @@ -2497,6 +2499,7 @@ static bool wait_for_reset_trigger_to_occur( /* Enable timing synchronization for a group of Timing Generators. */ static void dce110_enable_timing_synchronization( struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]) @@ -2590,6 +2593,7 @@ static void init_hw(struct dc *dc) struct dmcu *dmcu; struct dce_hwseq *hws = dc->hwseq; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; bp = dc->ctx->dc_bios; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2639,13 +2643,15 @@ static void init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } abm = dc->res_pool->abm; if (abm != NULL) - abm->funcs->abm_init(abm, backlight); + abm->funcs->abm_init(abm, backlight, user_level); dmcu = dc->res_pool->dmcu; if (dmcu != NULL && abm != NULL) @@ -2842,7 +2848,7 @@ static void dce110_post_unlock_program_front_end( { } -static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) +static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; int fe_idx = pipe_ctx->plane_res.mi ? 
@@ -3115,7 +3121,8 @@ void dce110_disable_link_output(struct dc_link *link, struct dmcu *dmcu = dc->res_pool->dmcu; if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control && + !link->skip_implict_edp_power_control) link->dc->hwss.edp_backlight_control(link, false); else if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->lock_phy(dmcu); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index cdb903116eb7..51dd2ae09b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -56,6 +56,7 @@ #include "dc_trace.h" #include "dce/dmub_outbox.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc_logger @@ -115,7 +116,7 @@ void dcn10_lock_all_pipes(struct dc *dc, !pipe_ctx->stream || (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) || !tg->funcs->is_tg_enabled(tg) || - pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) continue; if (lock) @@ -1057,7 +1058,8 @@ static void dcn10_reset_back_end_for_pipe( if (pipe_ctx->stream_res.tg->funcs->set_drr) pipe_ctx->stream_res.tg->funcs->set_drr( pipe_ctx->stream_res.tg, NULL); - pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; } for (i = 0; i < dc->res_pool->pipe_count; i++) @@ -1180,7 +1182,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc) } /* trigger HW to start disconnect plane from stream on the next vsync */ -void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn10_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = pipe_ctx->plane_res.hubp; @@ -1200,7 +1204,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove); // Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle, // so don't wait for MPCC_IDLE in the programming sequence - if (opp != NULL && !pipe_ctx->plane_state->is_phantom) + if (opp != NULL && dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM) opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; dc->optimized_required = true; @@ -1290,7 +1294,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state = NULL; } -void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; DC_LOGGER_INIT(dc->ctx->logger); @@ -1416,12 +1420,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); if (tg->funcs->is_tg_enabled(tg)) tg->funcs->unlock(tg); - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -1486,6 +1490,7 @@ void dcn10_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct 
resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; bool is_optimized_init_done = false; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -1583,12 +1588,14 @@ void dcn10_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } if (abm != NULL) - abm->funcs->abm_init(abm, backlight); + abm->funcs->abm_init(abm, backlight, user_level); if (dmcu != NULL && !dmcu->auto_load_dmcu) dmcu->funcs->dmcu_init(dmcu); @@ -2262,6 +2269,7 @@ void dcn10_enable_vblanks_synchronization( void dcn10_enable_timing_synchronization( struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]) @@ -2276,7 +2284,7 @@ void dcn10_enable_timing_synchronization( DC_SYNC_INFO("Setting up OTG reset trigger\n"); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; @@ -2296,14 +2304,14 @@ void dcn10_enable_timing_synchronization( if (grouped_pipes[i]->stream == NULL) continue; - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream->vblank_synchronized = false; } for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger( @@ -2317,11 +2325,11 @@ void dcn10_enable_timing_synchronization( * synchronized. Look at last pipe programmed to reset. 
*/ - if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM) wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger( @@ -2329,7 +2337,7 @@ void dcn10_enable_timing_synchronization( } for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; @@ -3021,7 +3029,7 @@ void dcn10_post_unlock_program_front_end( for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) - dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) { @@ -3068,7 +3076,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h index ef6d56da417c..bc5dd68a2408 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h @@ -75,7 +75,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn10_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context); -void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn10_lock_all_pipes( struct dc *dc, struct dc_state *context, @@ -108,13 +108,16 @@ void dcn10_power_down_on_boot(struct dc *dc); enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); -void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn10_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx); void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data); void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx); void dce110_power_down(struct dc *dc); void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context); void dcn10_enable_timing_synchronization( struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c index a5bdac79a744..a5bdac79a744 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h index 8c6fd7b844a4..8c6fd7b844a4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 608221b0dd5d..bc71a9b058fe 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -55,6 +55,7 @@ #include "inc/link_enc_cfg.h" #include "link_hwss.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc_logger @@ -623,9 +624,9 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) } -void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { - bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom; + bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); @@ -847,7 +848,7 @@ enum dc_status dcn20_enable_stream_timing( /* TODO enable stream if timing changed */ /* TODO unblank stream if DP */ - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable) pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg); } @@ -1368,8 +1369,14 @@ void dcn20_pipe_control_lock( } } -static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) +static void dcn20_detect_pipe_changes(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe) { + bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM; + bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM; + new_pipe->update_flags.raw = 0; /* If non-phantom pipe is being transitioned to a phantom pipe, @@ -1379,8 +1386,8 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx * be different). The post_unlock sequence will set the correct * update flags to enable the phantom pipe. */ - if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom && - new_pipe->plane_state && new_pipe->plane_state->is_phantom) { + if (old_pipe->plane_state && !old_is_phantom && + new_pipe->plane_state && new_is_phantom) { new_pipe->update_flags.bits.disable = 1; return; } @@ -1400,6 +1407,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx new_pipe->update_flags.bits.scaler = 1; new_pipe->update_flags.bits.viewport = 1; new_pipe->update_flags.bits.det_size = 1; + if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE && + new_pipe->stream_res.test_pattern_params.width != 0 && + new_pipe->stream_res.test_pattern_params.height != 0) + new_pipe->update_flags.bits.test_pattern_changed = 1; if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { new_pipe->update_flags.bits.odm = 1; new_pipe->update_flags.bits.global_sync = 1; @@ -1412,14 +1423,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx * The remove-add sequence of the phantom pipe always results in the pipe * being blanked in enable_stream_timing (DPG). 
*/ - if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM) new_pipe->update_flags.bits.enable = 1; /* Phantom pipes are effectively disabled, if the pipe was previously phantom * we have to enable */ - if (old_pipe->plane_state && old_pipe->plane_state->is_phantom && - new_pipe->plane_state && !new_pipe->plane_state->is_phantom) + if (old_pipe->plane_state && old_is_phantom && + new_pipe->plane_state && !new_is_phantom) new_pipe->update_flags.bits.enable = 1; if (old_pipe->plane_state && !new_pipe->plane_state) { @@ -1556,6 +1567,7 @@ static void dcn20_update_dchubp_dpp( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dccg *dccg = dc->res_pool->dccg; bool viewport_changed = false; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx); if (pipe_ctx->update_flags.bits.dppclk) dpp->funcs->dpp_dppclk_control(dpp, false, true); @@ -1701,7 +1713,7 @@ static void dcn20_update_dchubp_dpp( pipe_ctx->update_flags.bits.plane_changed || plane_state->update_flags.bits.addr_update) { if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + pipe_mall_type == SUBVP_MAIN) { union block_sequence_params params; params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; @@ -1715,7 +1727,7 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.enable) hubp->funcs->set_blank(hubp, false); /* If the stream paired with this plane is phantom, the plane is also phantom */ - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM + if (pipe_ctx->stream && pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable) hubp->funcs->phantom_hubp_post_enable(hubp); } @@ -1773,7 +1785,7 @@ static void dcn20_program_pipe( pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); - if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); pipe_ctx->stream_res.tg->funcs->set_vtg_params( @@ -1870,6 +1882,42 @@ static void dcn20_program_pipe( } } +static void update_vmin_vmax_fams(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + struct drr_params params = {0}; + bool subvp_in_use = resource_subvp_in_use(dc, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (resource_is_pipe_type(pipe, OTG_MASTER) && + ((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM && + pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) { + if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) { + struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; + + /* DRR should be configured already if we're in active variable + * or active fixed, so only program if we're not in this state + */ + params.vertical_total_min = pipe->stream->timing.v_total; + params.vertical_total_max = pipe->stream->timing.v_total; + tg->funcs->set_drr(tg, &params); + } + } else { + if (resource_is_pipe_type(pipe, OTG_MASTER) && + !pipe->stream->vrr_active_variable && + !pipe->stream->vrr_active_fixed) { + struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; +
params.vertical_total_min = 0; + params.vertical_total_max = 0; + tg->funcs->set_drr(tg, &params); + } + } + } +} + void dcn20_program_front_end_for_ctx( struct dc *dc, struct dc_state *context) @@ -1877,6 +1925,8 @@ void dcn20_program_front_end_for_ctx( int i; struct dce_hwseq *hws = dc->hwseq; DC_LOGGER_INIT(dc->ctx->logger); + unsigned int prev_hubp_count = 0; + unsigned int hubp_count = 0; if (resource_is_pipe_topology_changed(dc->current_state, context)) resource_log_pipe_topology_update(dc, context); @@ -1894,9 +1944,23 @@ void dcn20_program_front_end_for_ctx( } } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (dc->current_state->res_ctx.pipe_ctx[i].plane_state) + prev_hubp_count++; + if (context->res_ctx.pipe_ctx[i].plane_state) + hubp_count++; + } + + if (prev_hubp_count == 0 && hubp_count > 0) { + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, false); + udelay(500); + } + /* Set pipe update flags and lock pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) - dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i], + dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], &context->res_ctx.pipe_ctx[i]); /* When disabling phantom pipes, turn on phantom OTG first (so we can get double * buffered pending status clear and won't be in a race with vupdate interrupt */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && - dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; if (tg->funcs->enable_crtc) { if (dc->hwss.blank_phantom) { int main_pipe_width, main_pipe_height; + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream); - main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width; - main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height; + main_pipe_width = phantom_stream->dst.width; + main_pipe_height = phantom_stream->dst.height; dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); } tg->funcs->enable_crtc(tg); @@ -1929,6 +1994,7 @@ void dcn20_program_front_end_for_ctx( && context->res_ctx.pipe_ctx[i].stream) hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); + update_vmin_vmax_fams(dc, context); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) @@ -1943,9 +2009,9 @@ void dcn20_program_front_end_for_ctx( * DET allocation.
*/ if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable || - (context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom))) + (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); - hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); } @@ -1968,7 +2034,7 @@ void dcn20_program_front_end_for_ctx( * but the MPO still exists until the double buffered update of the main pipe so we * will get a frame of underflow if the phantom pipe is programmed here. */ - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) dcn20_program_pipe(dc, pipe, context); } @@ -2018,7 +2084,7 @@ void dcn20_post_unlock_program_front_end( for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) - dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); /* * If we are enabling a pipe, we need to wait for pending clear as this is a critical @@ -2030,7 +2096,7 @@ void dcn20_post_unlock_program_front_end( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && - pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us @@ -2039,6 +2105,10 @@ void dcn20_post_unlock_program_front_end( } } + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, false, false); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -2049,7 +2119,7 @@ void dcn20_post_unlock_program_front_end( * programming sequence). */ while (pipe) { - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { /* When turning on the phantom pipe we want to run through the * entire enable sequence, so apply all the "enable" flags. 
*/ @@ -2119,17 +2189,17 @@ void dcn20_prepare_bandwidth( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // At optimize don't restore the original watermark value - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) { context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; break; } } /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2142,10 +2212,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); @@ -2163,7 +2233,7 @@ void dcn20_optimize_bandwidth( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // At optimize don't need to restore the original watermark value - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) { context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; break; } @@ -2197,7 +2267,8 @@ void dcn20_optimize_bandwidth( dc->clk_mgr, context, true); - if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + !dc->debug.disable_extblankadj) { for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -2590,7 +2661,8 @@ static void dcn20_reset_back_end_for_pipe( * the case where the same symclk is shared across multiple otg * instances */ - link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + link->phy_state.symclk_ref_cnts.otg = 0; if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) { link_hwss->disable_link_output(link, &pipe_ctx->link_res, pipe_ctx->stream->signal); @@ -2923,7 +2995,7 @@ void dcn20_fpga_init_hw(struct dc *dc) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; /*to do*/ - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); } /* initialize DWB pointer to MCIF_WB */ @@ -2940,7 +3012,7 @@ void dcn20_fpga_init_hw(struct dc *dc) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - dc->hwss.disable_plane(dc, pipe_ctx); + 
dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index ab02e4e9c8c2..b94c85340abf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -52,7 +52,7 @@ void dcn20_program_output_csc(struct dc *dc, void dcn20_enable_stream(struct pipe_ctx *pipe_ctx); void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); -void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn20_disable_pixel_data( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c index 884e3e323338..884e3e323338 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h index 12277797cd71..12277797cd71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c index d3fe6092f50e..d5769f38874f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c @@ -320,7 +320,7 @@ void dcn201_init_hw(struct dc *dc) res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = res_pool->opps[i]; /*To do: number of MPCC != number of opp*/ - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); } /* initialize DWB pointer to MCIF_WB */ @@ -337,7 +337,7 @@ void dcn201_init_hw(struct dc *dc) for (i = 0; i < res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -369,7 +369,9 @@ void dcn201_init_hw(struct dc *dc) } /* trigger HW to start disconnect plane from stream on the next vsync */ -void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn201_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = pipe_ctx->plane_res.hubp; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h index 26cd62be6418..6a50a9894be6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h @@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc); void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx); -void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn201_update_mpcc(struct dc *dc, struct 
pipe_ctx *pipe_ctx); void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx); void dcn201_pipe_control_lock( diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c index a13bf6c9386e..a13bf6c9386e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h index 1168887b033d..1168887b033d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 467812cf3368..8e88dcaf88f5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -137,7 +137,8 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx) pipe_ctx->stream->dpms_off = true; } -static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, + uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; @@ -147,12 +148,13 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t optio cmd.abm_set_pipe.header.type = DMUB_CMD__ABM; cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE; cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst; + cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst; cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option; cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst; cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -171,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) @@ -179,7 +181,6 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) struct abm *abm = pipe_ctx->stream_res.abm; uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; - struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; if (dmcu) { @@ -190,9 +191,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) if (abm && panel_cntl) { if (abm->funcs && abm->funcs->set_pipe_ex) { abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, - panel_cntl->inst); + panel_cntl->inst, panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst); + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_IMMEDIATELY_DISABLE, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } panel_cntl->funcs->store_backlight_level(panel_cntl); } @@ -212,9 +217,16 @@ void dcn21_set_pipe(struct 
pipe_ctx *pipe_ctx) if (abm && panel_cntl) { if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + dmub_abm_set_pipe(abm, otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } } @@ -237,9 +249,17 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, if (abm && panel_cntl) { if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c index 18249c6b6d81..18249c6b6d81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h index 3ed24292648a..3ed24292648a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index d71faf2ecd41..c34c13e1e0a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -51,7 +51,7 @@ #include "dcn20/dcn20_hwseq.h" #include "dcn30/dcn30_resource.h" #include "link.h" - +#include "dc_state_priv.h" @@ -367,6 +367,10 @@ void dcn30_enable_writeback( DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\ __func__, wb_info->dwb_pipe_inst,\ wb_info->mpcc_inst); + + /* Warmup interface */ + dcn30_mmhubbub_warmup(dc, 1, wb_info); + /* Update writeback pipe */ dcn30_set_writeback(dc, wb_info, context); @@ -472,6 +476,7 @@ void dcn30_init_hw(struct dc *dc) int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -608,13 +613,15 @@ void dcn30_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ @@ -750,7 +757,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ; cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -872,7 +879,7 @@ bool 
dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.cursor_height = cursor_attr.height; cmd.mall.cursor_pitch = cursor_attr.pitch; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); /* Use copied cursor, and it's okay to not switch back */ cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part; @@ -888,7 +895,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.tmr_scale = tmr_scale; cmd.mall.debug_bits = dc->debug.mall_error_as_fatal; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -905,7 +912,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -962,7 +969,7 @@ void dcn30_hardware_release(struct dc *dc) if (!pipe->stream) continue; - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) { subvp_in_use = true; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 9894caedffed..9894caedffed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h index c280ff90bfa3..c280ff90bfa3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 6477009ce065..6477009ce065 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h index 0bca48ccbfa2..0bca48ccbfa2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c index 637f9514d37b..637f9514d37b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h index 899587b93aa1..899587b93aa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c index edb4d68b8187..edb4d68b8187 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h index 4949981126d7..4949981126d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 97798cee876e..7423880fabb6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -96,7 +96,8 @@ static void enable_memory_low_power(struct dc *dc) if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) { // Power down VPGs for (i = 0; i < dc->res_pool->stream_enc_count; i++) - dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg); + if (dc->res_pool->stream_enc[i]->vpg) + dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg); #if defined(CONFIG_DRM_AMD_DC_FP) for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++) dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg); @@ -112,6 +113,7 @@ void dcn31_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -223,13 +225,15 @@ void dcn31_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ @@ -415,7 +419,7 @@ void dcn31_z10_save_init(struct dc *dc) cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn31_z10_restore(const struct dc *dc) @@ -433,7 +437,7 @@ void dcn31_z10_restore(const struct dc *dc) cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on) @@ -523,7 +527,8 @@ static void dcn31_reset_back_end_for_pipe( if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass) pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; if (pipe_ctx->stream_res.tg->funcs->set_drr) pipe_ctx->stream_res.tg->funcs->set_drr( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 669f524bd064..669f524bd064 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h index a3db08c8bd35..a3db08c8bd35 
100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index ccb7e317e86a..ccb7e317e86a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h index 8f92e66577cf..8f92e66577cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c1a9b746c43f..6c9299c7683d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -51,6 +51,7 @@ #include "dcn32/dcn32_resource.h" #include "link.h" #include "../dcn20/dcn20_hwseq.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) @@ -277,7 +278,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -311,7 +312,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); cmd.cab.cab_alloc_ways = (uint8_t)ways; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -327,7 +328,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -348,8 +349,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { // There is at least 1 SubVP pipe, so enable SubVP enable_subvp = true; break; @@ -375,18 +375,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, bool subvp_immediate_flip = false; bool subvp_in_use = false; struct pipe_ctx *pipe; + enum mall_stream_type pipe_mall_type = SUBVP_NONE; for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) { subvp_in_use = true; break; } } if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) { - if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN && + if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN && 
top_pipe_to_program->plane_state->flip_immediate) subvp_immediate_flip = true; } @@ -398,7 +400,7 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, if (!lock) { for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN && + if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN && should_lock_all_pipes) pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK); } @@ -416,14 +418,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params) { struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc; bool lock = params->subvp_pipe_control_lock_fast_params.lock; - struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx; - bool subvp_immediate_flip = false; - - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) { - if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN && - pipe_ctx->plane_state->flip_immediate) - subvp_immediate_flip = true; - } + bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip; // Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared. if (subvp_immediate_flip) { @@ -609,7 +604,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN || + if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN || pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, false); @@ -624,7 +619,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || + if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN || pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); @@ -671,8 +666,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) if (cursor_size > 16384) cache_cursor = true; - if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - hubp->funcs->hubp_update_mall_sel(hubp, 1, false); + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { // MALL not supported with Stereo3D hubp->funcs->hubp_update_mall_sel(hubp, @@ -714,9 +709,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context) * see if CURSOR_REQ_MODE will be back to 1 for SubVP * when it should be 0 for MPO */ - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) hubp->funcs->hubp_prepare_subvp_buffering(hubp, true); - } } } } @@ -759,6 +753,7 @@ void dcn32_init_hw(struct dc *dc) int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -913,13 +908,15 @@ void dcn32_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + 
if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL && abms[i]->funcs != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ @@ -995,9 +992,22 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; + struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; struct pipe_ctx *odm_pipe; int opp_cnt = 1; + struct dccg *dccg = dc->res_pool->dccg; + /* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN + * register access hangs. When DSCCLK is based on refclk, DSCCLK is always a + * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is + * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings + * with DSC such as 480p60Hz, the dispclk could be low enough to trigger + * this problem. We are implementing a workaround here to keep using dscclk + * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e. + * 48Mhz) pixel clock to avoid hitting this problem. + */ + bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) && + stream->timing.pix_clk_100hz > 480000; ASSERT(dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) @@ -1020,12 +1030,16 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -1045,9 +1059,13 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } @@ -1130,6 +1148,10 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; + struct dccg *dccg = dc->res_pool->dccg; + + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } @@ -1203,7
+1225,7 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1354,8 +1376,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN && - pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN && + dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) { if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) { phantom_plane->src_rect.x = pipe->plane_state->src_rect.x; @@ -1380,21 +1402,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc, void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe) { phantom_pipe->update_flags.raw = 0; - if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) { - phantom_pipe->update_flags.bits.enable = 1; - phantom_pipe->update_flags.bits.mpcc = 1; - phantom_pipe->update_flags.bits.dppclk = 1; - phantom_pipe->update_flags.bits.hubp_interdependent = 1; - phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; - phantom_pipe->update_flags.bits.gamut_remap = 1; - phantom_pipe->update_flags.bits.scaler = 1; - phantom_pipe->update_flags.bits.viewport = 1; - phantom_pipe->update_flags.bits.det_size = 1; - if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) { - phantom_pipe->update_flags.bits.odm = 1; - phantom_pipe->update_flags.bits.global_sync = 1; - } + if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) { + phantom_pipe->update_flags.bits.enable = 1; + phantom_pipe->update_flags.bits.mpcc = 1; + phantom_pipe->update_flags.bits.dppclk = 1; + phantom_pipe->update_flags.bits.hubp_interdependent = 1; + phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + phantom_pipe->update_flags.bits.gamut_remap = 1; + phantom_pipe->update_flags.bits.scaler = 1; + phantom_pipe->update_flags.bits.viewport = 1; + phantom_pipe->update_flags.bits.det_size = 1; + if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) { + phantom_pipe->update_flags.bits.odm = 1; + phantom_pipe->update_flags.bits.global_sync = 1; } } } @@ -1466,8 +1486,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) * pipe, wait for the double buffer update to complete first before we do * ANY phantom pipe programming. 
*/ - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM && - old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM && + old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) { old_pipe->stream_res.tg->funcs->wait_for_state( old_pipe->stream_res.tg, CRTC_STATE_VBLANK); @@ -1479,7 +1499,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) { // If old context or new context has phantom pipes, apply // the phantom timings now. We can't change the phantom // pipe configuration safely without driver acquiring diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 427cfc8c24a4..427cfc8c24a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h index 89a591eb2c23..89a591eb2c23 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 5a8258287438..9c806385ecbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -56,6 +56,7 @@ #include "dcn30/dcn30_cm_common.h" #include "dcn31/dcn31_hwseq.h" #include "dcn20/dcn20_hwseq.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) \ struct dal_logger *dc_logger = logger @@ -133,6 +134,7 @@ void dcn35_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -145,17 +147,36 @@ void dcn35_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); } - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); - - /* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */ - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); + if (!dc->debug.disable_clock_gate) { + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + /* Disable gating for PHYASYMCLK. 
This will be enabled in dccg if needed */ + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1, + PHYESYMCLK_ROOT_GATE_DISABLE, 1); + + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4, + DPIASYMCLK0_GATE_DISABLE, 0, + DPIASYMCLK1_GATE_DISABLE, 0, + DPIASYMCLK2_GATE_DISABLE, 0, + DPIASYMCLK3_GATE_DISABLE, 0); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, + DTBCLK_P0_GATE_DISABLE, 0, + DTBCLK_P1_GATE_DISABLE, 0, + DTBCLK_P2_GATE_DISABLE, 0, + DTBCLK_P3_GATE_DISABLE, 0); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_GATE_DISABLE, 0, + DPSTREAMCLK1_GATE_DISABLE, 0, + DPSTREAMCLK2_GATE_DISABLE, 0, + DPSTREAMCLK3_GATE_DISABLE, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); + } // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) @@ -260,13 +281,15 @@ void dcn35_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } if (dc->ctx->dmub_srv) { for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL && abms[i]->funcs != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } } @@ -332,9 +355,6 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl) { if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); - - if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22) - dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false); } } @@ -671,11 +691,7 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) } // TODO: review other cases when idle optimization is allowed - - if (!enable) - dc_dmub_srv_exit_low_power_state(dc); - else - dc_dmub_srv_notify_idle(dc, enable); + dc_dmub_srv_apply_idle_power_optimizations(dc, enable); return true; } @@ -685,7 +701,7 @@ void dcn35_z10_restore(const struct dc *dc) if (dc->debug.disable_z10) return; - dc_dmub_srv_exit_low_power_state(dc); + dc_dmub_srv_apply_idle_power_optimizations(dc, false); dcn31_z10_restore(dc); } @@ -801,12 +817,12 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); if (tg->funcs->is_tg_enabled(tg)) tg->funcs->unlock(tg); - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -933,10 +949,10 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state = NULL; } -void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; - bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom; + bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? 
pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); @@ -963,6 +979,8 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, bool hpo_frl_stream_enc_acquired = false; bool hpo_dp_stream_enc_acquired = false; int i = 0, j = 0; + int edp_num = 0; + struct dc_link *edp_links[MAX_NUM_EDP] = { NULL }; memset(update_state, 0, sizeof(struct pg_block_update)); @@ -1003,10 +1021,24 @@ if (pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; + } + /*domain24 controls all the otg, mpc, opp; as long as one otg is still up, avoid enabling OTG PG*/ + for (i = 0; i < dc->res_pool->timing_generator_count; i++) { + struct timing_generator *tg = dc->res_pool->timing_generators[i]; + if (tg && tg->funcs->is_tg_enabled(tg)) { + update_state->pg_pipe_res_update[PG_OPTC][i] = false; + break; + } + } - if (pipe_ctx->stream_res.tg) - update_state->pg_pipe_res_update[PG_OPTC][pipe_ctx->stream_res.tg->inst] = false; + dc_get_edp_links(dc, edp_links, &edp_num); + if (edp_num == 0 || + ((!edp_links[0] || !edp_links[0]->edp_sink_present) && + (!edp_links[1] || !edp_links[1]->edp_sink_present))) { + /*eDP does not exist on this config, keep Domain24 power on; for S0i3 this will be handled in dmubfw*/ + update_state->pg_pipe_res_update[PG_OPTC][0] = false; } + } void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, @@ -1092,8 +1124,29 @@ } -void dcn35_block_power_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on) +/** + * dcn35_hw_block_power_down() - power down sequence + * + * The following sequence describes the ON-OFF (ONO) for power down: + * + * ONO Region 3, DCPG 25: hpo - SKIPPED + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED.
will be pwr dwn after lono timer is armed + * + * @dc: Current DC state + * @update_state: update PG sequence states for HW block + */ +void dcn35_hw_block_power_down(struct dc *dc, + struct pg_block_update *update_state) { int i = 0; struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; @@ -1102,64 +1155,106 @@ return; if (dc->debug.ignore_pg) return; + if (update_state->pg_res_update[PG_HPO]) { if (pg_cntl->funcs->hpo_pg_control) - pg_cntl->funcs->hpo_pg_control(pg_cntl, power_on); + pg_cntl->funcs->hpo_pg_control(pg_cntl, false); } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { if (pg_cntl->funcs->hubp_dpp_pg_control) - pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, power_on); + pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false); } - + } + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) if (update_state->pg_pipe_res_update[PG_DSC][i]) { if (pg_cntl->funcs->dsc_pg_control) - pg_cntl->funcs->dsc_pg_control(pg_cntl, i, power_on); + pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false); } - if (update_state->pg_pipe_res_update[PG_MPCC][i]) { - if (pg_cntl->funcs->mpcc_pg_control) - pg_cntl->funcs->mpcc_pg_control(pg_cntl, i, power_on); - } - - if (update_state->pg_pipe_res_update[PG_OPP][i]) { - if (pg_cntl->funcs->opp_pg_control) - pg_cntl->funcs->opp_pg_control(pg_cntl, i, power_on); - } - if (update_state->pg_pipe_res_update[PG_OPTC][i]) { - if (pg_cntl->funcs->optc_pg_control) - pg_cntl->funcs->optc_pg_control(pg_cntl, i, power_on); - } - } + /*this will need all the clients to unregister optc interrupts; let dmubfw handle this*/ + if (pg_cntl->funcs->plane_otg_pg_control) + pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false); - if (update_state->pg_res_update[PG_DWB]) { - if (pg_cntl->funcs->dwb_pg_control) - pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on); - } + //domain22, 23, 25 currently always on. - if (pg_cntl->funcs->plane_otg_pg_control) - pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on); } -void dcn35_root_clock_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on) +/** + * dcn35_hw_block_power_up() - power up sequence + * + * The following sequence describes the ON-OFF (ONO) for power up: + * + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 3, DCPG 25: hpo - SKIPPED + * + * @dc: Current DC state + * @update_state: update PG sequence states for HW block + */ +void dcn35_hw_block_power_up(struct dc *dc, + struct pg_block_update *update_state) { int i = 0; struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; if (!pg_cntl) return; + if (dc->debug.ignore_pg) + return; + //domain22, 23, 25 currently always on.
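/*
 * For illustration: the power-up body below walks the same DCPG domains as
 * dcn35_hw_block_power_down() above, but in reverse order -- OTG/MPC/OPP/DWB
 * (DCPG 24) first, then the DSC instances (DCPG 16-19), then each HUBP/DPP
 * pair (DCPG 0-3), and HPO (DCPG 25) last -- presumably so a plane front end
 * is never ungated before the OTG/MPC backend it feeds.
 */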
+ /*this will need all the clients to unregister optc interrupts; let dmubfw handle this*/ + if (pg_cntl->funcs->plane_otg_pg_control) + pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true); + + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) + if (update_state->pg_pipe_res_update[PG_DSC][i]) { + if (pg_cntl->funcs->dsc_pg_control) + pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true); + } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { - if (dc->hwseq->funcs.dpp_root_clock_control) - dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + if (pg_cntl->funcs->hubp_dpp_pg_control) + pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true); } + } + if (update_state->pg_res_update[PG_HPO]) { + if (pg_cntl->funcs->hpo_pg_control) + pg_cntl->funcs->hpo_pg_control(pg_cntl, true); + } +} +void dcn35_root_clock_control(struct dc *dc, + struct pg_block_update *update_state, bool power_on) +{ + int i = 0; + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; + if (!pg_cntl) + return; + /*enable root clock first when powering up*/ + if (power_on) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (update_state->pg_pipe_res_update[PG_HUBP][i] && + update_state->pg_pipe_res_update[PG_DPP][i]) { + if (dc->hwseq->funcs.dpp_root_clock_control) + dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + } + } + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { if (update_state->pg_pipe_res_update[PG_DSC][i]) { if (power_on) { if (dc->res_pool->dccg->funcs->enable_dsc) @@ -1170,6 +1265,15 @@ void dcn35_root_clock_control, } } } + /*disable root clock first when powering down*/ + if (!power_on) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (update_state->pg_pipe_res_update[PG_HUBP][i] && + update_state->pg_pipe_res_update[PG_DPP][i]) { + if (dc->hwseq->funcs.dpp_root_clock_control) + dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + } + } } void dcn35_prepare_bandwidth( @@ -1183,9 +1287,9 @@ if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, true); - - if (dc->hwss.block_power_control) - dc->hwss.block_power_control(dc, &pg_update_state, true); + /*power up required HW blocks*/ + if (dc->hwss.hw_block_power_up) + dc->hwss.hw_block_power_up(dc, &pg_update_state); } dcn20_prepare_bandwidth(dc, context); @@ -1201,9 +1305,9 @@ if (dc->hwss.calc_blocks_to_gate) { dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state); - - if (dc->hwss.block_power_control) - dc->hwss.block_power_control(dc, &pg_update_state, false); + /*try to power down unused blocks*/ + if (dc->hwss.hw_block_power_down) + dc->hwss.hw_block_power_down(dc, &pg_update_state); if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, false); @@ -1225,3 +1329,44 @@ uint32_t dcn35_get_idle_state(const struct dc *dc) return 0; } + +void dcn35_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust) +{ + int i = 0; + struct drr_params params = {0}; + // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow + unsigned int event_triggers = 0x800; + // Note DRR trigger events are generated regardless of whether num frames met.
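/*
 * Worked example for the IPS-aware wait below, taking the computed
 * frame_rate at face value: with static_screen_wait_frames set and IPS
 * supported, frame_rate == 144 gives num_frames = 2 * (144 % 60) = 48,
 * exactly 120 gives 2 * (120 % 60) = 0, and anything below 120 keeps the
 * default of 2 frames.
 */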
+ unsigned int num_frames = 2; + + params.vertical_total_max = adjust.v_total_max; + params.vertical_total_min = adjust.v_total_min; + params.vertical_total_mid = adjust.v_total_mid; + params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; + + for (i = 0; i < num_pipes; i++) { + if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; + + if (dc->debug.static_screen_wait_frames) { + unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); + + if (frame_rate >= 120 && dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { + /*ips enable case*/ + num_frames = 2 * (frame_rate % 60); + } + } + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, &params); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index 0dff10d179b8..fd66316e33de 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -57,14 +57,16 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context); void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); -void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); -void dcn35_block_power_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on); +void dcn35_hw_block_power_up(struct dc *dc, + struct pg_block_update *update_state); +void dcn35_hw_block_power_down(struct dc *dc, + struct pg_block_update *update_state); void dcn35_root_clock_control(struct dc *dc, struct pg_block_update *update_state, bool power_on); @@ -84,4 +86,8 @@ void dcn35_dsc_pg_control( void dcn35_set_idle_state(const struct dc *dc, bool allow_idle); uint32_t dcn35_get_idle_state(const struct dc *dc); + +void dcn35_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 296bf3a38cb9..a630aa77dcec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -68,7 +68,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .prepare_bandwidth = dcn35_prepare_bandwidth, .optimize_bandwidth = dcn35_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn35_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn30_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, @@ -118,7 +118,8 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
.update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, - .block_power_control = dcn35_block_power_control, + .hw_block_power_up = dcn35_hw_block_power_up, + .hw_block_power_down = dcn35_hw_block_power_down, .root_clock_control = dcn35_root_clock_control, .set_idle_state = dcn35_set_idle_state, .get_idle_state = dcn35_get_idle_state diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h index b67015032c35..b67015032c35 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt new file mode 100644 index 000000000000..951ca2da4486 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt @@ -0,0 +1,4 @@ +dal3_subdirectory_sources( + dcn351_init.c + dcn351_init.h +) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile new file mode 100644 index 000000000000..b24ad27fe6ef --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile @@ -0,0 +1,17 @@ +# +# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved +# +# All rights reserved. This notice is intended as a precaution against +# inadvertent publication and does not imply publication or any waiver +# of confidentiality. The year included in the foregoing notice is the +# year of creation of the work. +# +# Authors: AMD +# +# Makefile for DCN351. + +DCN351 = dcn351_init.o + +AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/dcn351/,$(DCN351)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DCN351) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c new file mode 100644 index 000000000000..143d3fc0221c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -0,0 +1,171 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn35/dcn35_hwseq.h" + +#include "dcn351_init.h" + +static const struct hw_sequencer_funcs dcn351_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn35_init_hw, + .power_down_on_boot = dcn35_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn32_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn35_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn35_prepare_bandwidth, + .optimize_bandwidth = dcn35_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn31_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down 
= dce110_power_down, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn32_disable_link_output, + .z10_restore = dcn35_z10_restore, + .z10_save_init = dcn31_z10_save_init, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, + .update_dsc_pg = dcn32_update_dsc_pg, + .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, + .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, + .hw_block_power_up = dcn35_hw_block_power_up, + .hw_block_power_down = dcn35_hw_block_power_down, + .root_clock_control = dcn35_root_clock_control, + .set_idle_state = dcn35_set_idle_state, + .get_idle_state = dcn35_get_idle_state +}; + +static const struct hwseq_private_funcs dcn351_private_funcs = { + .init_pipes = dcn35_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn32_set_input_transfer_func, + .set_output_transfer_func = dcn32_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = NULL, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn35_plane_atomic_disable, + //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ + //.hubp_pg_control = dcn35_hubp_pg_control, + .enable_power_gating_plane = dcn35_enable_power_gating_plane, + .dpp_root_clock_control = dcn35_dpp_root_clock_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn35_update_odm, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_mcm_luts = dcn32_set_mcm_luts, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .dsc_pg_control = dcn35_dsc_pg_control, + .dsc_pg_status = dcn32_dsc_pg_status, + .enable_plane = dcn35_enable_plane, +}; + +void dcn351_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn351_funcs; + dc->hwseq->funcs = dcn351_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h new file mode 100644 index 000000000000..970b01008b23 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro 
Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN351_INIT_H__ +#define __DC_DCN351_INIT_H__ + +struct dc; + +void dcn351_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN351_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 452680fe9aab..a54399383318 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -50,7 +50,7 @@ struct pg_block_update; struct subvp_pipe_control_lock_fast_params { struct dc *dc; bool lock; - struct pipe_ctx *pipe_ctx; + bool subvp_immediate_flip; }; struct pipe_control_lock_params { @@ -200,7 +200,7 @@ struct hw_sequencer_funcs { struct dc_state *context); enum dc_status (*apply_ctx_to_hw)(struct dc *dc, struct dc_state *context); - void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); void (*apply_ctx_for_surface)(struct dc *dc, const struct dc_stream_state *stream, @@ -248,6 +248,7 @@ struct hw_sequencer_funcs { void (*enable_per_frame_crtc_position_reset)(struct dc *dc, int group_size, struct pipe_ctx *grouped_pipes[]); void (*enable_timing_synchronization)(struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]); void (*enable_vblanks_synchronization)(struct dc *dc, @@ -414,8 +415,10 @@ struct hw_sequencer_funcs { struct pg_block_update *update_state); void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); - void (*block_power_control)(struct dc *dc, - struct pg_block_update *update_state, bool power_on); + void (*hw_block_power_up)(struct dc *dc, + struct pg_block_update *update_state); + void (*hw_block_power_down)(struct dc *dc, + struct pg_block_update *update_state); void (*root_clock_control)(struct dc *dc, struct pg_block_update *update_state, bool power_on); void (*set_idle_state)(const struct dc *dc, bool allow_idle); @@ -452,17 +455,18 @@ void get_mpctree_visual_confirm_color( struct tg_color *color); void get_subvp_visual_confirm_color( - struct dc *dc, - struct dc_state *context, struct pipe_ctx *pipe_ctx, struct tg_color *color); void get_mclk_switch_visual_confirm_color( - struct dc *dc, - struct dc_state 
*context, struct pipe_ctx *pipe_ctx, struct tg_color *color); +void set_p_state_switch_method( + struct dc *dc, + struct dc_state *context, + struct pipe_ctx *pipe_ctx); + void hwss_execute_sequence(struct dc *dc, struct block_sequence block_sequence[], int num_steps); @@ -472,7 +476,8 @@ void hwss_build_fast_sequence(struct dc *dc, unsigned int dmub_cmd_count, struct block_sequence block_sequence[], int *num_steps, - struct pipe_ctx *pipe_ctx); + struct pipe_ctx *pipe_ctx, + struct dc_stream_status *stream_status); void hwss_send_dmcub_cmd(union block_sequence_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 82c592166875..6137cf09aa54 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -79,6 +79,7 @@ struct hwseq_private_funcs { void (*update_plane_addr)(const struct dc *dc, struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, + struct dc_state *state, struct pipe_ctx *pipe_ctx); void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx); bool (*set_input_transfer_func)(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index bac1420b1de8..f74ae0d41d3c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -200,11 +200,8 @@ struct resource_funcs { unsigned int pipe_cnt, unsigned int index); - bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update); - void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context); void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); - void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); - void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); + void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); }; struct audio_support{ @@ -384,6 +381,16 @@ union pipe_update_flags { uint32_t raw; }; +enum p_state_switch_method { + P_STATE_UNKNOWN = 0, + P_STATE_V_BLANK = 1, + P_STATE_FPO, + P_STATE_V_ACTIVE, + P_STATE_SUB_VP, + P_STATE_DRR_SUB_VP, + P_STATE_V_BLANK_SUB_VP +}; + struct pipe_ctx { struct dc_plane_state *plane_state; struct dc_stream_state *stream; @@ -432,6 +439,7 @@ struct pipe_ctx { struct dwbc *dwbc; struct mcif_wb *mcif_wb; union pipe_update_flags update_flags; + enum p_state_switch_method p_state_type; struct tg_color visual_confirm_color; bool has_vactive_margin; /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/ @@ -525,6 +533,14 @@ struct dc_state { * @stream_status: Planes status on a given stream */ struct dc_stream_status stream_status[MAX_PIPES]; + /** + * @phantom_streams: Stream state properties for phantoms + */ + struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES]; + /** + * @phantom_planes: Planes state properties for phantoms + */ + struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES]; /** * @stream_count: Total of streams in use @@ -533,6 +549,14 @@ struct dc_state { uint8_t stream_mask; /** + * @phantom_stream_count: Total phantom streams in use + */ + uint8_t phantom_stream_count; + /** + * @phantom_plane_count: Total phantom planes in use + */ + uint8_t phantom_plane_count; + /** * @res_ctx: Persistent state of resources */ struct resource_context res_ctx; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 33db15d69f23..3f0161d64675 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -36,7 +36,7 @@ struct abm { }; struct abm_funcs { - void (*abm_init)(struct abm *abm, uint32_t back_light); + void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level); bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst); bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst); @@ -64,7 +64,8 @@ struct abm_funcs { bool (*set_pipe_ex)(struct abm *abm, unsigned int otg_inst, unsigned int option, - unsigned int panel_inst); + unsigned int panel_inst, + unsigned int pwrseq_inst); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index fa9614bcb160..cbba39d251e5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -62,6 +62,25 @@ struct dcn3_clk_internal { uint32_t CLK4_CLK0_CURRENT_CNT; //fclk }; +struct dcn35_clk_internal { + int dummy; + uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk + uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk + uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk + uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk + uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk + //uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk + //uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk + uint32_t CLK1_CLK3_DS_CNTL; //dcf_deep_sleep_divider + uint32_t CLK1_CLK3_ALLOW_DS; //dcf_deep_sleep_allow + + uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass + uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass + uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass + uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass + uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass +}; + struct dcn301_clk_internal { int dummy; uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index ce2f0c0e82bd..b9a06bf84cc9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -59,8 +59,8 @@ enum dentist_dispclk_change_mode { struct dp_dto_params { int otg_inst; enum signal_type signal; - long long pixclk_hz; - long long refclk_hz; + uint64_t pixclk_hz; + uint64_t refclk_hz; }; enum pixel_rate_div { @@ -201,6 +201,10 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); + void (*set_dto_dscclk)( + struct dccg *dccg, + uint32_t dsc_inst); + void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h index 86b711dcc785..729ca0064e94 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h @@ -188,6 +188,10 @@ struct dwbc_funcs { bool (*is_enabled)( struct dwbc *dwbc); + void (*set_fc_enable)( + struct dwbc *dwbc, + enum dwb_frame_capture_enable enable); + void (*set_stereo)( struct dwbc *dwbc, struct dwb_stereo_params *stereo_params); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index b95ae9596c3b..dcae23faeee3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -43,6 +43,7 @@ * to be used inside loops and for determining array sizes. 
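 * With MAX_PIPES at 6, the MAX_PHANTOM_PIPES macro added below evaluates to 3: presumably at most half of the pipes can be phantoms, since each SubVP phantom pipe is paired with a SubVP main pipe.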
*/ #define MAX_PIPES 6 +#define MAX_PHANTOM_PIPES (MAX_PIPES / 2) #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 24af9d80b937..5dcbaa2db964 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers { unsigned int BL_PWM_PERIOD_CNTL; unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; unsigned int PANEL_PWRSEQ_REF_DIV2; + unsigned int USER_LEVEL; }; struct panel_cntl_funcs { @@ -56,12 +57,14 @@ struct panel_cntl_funcs { struct panel_cntl_init_data { struct dc_context *ctx; uint32_t inst; + uint32_t pwrseq_inst; }; struct panel_cntl { const struct panel_cntl_funcs *funcs; struct dc_context *ctx; uint32_t inst; + uint32_t pwrseq_inst; /* registers setting needs to be saved and restored at InitBacklight */ struct panel_cntl_backlight_registers stored_backlight_registers; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index b9812afb886b..00ea3864dd4d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -47,8 +47,6 @@ struct pg_cntl_funcs { void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); - - void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index d7685368140a..26fe81f213da 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -281,11 +281,16 @@ struct link_service { const unsigned int *power_opts); bool (*edp_setup_replay)(struct dc_link *link, const struct dc_stream_state *stream); + bool (*edp_send_replay_cmd)(struct dc_link *link, + enum replay_FW_Message_type msg, + union dmub_replay_cmd_set *cmd_data); bool (*edp_set_coasting_vtotal)( struct dc_link *link, uint16_t coasting_vtotal); bool (*edp_replay_residency)(const struct dc_link *link, unsigned int *residency, const bool is_start, const bool is_alpm); + bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal); bool (*edp_wait_for_t12)(struct dc_link *link); bool (*edp_is_ilr_optimization_required)(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 06ca8bfb91e7..1d51fed12e20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -501,6 +501,18 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx( const struct resource_pool *pool); /* + * Look for a free pipe in new resource context that is used in current resource + * context as an OTG master pipe. 
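 + * Preferring such a pipe presumably lets the new context keep the stream on the same physical OTG master, minimizing timing generator re-programming when the new context is applied.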
+ * + * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise + * pipe idx of the free pipe + */ +int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( + const struct resource_context *cur_res_ctx, + struct resource_context *new_res_ctx, + const struct resource_pool *pool); + +/* * Look for a free pipe in new resource context that is used as a secondary DPP * pipe in any MPCC combine in current resource context. * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise @@ -561,9 +573,6 @@ void update_audio_usage( unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format); -void get_audio_check(struct audio_info *aud_modes, - struct audio_check *aud_chk); - bool get_temp_dp_link_res(struct dc_link *link, struct link_resource *link_res, struct dc_link_settings *link_settings); @@ -600,6 +609,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( struct pipe_ctx *sec_pipe, bool odm); +bool resource_subvp_in_use(struct dc *dc, + struct dc_state *context); + /* A test harness interface that modifies dp encoder resources in the given dc * state and bypasses the need to revalidate. The interface assumes that the * test harness interface is called with pre-validated link config stored in the @@ -610,5 +622,4 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, struct pipe_ctx *pipe_ctx); bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream); - #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index f8e01ca09d96..5fe8b4871c77 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -776,10 +776,26 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) */ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { + /* TODO: Move this to HWSS as this is a hardware programming sequence, not a + * link layer sequence + */ struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; + struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; struct pipe_ctx *odm_pipe; int opp_cnt = 1; + struct dccg *dccg = dc->res_pool->dccg; + /* It has been found that when DSCCLK is lower than 16 MHz, DCN register + * accesses can hang. When DSCCLK is based on refclk, DSCCLK is always a + * fixed value higher than 16 MHz so the issue doesn't occur. When DSCCLK is + * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings + * with DSC such as 480p60Hz, the dispclk could be low enough to trigger + * this problem. We are implementing a workaround here to keep using dscclk + * based on the fixed-value refclk when the timing's pixel clock is smaller + * than 3 x 16 MHz (i.e. 48 MHz), to avoid hitting this problem.
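+ * For example (illustrative numbers): a 480p60 timing has a pixel clock of roughly 27 MHz, under the 48 MHz cutoff, so a 1/3-dispclk DTO could put DSCCLK below the 16 MHz floor, while a 4k60 timing at ~594 MHz pixel clock clears the cutoff and can safely take the DTO path.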
+ */ + bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) && + stream->timing.pix_clk_100hz > 480000; DC_LOGGER_INIT(dsc->ctx->logger); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) @@ -802,11 +818,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -856,9 +876,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); + } } } @@ -1057,18 +1082,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps) uint32_t denominator = 1; /* - * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 + * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not + * required when determining PBN/time slot utilization on the link between + * us and the branch, since that overhead is already accounted for in + * the get_pbn_per_slot function. 
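+ * + * As a worked example (illustrative figures only): a 2,000,000 kbps stream maps to 2,000,000 * 64 / (54 * 8 * 1000) ~= 296.3 PBN.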
+ * * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on * common multiplier to render an integer PBN for all link rate/lane * counts combinations * calculate - * peak_kbps *= (1006/1000) * peak_kbps *= (64/54) - * peak_kbps *= 8 convert to bytes + * peak_kbps /= (8 * 1000) convert to bytes */ - numerator = 64 * PEAK_FACTOR_X1000; - denominator = 54 * 8 * 1000 * 1000; + numerator = 64; + denominator = 54 * 8 * 1000; kbps *= numerator; peak_kbps = dc_fixpt_from_fraction(kbps, denominator); @@ -1247,86 +1275,6 @@ static void remove_stream_from_alloc_table( } } -static enum dc_status deallocate_mst_payload_with_temp_drm_wa( - struct pipe_ctx *pipe_ctx) -{ - struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->link; - struct dc_dp_mst_stream_allocation_table proposed_table = {0}; - struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); - int i; - bool mst_mode = (link->type == dc_connection_mst_branch); - /* adjust for drm changes*/ - const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); - const struct dc_link_settings empty_link_settings = {0}; - DC_LOGGER_INIT(link->ctx->logger); - - if (link_hwss->ext.set_throttled_vcp_size) - link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp); - if (link_hwss->ext.set_hblank_min_symbol_width) - link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx, - &empty_link_settings, - avg_time_slots_per_mtp); - - if (dm_helpers_dp_mst_write_payload_allocation_table( - stream->ctx, - stream, - &proposed_table, - false)) - update_mst_stream_alloc_table( - link, - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.hpo_dp_stream_enc, - &proposed_table); - else - DC_LOG_WARNING("Failed to update" - "MST allocation table for" - "pipe idx:%d\n", - pipe_ctx->pipe_idx); - - DC_LOG_MST("%s" - "stream_count: %d: ", - __func__, - link->mst_stream_alloc_table.stream_count); - - for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: %p " - "stream[%d].hpo_dp_stream_enc: %p " - "stream[%d].vcp_id: %d " - "stream[%d].slot_count: %d\n", - i, - (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, - i, - (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc, - i, - link->mst_stream_alloc_table.stream_allocations[i].vcp_id, - i, - link->mst_stream_alloc_table.stream_allocations[i].slot_count); - } - - if (link_hwss->ext.update_stream_allocation_table == NULL || - link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { - DC_LOG_DEBUG("Unknown encoding format\n"); - return DC_ERROR_UNEXPECTED; - } - - link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, - &link->mst_stream_alloc_table); - - if (mst_mode) { - dm_helpers_dp_mst_poll_for_allocation_change_trigger( - stream->ctx, - stream); - } - - dm_helpers_dp_mst_send_payload_allocation( - stream->ctx, - stream, - false); - - return DC_OK; -} - static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; @@ -1339,9 +1287,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct dc_link_settings empty_link_settings = {0}; DC_LOGGER_INIT(link->ctx->logger); - if (link->dc->debug.temp_mst_deallocation_sequence) - return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx); - /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link * stream[] yet. 
For this, payload is not allocated yet, so de-alloc @@ -1414,16 +1359,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &link->mst_stream_alloc_table); - if (mst_mode) { + if (mst_mode) dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, stream); - dm_helpers_dp_mst_send_payload_allocation( - stream->ctx, - stream, - false); - } + dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + stream->ctx, + stream); return DC_OK; } @@ -1504,12 +1447,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) stream->ctx, stream); - if (ret != ACT_LINK_LOST) { + if (ret != ACT_LINK_LOST) dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); - } + stream); /* slot X.Y for only current stream */ pbn_per_slot = get_pbn_per_slot(stream); @@ -1769,8 +1710,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); + stream); /* notify immediate branch device table update */ if (dm_helpers_dp_mst_write_payload_allocation_table( @@ -1899,8 +1839,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_ /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); + stream); } /* increase throttled vcp size */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 7abfc67d10a6..37d3027c32dc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -213,8 +213,10 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s link_srv->edp_get_replay_state = edp_get_replay_state; link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active; link_srv->edp_setup_replay = edp_setup_replay; + link_srv->edp_send_replay_cmd = edp_send_replay_cmd; link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal; link_srv->edp_replay_residency = edp_replay_residency; + link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal; link_srv->edp_wait_for_t12 = edp_wait_for_t12; link_srv->edp_is_ilr_optimization_required = @@ -368,6 +370,30 @@ static enum transmitter translate_encoder_to_transmitter( } } +static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link) +{ + uint8_t pwrseq_inst = 0xF; + struct dc_context *dc_ctx = link->dc->ctx; + + DC_LOGGER_INIT(dc_ctx->logger); + + switch (link->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id); + ASSERT(false); + break; + } + + return pwrseq_inst; +} + + static void link_destruct(struct dc_link *link) { int i; @@ -595,24 +621,6 @@ static bool construct_phy(struct dc_link *link, link->ddc_hw_inst = dal_ddc_get_line(get_ddc_pin(link->ddc)); - - if (link->dc->res_pool->funcs->panel_cntl_create && - (link->link_id.id == CONNECTOR_ID_EDP || - link->link_id.id == CONNECTOR_ID_LVDS)) { - panel_cntl_init_data.ctx = dc_ctx; - panel_cntl_init_data.inst = - panel_cntl_init_data.ctx->dc_edp_id_count; - link->panel_cntl = - link->dc->res_pool->funcs->panel_cntl_create( - &panel_cntl_init_data); - 
panel_cntl_init_data.ctx->dc_edp_id_count++; - - if (link->panel_cntl == NULL) { - DC_ERROR("Failed to create link panel_cntl!\n"); - goto panel_cntl_create_fail; - } - } - enc_init_data.ctx = dc_ctx; bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0, &enc_init_data.encoder); @@ -643,6 +651,23 @@ static bool construct_phy(struct dc_link *link, link->dc->res_pool->dig_link_enc_count++; link->link_enc_hw_inst = link->link_enc->transmitter; + + if (link->dc->res_pool->funcs->panel_cntl_create && + (link->link_id.id == CONNECTOR_ID_EDP || + link->link_id.id == CONNECTOR_ID_LVDS)) { + panel_cntl_init_data.ctx = dc_ctx; + panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count; + panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link); + link->panel_cntl = + link->dc->res_pool->funcs->panel_cntl_create( + &panel_cntl_init_data); + panel_cntl_init_data.ctx->dc_edp_id_count++; + + if (link->panel_cntl == NULL) { + DC_ERROR("Failed to create link panel_cntl!\n"); + goto panel_cntl_create_fail; + } + } for (i = 0; i < 4; i++) { if (bp_funcs->get_device_tag(dc_ctx->dc_bios, link->link_id, i, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index 4a954317d0da..595fb05946e9 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -25,6 +25,7 @@ #ifndef __LINK_VALIDATION_H__ #define __LINK_VALIDATION_H__ #include "link.h" + enum dc_status link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index db87aa7b5c90..289f5d133342 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -412,12 +412,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link) { enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN; - if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) + if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) { cable_max_link_rate = LINK_RATE_UHBR20; - else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) + } else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) { cable_max_link_rate = LINK_RATE_UHBR13_5; - else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) - cable_max_link_rate = LINK_RATE_UHBR10; + } else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) { + // allow DP40 cables to do UHBR13.5 for passive or unknown cable type + if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) { + cable_max_link_rate = LINK_RATE_UHBR13_5; + } else { + cable_max_link_rate = LINK_RATE_UHBR10; + } + } return cable_max_link_rate; } @@ -1392,7 +1398,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id) cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data); cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx( link->dc, link->link_enc->transmitter); - if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.cable_id.header.ret_status == 1) { cable_id->raw = cmd.cable_id.data.output_raw; DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw); diff --git 
a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0bb749133909..982eda3c46f5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -90,7 +90,8 @@ bool dpia_query_hpd_status(struct dc_link *link) cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; /* Return HPD status reported by DMUB if query successfully executed. */ - if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS) + if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + cmd.query_hpd.data.status == AUX_RET_SUCCESS) is_hpd_high = cmd.query_hpd.data.result; DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n", diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 7581023daa47..a7aa8c9da868 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -50,6 +50,7 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) && tmp->hpd_status && tmp->dpia_bw_alloc_config.bw_alloc_enabled); } + static void reset_bw_alloc_struct(struct dc_link *link) { link->dpia_bw_alloc_config.bw_alloc_enabled = false; @@ -58,7 +59,13 @@ static void reset_bw_alloc_struct(struct dc_link *link) link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; link->dpia_bw_alloc_config.response_ready = false; + link->dpia_bw_alloc_config.sink_allocated_bw = 0; } + +#define BW_GRANULARITY_0 4 // 0.25 Gbps +#define BW_GRANULARITY_1 2 // 0.5 Gbps +#define BW_GRANULARITY_2 1 // 1 Gbps + static uint8_t get_bw_granularity(struct dc_link *link) { uint8_t bw_granularity = 0; @@ -71,16 +78,20 @@ static uint8_t get_bw_granularity(struct dc_link *link) switch (bw_granularity & 0x3) { case 0: - bw_granularity = 4; + bw_granularity = BW_GRANULARITY_0; break; case 1: + bw_granularity = BW_GRANULARITY_1; + break; + case 2: default: - bw_granularity = 2; + bw_granularity = BW_GRANULARITY_2; break; } return bw_granularity; } + static int get_estimated_bw(struct dc_link *link) { uint8_t bw_estimated_bw = 0; @@ -93,31 +104,33 @@ static int get_estimated_bw(struct dc_link *link) return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); } -static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link) + +static int get_non_reduced_max_link_rate(struct dc_link *link) { - if (bw_needed > 0) - *stream_allocated_bw += bw_needed; + uint8_t nrd_max_link_rate = 0; - return true; + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LINK_RATE, + &nrd_max_link_rate, + sizeof(uint8_t)); + + return nrd_max_link_rate; } -static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link) -{ - bool ret = false; - if (*stream_allocated_bw > 0) { - *stream_allocated_bw -= bw_to_dealloc; - ret = true; - } else { - //Do nothing for now - ret = true; - } +static int get_non_reduced_max_lane_count(struct dc_link *link) +{ + uint8_t nrd_max_lane_count = 0; - // Unplug so reset values - if (!link->hpd_status) - reset_bw_alloc_struct(link); + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LANE_COUNT, + &nrd_max_lane_count, + sizeof(uint8_t)); - return ret; + return nrd_max_lane_count; } + /* * Read all New BW alloc 
configuration ex: estimated_bw, allocated_bw, * granularity, Driver_ID, CM_Group, & populate the BW allocation structs @@ -125,10 +138,22 @@ static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, stru */ static void init_usb4_bw_struct(struct dc_link *link) { - // Init the known values + reset_bw_alloc_struct(link); + + /* init the known values */ link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link); link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link); + link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link); + + DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.bw_granularity, + link->dpia_bw_alloc_config.estimated_bw); + DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n", + __func__, link->dpia_bw_alloc_config.nrd_max_link_rate, + link->dpia_bw_alloc_config.nrd_max_lane_count); } + static uint8_t get_lowest_dpia_index(struct dc_link *link) { const struct dc *dc_struct = link->dc; @@ -141,51 +166,66 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link) dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; - if (idx > dc_struct->links[i]->link_index) + if (idx > dc_struct->links[i]->link_index) { idx = dc_struct->links[i]->link_index; + break; + } } return idx; } + /* - * Get the Max Available BW or Max Estimated BW for each Host Router + * Get the maximum dp tunnel bandwidth of a host router * - * @link: pointer to the dc_link struct instance - * @type: ESTIMATD BW or MAX AVAILABLE BW + * @dc: pointer to the dc struct instance + * @hr_index: host router index * - * return: response_ready flag from dc_link struct + * return: host router maximum dp tunnel bandwidth */ -static int get_host_router_total_bw(struct dc_link *link, uint8_t type) +static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index) { - const struct dc *dc_struct = link->dc; - uint8_t lowest_dpia_index = get_lowest_dpia_index(link); - uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0; - struct dc_link *link_temp; + uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]); + uint8_t hr_index_temp = 0; + struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - int i; - - for (i = 0; i < MAX_PIPES * 2; ++i) { - if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) - continue; + for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { - link_temp = dc_struct->links[i]; - if (!link_temp || !link_temp->hpd_status) + if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; - idx_temp = (link_temp->link_index - lowest_dpia_index) / 2; - - if (idx_temp == idx) { - - if (type == HOST_ROUTER_BW_ESTIMATED) - total_bw += link_temp->dpia_bw_alloc_config.estimated_bw; - else if (type == HOST_ROUTER_BW_ALLOCATED) - total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw; + hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2; + + if (hr_index_temp == hr_index) { + link_dpia_primary = dc->links[i]; + link_dpia_secondary = dc->links[i + 1]; + + /** + * If BW allocation is enabled on both DPIAs, then + * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary) + * otherwise HR BW = Estimated(bw alloc enabled dpia) + */ + if ((link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && + (link_dpia_secondary->hpd_status &&
+ link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + } else if (link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; + } else if (link_dpia_secondary->hpd_status && + link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw; + } + break; } } return total_bw; } + /* * Cleanup function for when the dpia is unplugged to reset struct * and perform any required clean up @@ -194,42 +234,49 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type) * * return: none */ -static bool dpia_bw_alloc_unplug(struct dc_link *link) +static void dpia_bw_alloc_unplug(struct dc_link *link) { - if (!link) - return true; - - return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - link->dpia_bw_alloc_config.sink_allocated_bw, link); + if (link) { + DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", + __func__, link->link_index); + reset_bw_alloc_struct(link); + } } + static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) { uint8_t requested_bw; uint32_t temp; - // 1. Add check for this corner case #1 - if (req_bw > link->dpia_bw_alloc_config.estimated_bw) + /* Error check whether request bw greater than allocated */ + if (req_bw > link->dpia_bw_alloc_config.estimated_bw) { + DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n", + __func__, link->link_index); req_bw = link->dpia_bw_alloc_config.estimated_bw; + } temp = req_bw * link->dpia_bw_alloc_config.bw_granularity; requested_bw = temp / Kbps_TO_Gbps; - // Always make sure to add more to account for floating points + /* Always make sure to add more to account for floating points */ if (temp % Kbps_TO_Gbps) ++requested_bw; - // 2. 
Add check for this corner case #2 + /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) - return; + if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { + DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", + __func__, link->link_index); + } - if (core_link_write_dpcd( + link->dpia_bw_alloc_config.response_ready = false; // Reset flag + core_link_write_dpcd( link, REQUESTED_BW, &requested_bw, - sizeof(uint8_t)) == DC_OK) - link->dpia_bw_alloc_config.response_ready = false; // Reset flag + sizeof(uint8_t)); } + /* * Return the response_ready flag from dc_link struct * @@ -241,6 +288,7 @@ static bool get_cm_response_ready_flag(struct dc_link *link) { return link->dpia_bw_alloc_config.response_ready; } + // ------------------------------------------------------------------ // PUBLIC FUNCTIONS // ------------------------------------------------------------------ @@ -277,27 +325,27 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) DPTX_BW_ALLOCATION_MODE_CONTROL, &response, sizeof(uint8_t)) != DC_OK) { - DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n", - __func__); + DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n", + __func__, link->link_index); } else { // SUCCESS Enabled DPtx BW Allocation Mode Support - link->dpia_bw_alloc_config.bw_alloc_enabled = true; - DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n", - __func__); + DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n", + __func__, link->link_index); ret = true; init_usb4_bw_struct(link); + link->dpia_bw_alloc_config.bw_alloc_enabled = true; } } out: return ret; } + void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result) { int bw_needed = 0; int estimated = 0; - int host_router_total_estimated_bw = 0; if (!get_bw_alloc_proceed_flag((link))) return; @@ -306,14 +354,22 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res case DPIA_BW_REQ_FAILED: - DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__); + /* + * Ideally, we shouldn't run into this case as we always validate available + * bandwidth and request within that limit + */ + estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); + + DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n", + __func__, link->link_index); + DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); - // Update the new Estimated BW value updated by CM - link->dpia_bw_alloc_config.estimated_bw = - bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); + /* Update the new Estimated BW value updated by CM */ + link->dpia_bw_alloc_config.estimated_bw = estimated; + /* Allocate the previously requested bandwidth */ set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw); - link->dpia_bw_alloc_config.response_ready = false; /* * If FAIL then it is either: @@ -326,68 +382,34 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res case DPIA_BW_REQ_SUCCESS: - DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__); - - // 1. SUCCESS 1st time before any Pruning is done - // 2. SUCCESS after prev. 
FAIL before any Pruning is done - // 3. SUCCESS after Pruning is done but before enabling link - bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - // 1. - if (!link->dpia_bw_alloc_config.sink_allocated_bw) { - - allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link); - link->dpia_bw_alloc_config.sink_verified_bw = - link->dpia_bw_alloc_config.sink_allocated_bw; + DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", + __func__, link->link_index); + DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); - // SUCCESS from first attempt - if (link->dpia_bw_alloc_config.sink_allocated_bw > - link->dpia_bw_alloc_config.sink_max_bw) - link->dpia_bw_alloc_config.sink_verified_bw = - link->dpia_bw_alloc_config.sink_max_bw; - } - // 3. - else if (link->dpia_bw_alloc_config.sink_allocated_bw) { - - // Find out how much do we need to de-alloc - if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed) - deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link); - else - allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link); - } - - // 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA - // => check if estimated_bw changed + link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; link->dpia_bw_alloc_config.response_ready = true; break; case DPIA_EST_BW_CHANGED: - DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__); - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED); - // 1. If due to unplug of other sink - if (estimated == host_router_total_estimated_bw) { - // First update the estimated & max_bw fields - if (link->dpia_bw_alloc_config.estimated_bw < estimated) - link->dpia_bw_alloc_config.estimated_bw = estimated; - } - // 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw - else { - // We lost estimated bw usually due to plug event of other dpia - link->dpia_bw_alloc_config.estimated_bw = estimated; - } + DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n", + __func__, link->link_index); + DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); + + link->dpia_bw_alloc_config.estimated_bw = estimated; break; case DPIA_BW_ALLOC_CAPS_CHANGED: - DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__); + DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n", + __func__, link->link_index); link->dpia_bw_alloc_config.bw_alloc_enabled = false; break; } @@ -409,17 +431,17 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); do { - if (!(timeout > 0)) + if (timeout > 0) timeout--; else break; - fsleep(10 * 1000); + msleep(10); } while (!get_cm_response_ready_flag(link)); if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED); + ret = link->dpia_bw_alloc_config.sink_allocated_bw; } //2. 
Cold Unplug else if (!link->hpd_status) @@ -428,63 +450,74 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea out: return ret; } -int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) + +bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { - int ret = 0; + bool ret = false; uint8_t timeout = 10; + DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", + __func__, link->link_index, link->hpd_status, + link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); + if (!get_bw_alloc_proceed_flag(link)) goto out; - /* - * Sometimes stream uses same timing parameters as the already - * allocated max sink bw so no need to re-alloc - */ - if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) { - set_usb4_req_bw_req(link, req_bw); - do { - if (!(timeout > 0)) - timeout--; - else - break; - udelay(10 * 1000); - } while (!get_cm_response_ready_flag(link)); + set_usb4_req_bw_req(link, req_bw); + do { + if (timeout > 0) + timeout--; + else + break; + msleep(10); + } while (!get_cm_response_ready_flag(link)); - if (!timeout) - ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED); - } + if (timeout) + ret = true; out: + DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret); return ret; } + bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) { bool ret = true; - int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }; - uint8_t lowest_dpia_index = 0, dpia_index = 0; - uint8_t i; + int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0; + uint8_t lowest_dpia_index, i, hr_index; if (!num_dpias || num_dpias > MAX_DPIA_NUM) return ret; - //Get total Host Router BW & Validate against each Host Router max BW + lowest_dpia_index = get_lowest_dpia_index(link[0]); + + /* get total Host Router BW with granularity for the given modes */ for (i = 0; i < num_dpias; ++i) { + int granularity_Gbps = 0; + int bw_granularity = 0; if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled) continue; - lowest_dpia_index = get_lowest_dpia_index(link[i]); if (link[i]->link_index < lowest_dpia_index) continue; - dpia_index = (link[i]->link_index - lowest_dpia_index) / 2; - bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i]; - if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) { + granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity); + bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps + + ((bw_needed_per_dpia[i] % granularity_Gbps) ? 
granularity_Gbps : 0); - ret = false; - break; + hr_index = (link[i]->link_index - lowest_dpia_index) / 2; + bw_needed_per_hr[hr_index] += bw_granularity; + } + + /* validate against each Host Router max BW */ + for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) { + if (bw_needed_per_hr[hr_index]) { + host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index); + if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) { + ret = false; + break; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 7292690383ae..981bc4eb6120 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -59,9 +59,9 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link); * @link: pointer to the dc_link struct instance * @req_bw: Bw requested by the stream * - * return: allocated bw else return 0 + * return: true if allocated successfully */ -int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); +bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); /* * Handle the USB4 BW Allocation related functionality here: diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 0c00e94e90b1..ba69874be5a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -190,9 +190,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; - if (link->replay_settings.config.force_disable_desync_error_check) - return; - if (!link->replay_settings.replay_feature_enabled) return; @@ -210,9 +207,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_error_status.raw, sizeof(replay_error_status.raw)); - if (replay_configuration.bits.DESYNC_ERROR_STATUS) - link->replay_settings.config.received_desync_error_hpd = 1; - link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR = replay_error_status.bits.LINK_CRC_ERROR; link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR = @@ -225,6 +219,12 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) { bool allow_active; + if (link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR) + link->replay_settings.config.received_desync_error_hpd = 1; + + if (link->replay_settings.config.force_disable_desync_error_check) + return; + /* Acknowledge and clear configuration bits */ dm_helpers_dp_write_dpcd( link->ctx, @@ -265,7 +265,7 @@ void dp_handle_link_loss(struct dc_link *link) for (i = count - 1; i >= 0; i--) { // Always use max settings here for DP 1.4a LL Compliance CTS - if (link->is_automated) { + if (link->skip_fallback_on_link_loss) { pipes[i]->link_config.dp_link_settings.lane_count = link->verified_link_cap.lane_count; pipes[i]->link_config.dp_link_settings.link_rate = @@ -404,7 +404,9 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { // Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC - link->is_automated = true; + if 
(link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + link->skip_fallback_on_link_loss = true; + device_service_clear.bits.AUTOMATED_TEST = 1; core_link_write_dpcd( link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 90339c2dfd84..5a0b04518956 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -807,7 +807,7 @@ void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) + union dpcd_training_lane *dpcd_lane_settings) { uint32_t lane; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 7d027bac8255..851bd17317a0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -111,7 +111,7 @@ void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); + union dpcd_training_lane *dpcd_lane_settings); enum dc_dp_training_pattern decide_cr_training_pattern( const struct dc_link_settings *link_settings); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index 4f4e899e5c46..e8dda44b23cb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -811,7 +811,7 @@ static enum link_training_result dpia_training_eq_transparent( /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 * has to share encoders unlike DP and USBC */ - if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) { + if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) { result = LINK_TRAINING_SUCCESS; break; } @@ -1037,7 +1037,7 @@ enum link_training_result dpia_perform_link_training( */ if (result == LINK_TRAINING_SUCCESS) { fsleep(5000); - if (!link->is_automated) + if (!link->skip_fallback_on_link_loss) result = dp_check_link_loss_status(link, <_settings); } else if (result == LINK_TRAINING_ABORT) dpia_training_abort(link, <_settings, repeater_id); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index 68096d12f52f..7087cdc9e977 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -205,6 +205,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18}; const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03}; const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06}; + const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87}; 
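+ /* The same opaque vendor retimer payload is added to both the legacy and current fixed VS/PE sequences and is programmed below ahead of the lane-count specific writes; the byte meanings are not spelled out in this change. */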
enum link_training_result status = LINK_TRAINING_SUCCESS; uint8_t lane = 0; union down_spread_ctrl downspread = {0}; @@ -293,6 +294,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( DP_DOWNSPREAD_CTRL, lt_settings->link_settings.link_spread); + link_configure_fixed_vs_pe_retimer(link->ddc, + &vendor_lttpr_write_data_dpmf[0], + sizeof(vendor_lttpr_write_data_dpmf)); + if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) { link_configure_fixed_vs_pe_retimer(link->ddc, &vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1)); @@ -552,6 +557,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18}; const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03}; const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06}; + const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87}; enum link_training_result status = LINK_TRAINING_SUCCESS; uint8_t lane = 0; union down_spread_ctrl downspread = {0}; @@ -639,6 +645,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( DP_DOWNSPREAD_CTRL, lt_settings->link_settings.link_spread); + link_configure_fixed_vs_pe_retimer(link->ddc, + &vendor_lttpr_write_data_dpmf[0], + sizeof(vendor_lttpr_write_data_dpmf)); + if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) { link_configure_fixed_vs_pe_retimer(link->ddc, &vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1)); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e5cfaaef70b3..7f1196528218 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -529,6 +529,9 @@ bool edp_set_backlight_level(const struct dc_link *link, if (dc_is_embedded_signal(link->connector_signal)) { struct pipe_ctx *pipe_ctx = get_pipe_from_link(link); + if (link->panel_cntl) + link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16; + if (pipe_ctx) { /* Disable brightness ramping when the display is blanked * as it can hang the DMCU @@ -997,6 +1000,36 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream return true; } +/* + * This is a general interface for Replay to set a 32-bit variable in DMUB. + * replay_FW_Message_type: indicates which instruction or variable is passed to DMUB + * cmd_data: value of the config.
+ */ +bool edp_send_replay_cmd(struct dc_link *link, + enum replay_FW_Message_type msg, + union dmub_replay_cmd_set *cmd_data) +{ + struct dc *dc = link->ctx->dc; + struct dmub_replay *replay = dc->res_pool->replay; + unsigned int panel_inst; + + if (!replay) + return false; + + DC_LOGGER_INIT(link->ctx->logger); + + if (dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + cmd_data->panel_inst = panel_inst; + else { + DC_LOG_DC("%s(): get edp panel inst fail ", __func__); + return false; + } + + replay->funcs->replay_send_cmd(replay, msg, cmd_data); + + return true; +} + bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal) { struct dc *dc = link->ctx->dc; @@ -1035,6 +1068,33 @@ bool edp_replay_residency(const struct dc_link *link, return true; } +bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal) +{ + struct dc *dc = link->ctx->dc; + struct dmub_replay *replay = dc->res_pool->replay; + unsigned int panel_inst; + + if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + return false; + + /* This function returns true only when both the power option and the coasting vtotal change */ + if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts && + coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) { + if (link->replay_settings.replay_feature_enabled && + replay->funcs->replay_set_power_opt_and_coasting_vtotal) { + replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay, + *power_opts, panel_inst, coasting_vtotal); + link->replay_settings.replay_power_opt_active = *power_opts; + link->replay_settings.coasting_vtotal = coasting_vtotal; + } else + return false; + } else + return false; + + return true; +} + static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index a034288ad75d..34e521af7bb4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -56,10 +56,15 @@ bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable, bool wait, bool force_static, const unsigned int *power_opts); bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream); +bool edp_send_replay_cmd(struct dc_link *link, + enum replay_FW_Message_type msg, + union dmub_replay_cmd_set *cmd_data); bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal); bool edp_replay_residency(const struct dc_link *link, unsigned int *residency, const bool is_start, const bool is_alpm); bool edp_get_replay_state(const struct dc_link *link, uint64_t *state); +bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal); bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile new file mode 100644 index 000000000000..bb213335fb9f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile @@ -0,0 +1,108 @@ + +# Copyright 2022 Advanced Micro Devices, Inc.
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# Makefile for the 'optc' sub-component of DAL. +# + + +ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN +############################################################################### + +OPTC_DCN10 = dcn10_optc.o + +AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10) + +############################################################################### + +OPTC_DCN20 = dcn20_optc.o + +AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20) + +############################################################################### + +OPTC_DCN201 = dcn201_optc.o + +AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201) + +############################################################################### + +############################################################################### + +############################################################################### + +OPTC_DCN30 = dcn30_optc.o + +AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30) + +############################################################################### + +OPTC_DCN301 = dcn301_optc.o + +AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301) + +############################################################################### + +OPTC_DCN31 = dcn31_optc.o + +AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31) + +############################################################################### + +OPTC_DCN314 = dcn314_optc.o + +AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314) + +############################################################################### + +OPTC_DCN32 = dcn32_optc.o + +AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32) + +############################################################################### + +OPTC_DCN35 = dcn35_optc.o + +AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35)) + +AMD_DISPLAY_FILES 
+= $(AMD_DAL_OPTC_DCN35) + +############################################################################### + +############################################################################### +endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 0e8f4f36c87c..0e8f4f36c87c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index ab81594a7fad..ab81594a7fad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c index 58bdbd859bf9..58bdbd859bf9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h index f7968b9ca16e..c2e03ced392e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h @@ -26,7 +26,7 @@ #ifndef __DC_OPTC_DCN20_H__ #define __DC_OPTC_DCN20_H__ -#include "../dcn10/dcn10_optc.h" +#include "dcn10/dcn10_optc.h" #define TG_COMMON_REG_LIST_DCN2_0(inst) \ TG_COMMON_REG_LIST_DCN(inst),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c index 70fcbec03fb6..70fcbec03fb6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h index e9545b73513a..e9545b73513a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c index b97bdb868a0e..b97bdb868a0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h index d3a056c12b0d..d3a056c12b0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c index b3cfcb887905..b3cfcb887905 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h index b49585682a15..b49585682a15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c index 63a677c8ee27..63a677c8ee27 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h 
b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h index 30b81a448ce2..30b81a448ce2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c index 0086cafb0f7a..0086cafb0f7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h index 99c098e76116..99c098e76116 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index a2c4db2cebdd..91ea0d4da06a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -172,6 +172,13 @@ static bool optc32_disable_crtc(struct timing_generator *optc) REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h index 8ce3b178cab0..8ce3b178cab0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index a4a39f1638cf..08a59cf449ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -144,6 +144,13 @@ static bool optc35_disable_crtc(struct timing_generator *optc) REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h index 1f422e4c468f..1f422e4c468f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile new file mode 100644 index 000000000000..0a75ed8962a5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -0,0 +1,199 @@ + +# Copyright 2022 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# Makefile for the 'resource' sub-component of DAL. +# + + +############################################################################### +# DCE +############################################################################### + +RESOURCE_DCE100 = dce100_resource.o + +AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100) + +############################################################################### + +RESOURCE_DCE110 = dce110_resource.o + +AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110) + +############################################################################### + +RESOURCE_DCE112 = dce112_resource.o + +AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112) + +############################################################################### + +RESOURCE_DCE120 = dce120_resource.o + +AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120) + +############################################################################### + +RESOURCE_DCE80 = dce80_resource.o + +AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) + +ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN +############################################################################### + +RESOURCE_DCN10 = dcn10_resource.o + +AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10) + +############################################################################### + +RESOURCE_DCN20 = dcn20_resource.o + +AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20) + +############################################################################### + +RESOURCE_DCN201 = dcn201_resource.o + +AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201) + +############################################################################### + +RESOURCE_DCN21 = dcn21_resource.o + +AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21) + +############################################################################### + +############################################################################### + +############################################################################### + +RESOURCE_DCN30 = dcn30_resource.o + +AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30)) + +AMD_DISPLAY_FILES += 
$(AMD_DAL_RESOURCE_DCN30) + +############################################################################### + +RESOURCE_DCN301 = dcn301_resource.o + +AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301) + +############################################################################### + +RESOURCE_DCN302 = dcn302_resource.o + +AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN302) + +############################################################################### + +RESOURCE_DCN303 = dcn303_resource.o + +AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303) + +############################################################################### + +RESOURCE_DCN31 = dcn31_resource.o + +AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31) + +############################################################################### + +RESOURCE_DCN314 = dcn314_resource.o + +AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314) + +############################################################################### + +RESOURCE_DCN315 = dcn315_resource.o + +AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315) + +############################################################################### + +RESOURCE_DCN316 = dcn316_resource.o + +AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316) + +############################################################################### + +RESOURCE_DCN32 = dcn32_resource.o + +AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32) + +############################################################################### + +RESOURCE_DCN321 = dcn321_resource.o + +AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321) + +############################################################################### + +RESOURCE_DCN35 = dcn35_resource.o + +AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35) + +############################################################################### + +############################################################################### + +endif diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 53a5f4cb648c..53a5f4cb648c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h index fecab7c560f5..fecab7c560f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index fe518fd27b08..fe518fd27b08 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h index aa4531e0800e..aa4531e0800e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index d1edac46c9a0..d1edac46c9a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h index 1f57ebc6f9b4..1f57ebc6f9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 962de79be169..20662edd0ae4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -36,7 +36,7 @@ #include "dce110/dce110_resource.h" #include "virtual/virtual_stream_encoder.h" -#include "dce120_timing_generator.h" +#include "dce120/dce120_timing_generator.h" #include "irq/dce120/irq_service_dce120.h" #include "dce/dce_opp.h" #include "dce/dce_clock_source.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h index 3d1f3cf012f4..3d1f3cf012f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt new file mode 100644 index 000000000000..19dd73bc9ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt @@ -0,0 +1,4 @@ +dal3_subdirectory_sources( + dce80_resource.c + dce80_resource.h + )
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 35a2cce0c2b8..35a2cce0c2b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h index eff31ab83a39..eff31ab83a39 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index b94c5c97eee7..d08d10969251 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -26,29 +26,32 @@ #include "dm_services.h" #include "dc.h" -#include "dcn10_init.h" +#include "dcn10/dcn10_init.h" #include "resource.h" #include "include/irq_service_interface.h" -#include "dcn10_resource.h" -#include "dcn10_ipp.h" -#include "dcn10_mpc.h" +#include "dcn10/dcn10_resource.h" +#include "dcn10/dcn10_ipp.h" +#include "dcn10/dcn10_mpc.h" + +#include "dcn10/dcn10_dwb.h" + #include "irq/dcn10/irq_service_dcn10.h" -#include "dcn10_dpp.h" -#include "dcn10_optc.h" +#include "dcn10/dcn10_dpp.h" +#include "dcn10/dcn10_optc.h" #include "dcn10/dcn10_hwseq.h" #include "dce110/dce110_hwseq.h" -#include "dcn10_opp.h" -#include "dcn10_link_encoder.h" -#include "dcn10_stream_encoder.h" +#include "dcn10/dcn10_opp.h" +#include "dcn10/dcn10_link_encoder.h" +#include "dcn10/dcn10_stream_encoder.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dce112/dce112_resource.h" -#include "dcn10_hubp.h" -#include "dcn10_hubbub.h" +#include "dcn10/dcn10_hubp.h" +#include "dcn10/dcn10_hubbub.h" #include "dce/dce_panel_cntl.h" #include "soc15_hw_ip.h" @@ -1247,7 +1250,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( /* Store first available for MST second display * in daisy chain use case */ - j = i; + + if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL) + j = i; + if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == link->link_enc->preferred_engine) return pool->stream_enc[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h index bf8e33cd8147..bf8e33cd8147 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 0a422fbb14bc..f9c5bc624be3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -29,7 +29,7 @@ #include "dm_services.h" #include "dc.h" -#include "dcn20_init.h" +#include "dcn20/dcn20_init.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -39,29 +39,29 @@ #include "dcn10/dcn10_hubp.h" #include "dcn10/dcn10_ipp.h" -#include "dcn20_hubbub.h" -#include "dcn20_mpc.h" -#include "dcn20_hubp.h" +#include "dcn20/dcn20_hubbub.h" +#include "dcn20/dcn20_mpc.h" +#include "dcn20/dcn20_hubp.h" #include 
"irq/dcn20/irq_service_dcn20.h" -#include "dcn20_dpp.h" -#include "dcn20_optc.h" +#include "dcn20/dcn20_dpp.h" +#include "dcn20/dcn20_optc.h" #include "dcn20/dcn20_hwseq.h" #include "dce110/dce110_hwseq.h" #include "dcn10/dcn10_resource.h" -#include "dcn20_opp.h" +#include "dcn20/dcn20_opp.h" -#include "dcn20_dsc.h" +#include "dcn20/dcn20_dsc.h" -#include "dcn20_link_encoder.h" -#include "dcn20_stream_encoder.h" +#include "dcn20/dcn20_link_encoder.h" +#include "dcn20/dcn20_stream_encoder.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" -#include "dcn20_dccg.h" -#include "dcn20_vmid.h" +#include "dcn20/dcn20_dccg.h" +#include "dcn20/dcn20_vmid.h" #include "dce/dce_panel_cntl.h" #include "navi10_ip_offset.h" @@ -1273,15 +1273,19 @@ static void build_clamping_params(struct dc_stream_state *stream) stream->clamping.pixel_encoding = stream->timing.pixel_encoding; } -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) { - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); +} + +static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + + dcn20_build_pipe_pix_clk_params(pipe_ctx); pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h index 37ecaccc5d12..4cee3fa11a7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -165,6 +165,7 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream); enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state); +void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx); #endif /* __DC_RESOURCE_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index bca22d867696..914b234d7f6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -26,7 +26,7 @@ #include "dm_services.h" #include "dc.h" -#include "dcn201_init.h" +#include "dcn201/dcn201_init.h" #include "dml/dcn20/dcn20_fpu.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -36,16 +36,16 @@ #include "dcn10/dcn10_hubp.h" #include "dcn10/dcn10_ipp.h" -#include "dcn201_mpc.h" -#include "dcn201_hubp.h" +#include "dcn201/dcn201_mpc.h" +#include "dcn201/dcn201_hubp.h" #include "irq/dcn201/irq_service_dcn201.h" #include "dcn201/dcn201_dpp.h" #include "dcn201/dcn201_hubbub.h" -#include "dcn201_dccg.h" -#include "dcn201_optc.h" +#include "dcn201/dcn201_dccg.h" +#include "dcn201/dcn201_optc.h" #include "dcn201/dcn201_hwseq.h" #include "dce110/dce110_hwseq.h" -#include 
"dcn201_opp.h" +#include "dcn201/dcn201_opp.h" #include "dcn201/dcn201_link_encoder.h" #include "dcn20/dcn20_stream_encoder.h" #include "dce/dce_clock_source.h" @@ -55,7 +55,7 @@ #include "dce110/dce110_resource.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" -#include "dcn201_hubbub.h" +#include "dcn201/dcn201_hubbub.h" #include "dcn10/dcn10_resource.h" #include "cyan_skillfish_ip_offset.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h index e0467d17d4ae..e0467d17d4ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 42277b280586..65d337731f56 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -29,7 +29,7 @@ #include "dm_services.h" #include "dc.h" -#include "dcn21_init.h" +#include "dcn21/dcn21_init.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -44,7 +44,7 @@ #include "dcn20/dcn20_hubbub.h" #include "dcn20/dcn20_mpc.h" #include "dcn20/dcn20_hubp.h" -#include "dcn21_hubp.h" +#include "dcn21/dcn21_hubp.h" #include "irq/dcn21/irq_service_dcn21.h" #include "dcn20/dcn20_dpp.h" #include "dcn20/dcn20_optc.h" @@ -61,7 +61,7 @@ #include "dml/display_mode_vba.h" #include "dcn20/dcn20_dccg.h" #include "dcn21/dcn21_dccg.h" -#include "dcn21_hubbub.h" +#include "dcn21/dcn21_hubbub.h" #include "dcn10/dcn10_resource.h" #include "dce/dce_panel_cntl.h" @@ -713,9 +713,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool) pool->base.hubps[i] = NULL; } - if (pool->base.irqs != NULL) { + if (pool->base.irqs != NULL) dal_irq_service_destroy(&pool->base.irqs); - } } for (i = 0; i < pool->base.res_cap->num_ddc; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h index f7ecc002c2f7..f7ecc002c2f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 7b259cb5f418..37a64186f324 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -27,7 +27,7 @@ #include "dm_services.h" #include "dc.h" -#include "dcn30_init.h" +#include "dcn30/dcn30_init.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -1682,6 +1682,7 @@ noinline bool dcn30_internal_validate_bw( * We don't actually support prefetch mode 2, so require that we * at least support prefetch mode 1. 
*/ + context->bw_ctx.dml.validate_max_state = fast_validate; context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = dm_allow_self_refresh; @@ -1691,6 +1692,7 @@ noinline bool dcn30_internal_validate_bw( memset(merge, 0, sizeof(merge)); vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); } + context->bw_ctx.dml.validate_max_state = false; } dml_log_mode_support_params(&context->bw_ctx.dml); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h index 8e6b8b7368fd..8e6b8b7368fd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index f3b75f283aa2..511ff6b5b985 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -27,7 +27,7 @@ #include "dm_services.h" #include "dc.h" -#include "dcn301_init.h" +#include "dcn301/dcn301_init.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -61,7 +61,7 @@ #include "dcn10/dcn10_resource.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn301/dcn301_dio_link_encoder.h" -#include "dcn301_panel_cntl.h" +#include "dcn301/dcn301_panel_cntl.h" #include "vangogh_ip_offset.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h index ae8672680cdd..ae8672680cdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 63ac984a04f7..5791b5cc2875 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -23,9 +23,9 @@ * */ -#include "dcn302_init.h" +#include "dcn302/dcn302_init.h" #include "dcn302_resource.h" -#include "dcn302_dccg.h" +#include "dcn302/dcn302_dccg.h" #include "irq/dcn302/irq_service_dcn302.h" #include "dcn30/dcn30_dio_link_encoder.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h index 9f24e73b92b3..9f24e73b92b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 49cb7fde416a..25cd6236b054 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -23,9 +23,9 @@ * Authors: AMD */ -#include "dcn303_init.h" +#include "dcn303/dcn303_init.h" #include "dcn303_resource.h" -#include "dcn303_dccg.h" +#include "dcn303/dcn303_dccg.h" #include "irq/dcn303/irq_service_dcn303.h" #include "dcn30/dcn30_dio_link_encoder.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h index 37cf1525820b..37cf1525820b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h 
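Editorial aside: the dcn30_internal_validate_bw() hunk above uses a set-then-restore pattern: validate_max_state is raised only while the fast-validate pass runs its pipe split/merge retries, then cleared so a later full validation starts from a clean DML state. A minimal self-contained sketch of that pattern, with hypothetical names rather than the real DML structures:

#include <stdbool.h>

struct dml_ctx {
	bool validate_max_state; /* lets the checks clamp to the max clock state */
};

static bool run_bw_checks(struct dml_ctx *dml)
{
	/* the bandwidth formulas would consult dml->validate_max_state here */
	return true;
}

static bool internal_validate_bw(struct dml_ctx *dml, bool fast_validate)
{
	bool ok;

	dml->validate_max_state = fast_validate; /* only the fast pass may clamp */
	ok = run_bw_checks(dml);
	dml->validate_max_state = false; /* always restore before returning */
	return ok;
}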
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 79416cfb22f0..31035fc3d868 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -70,7 +70,7 @@ #include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" -#include "dcn31_panel_cntl.h" +#include "dcn31/dcn31_panel_cntl.h" #include "dcn30/dcn30_dwb.h" #include "dcn30/dcn30_mmhubbub.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index 901436591ed4..901436591ed4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index c97391edb5ff..c97391edb5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h index 49ffe71018df..49ffe71018df 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index cb8024eee8e4..515ba435f759 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1631,8 +1631,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context) int i; struct resource_context *res_ctx = &context->res_ctx; - /*Don't apply for single stream*/ - if (context->stream_count < 2) + /* Only apply for dual stream scenarios with edp*/ + if (context->stream_count != 2) + return false; + if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP) return false; for (i = 0; i < dc->res_pool->pipe_count; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h index 22849eaa6f24..22849eaa6f24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index b9753d4606f8..b9753d4606f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h index aba6d634131b..aba6d634131b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 89b072447dba..ac04a9c9a3d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -27,7 +27,7 @@ #include "dm_services.h" #include 
"dc.h" -#include "dcn32_init.h" +#include "dcn32/dcn32_init.h" #include "resource.h" #include "include/irq_service_interface.h" @@ -41,7 +41,7 @@ #include "dcn31/dcn31_hubbub.h" #include "dcn32/dcn32_hubbub.h" #include "dcn32/dcn32_mpc.h" -#include "dcn32_hubp.h" +#include "dcn32/dcn32_hubp.h" #include "irq/dcn32/irq_service_dcn32.h" #include "dcn32/dcn32_dpp.h" #include "dcn32/dcn32_optc.h" @@ -89,6 +89,8 @@ #include "dcn20/dcn20_vmid.h" #include "dml/dcn32/dcn32_fpu.h" +#include "dc_state_priv.h" + #include "dml2/dml2_wrapper.h" #define DC_LOGGER_INIT(logger) @@ -1644,7 +1646,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) phantom_plane = prev_phantom_plane; else - phantom_plane = dc_create_plane_state(dc); + phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, @@ -1665,9 +1667,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc, phantom_plane->clip_rect.y = 0; phantom_plane->clip_rect.height = phantom_stream->src.height; - phantom_plane->is_phantom = true; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); + dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context); curr_pipe = curr_pipe->bottom_pipe; prev_phantom_plane = phantom_plane; @@ -1683,13 +1683,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_stream_state *phantom_stream = NULL; struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream); /* stream has limited viewport and small timing */ memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); @@ -1699,81 +1693,10 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); DC_FP_END(); - dc_add_stream_to_ctx(dc, context, phantom_stream); + dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream); return phantom_stream; } -void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - struct dc_plane_state *phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (resource_is_pipe_type(pipe, OTG_MASTER) && - resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_plane_state_retain(phantom_plane); - dc_stream_retain(phantom_stream); - } - } -} - -// return true if removed piped from ctx, false otherwise -bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) -{ - int i; - bool removed_pipe = false; - struct dc_plane_state 
*phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); - - /* Ref count is incremented on allocation and also when added to the context. - * Therefore we must call release for the the phantom plane and stream once - * they are removed from the ctx to finally decrement the refcount to 0 to free. - */ - dc_plane_state_release(phantom_plane); - dc_stream_release(phantom_stream); - - removed_pipe = true; - } - - /* For non-full updates, a shallow copy of the current state - * is created. In this case we don't want to erase the current - * state (there can be 2 HIRQL threads, one in flip, and one in - * checkMPO) that can cause a race condition. - * - * This is just a workaround, needs a proper fix. - */ - if (!fast_update) { - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; - } - } - } - return removed_pipe; -} - /* TODO: Input to this function should indicate which pipe indexes (or streams) * require a phantom pipe / stream */ @@ -1798,7 +1721,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // We determine which phantom pipes were added by comparing with // the phantom stream. if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { pipe->stream->use_dynamic_meta = false; pipe->plane_state->flip_immediate = false; if (!resource_build_scaling_params(pipe)) { @@ -1817,7 +1740,6 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val int vlevel = 0; int pipe_cnt = 0; display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - struct mall_temp_config mall_temp_config; /* To handle Freesync properly, setting FreeSync DML parameters * to its default state for the first stage of validation @@ -1827,29 +1749,12 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val DC_LOGGER_INIT(dc->ctx->logger); - /* For fast validation, there are situations where a shallow copy of - * of the dc->current_state is created for the validation. In this case - * we want to save and restore the mall config because we always - * teardown subvp at the beginning of validation (and don't attempt - * to add it back if it's fast validation). If we don't restore the - * subvp config in cases of fast validation + shallow copy of the - * dc->current_state, the dc->current_state will have a partially - * removed subvp state when we did not intend to remove it. 
- */ - if (fast_validate) { - memset(&mall_temp_config, 0, sizeof(mall_temp_config)); - dcn32_save_mall_state(dc, context, &mall_temp_config); - } - BW_VAL_TRACE_COUNT(); DC_FP_START(); out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); DC_FP_END(); - if (fast_validate) - dcn32_restore_mall_state(dc, context, &mall_temp_config); - if (pipe_cnt == 0) goto validate_out; @@ -1933,7 +1838,7 @@ int dcn32_populate_dml_pipes_from_context( * This is just a workaround -- needs a proper fix. */ if (!fast_validate) { - switch (pipe->stream->mall_stream_config.type) { + switch (dc_state_get_pipe_subvp_type(context, pipe)) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; subvp_in_use = true; @@ -1994,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context( static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, + .get_subvp_en = resource_subvp_in_use, }; void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, @@ -2037,10 +1942,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -2453,16 +2355,19 @@ static bool dcn32_resource_construct( dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream; dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream; dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = 
&dc_state_get_pipe_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream; dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index b931008114c9..62611acd4bcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -39,6 +39,7 @@ #define DCN3_2_MBLK_HEIGHT_8BPE 64 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq #define SUBVP_HIGH_REFRESH_LIST_LEN 4 +#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2 #define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800 #define DCN3_2_VMIN_DISPCLK_HZ 717000000 @@ -57,6 +58,15 @@ struct subvp_high_refresh_list { } res[SUBVP_HIGH_REFRESH_LIST_LEN]; }; +struct subvp_active_margin_list { + int min_refresh; + int max_refresh; + struct { + int width; + int height; + } res[SUBVP_ACTIVE_MARGIN_LIST_LEN]; +}; + struct dcn32_resource_pool { struct resource_pool base; }; @@ -81,12 +91,6 @@ bool dcn32_release_post_bldn_3dlut( struct dc_3dlut **lut, struct dc_transfer_func **shaper); -bool dcn32_remove_phantom_pipes(struct dc *dc, - struct dc_state *context, bool fast_update); - -void dcn32_retain_phantom_pipes(struct dc *dc, - struct dc_state *context); - void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -127,9 +131,6 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc, bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, struct dc_state *context); -bool dcn32_subvp_in_use(struct dc *dc, - struct dc_state *context); - bool dcn32_mpo_in_use(struct dc_state *context); bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); @@ -159,15 +160,7 @@ void dcn32_determine_det_override(struct dc *dc, void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes); -void dcn32_save_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - -void dcn32_restore_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - -struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); @@ -183,6 +176,8 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context); bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel); +void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes); + /* definitions for run time init of reg offsets */ /* CLK SRC */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index f7de3eca1225..e1ab207c46f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -63,7 +63,7 @@ #include 
"dcn31/dcn31_apg.h" #include "dcn31/dcn31_dio_link_encoder.h" #include "dcn32/dcn32_dio_link_encoder.h" -#include "dcn321_dio_link_encoder.h" +#include "dcn321/dcn321_dio_link_encoder.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" @@ -92,6 +92,8 @@ #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dc_state_priv.h" + #define DC_LOGGER_INIT(logger) enum dcn321_clk_src_array_id { @@ -1572,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool) static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, + .get_subvp_en = resource_subvp_in_use, }; static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) @@ -1605,10 +1607,7 @@ static struct resource_funcs dcn321_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -2007,16 +2006,19 @@ static bool dcn321_resource_construct( dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream; dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream; dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream; dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; 
dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h index 82cbf009f2d3..82cbf009f2d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 70ef1e7ff841..761ec9891875 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -78,7 +78,7 @@ #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" #include "dcn35/dcn35_hwseq.h" -#include "dcn35_dio_link_encoder.h" +#include "dcn35/dcn35_dio_link_encoder.h" #include "dml/dcn31/dcn31_fpu.h" /*todo*/ #include "dml/dcn35/dcn35_fpu.h" #include "dcn35/dcn35_dwb.h" @@ -96,12 +96,15 @@ #include "reg_helper.h" #include "dce/dmub_abm.h" #include "dce/dmub_psr.h" +#include "dce/dmub_replay.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" #include "dml/dcn31/display_mode_vba_31.h" /*temp*/ #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dc_state_priv.h" + #include "link_enc_cfg.h" #define DC_LOGGER_INIT(logger) @@ -626,7 +629,19 @@ static struct dce_hwseq_registers hwseq_reg; HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh) static const struct dce_hwseq_shift hwseq_shift = { HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) @@ -705,7 +720,9 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, - .disable_clock_gate = true, + .disable_optc_power_gate = true, /* should be the same as the two gates above */ + .disable_hpo_power_gate = true, /* dmub fw forces domain25 on */ + .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, .performance_trace = false, @@ -724,7 +741,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = false, + .cm = true, .mpc = true, .optc = true, .vpg = true, @@ -752,7 +769,7 @@ static const struct dc_debug_options 
debug_defaults_drv = { .enable_hpo_pg_support = false, .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = false, - .disable_idle_power_optimizations = true, + .disable_idle_power_optimizations = false, .dmcub_emulation = false, .disable_boot_optimizations = false, .disable_unbounded_requesting = false, @@ -764,13 +781,15 @@ static const struct dc_debug_options debug_defaults_drv = { .ignore_pg = true, .psp_disabled_wa = true, .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 + .ips2_entry_delay_us = 400, + .static_screen_wait_frames = 2, }; static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, .disallow_psrsu = false, + .disallow_replay = false, }, .ilr = { .optimize_edp_link_rate = true, @@ -1529,6 +1548,9 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool) if (pool->base.psr != NULL) dmub_psr_destroy(&pool->base.psr); + if (pool->base.replay != NULL) + dmub_replay_destroy(&pool->base.replay); + if (pool->base.pg_cntl != NULL) dcn_pg_cntl_destroy(&pool->base.pg_cntl); @@ -2013,6 +2035,14 @@ static bool dcn35_resource_construct( goto create_fail; } + /* Replay */ + pool->base.replay = dmub_replay_create(ctx); + if (pool->base.replay == NULL) { + dm_error("DC: failed to create replay obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + /* ABM */ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { pool->base.multiple_abms[i] = dmub_abm_create(ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h index 99aea102e3f7..a51c4a9eaafe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h @@ -166,6 +166,7 @@ struct resource_pool *dcn35_create_resource_pool( SR(MMHUBBUB_MEM_PWR_CNTL), \ SR(DCCG_GATE_DISABLE_CNTL), \ SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCCG_GATE_DISABLE_CNTL4), \ SR(DCCG_GATE_DISABLE_CNTL5), \ SR(DCFCLK_CNTL),\ SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index df63aa8f01e9..c78c9224ab60 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -86,6 +86,7 @@ enum dmub_status { DMUB_STATUS_TIMEOUT, DMUB_STATUS_INVALID, DMUB_STATUS_HW_FAILURE, + DMUB_STATUS_POWER_STATE_D3 }; /* enum dmub_asic - dmub asic identifier */ @@ -150,6 +151,13 @@ enum dmub_memory_access_type { DMUB_MEMORY_ACCESS_DMA }; +/* enum dmub_power_state type - to track DC power state in dmub_srv */ +enum dmub_srv_power_state_type { + DMUB_POWER_STATE_UNDEFINED = 0, + DMUB_POWER_STATE_D0 = 1, + DMUB_POWER_STATE_D3 = 8 +}; + /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -485,6 +493,8 @@ struct dmub_srv { /* Feature capabilities reported by fw */ struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + + enum dmub_srv_power_state_type power_state; }; /** @@ -889,6 +899,18 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); */ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); +/** + * dmub_srv_set_power_state() - Track DC power state in dmub_srv + * @dmub: The dmub service + * @power_state: DC power state setting + * + * Store DC power state in dmub_srv. 
If dmub_srv is in D3, then don't send messages to DMUB. + * + * Return: + * void + */ +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); + #if defined(__cplusplus) } #endif diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index ed4379c04715..c64b6c848ef7 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -185,8 +185,7 @@ union abm_flags { unsigned int disable_abm_requested : 1; /** - * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled - * immediately. + * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately. */ unsigned int disable_abm_immediately : 1; @@ -654,7 +653,7 @@ union dmub_fw_boot_options { uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ uint32_t usb4_cm_version: 1; /**< 1 CM support */ uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ - uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */ + uint32_t reserved0: 1; uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ @@ -818,18 +817,61 @@ enum dmub_gpint_command { * RETURN: Lower 32-bit mask. */ DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101, + /** - * DESC: Updates the trace buffer lower 32-bit mask. + * DESC: Updates the trace buffer mask bit0~bit15. * ARGS: The new mask * RETURN: Lower 32-bit mask. */ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102, + /** - * DESC: Updates the trace buffer mask bi0~bit15. + * DESC: Updates the trace buffer mask bit16~bit31. * ARGS: The new mask * RETURN: Lower 32-bit mask. */ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103, + + /** + * DESC: Updates the trace buffer mask bit32~bit47. + * ARGS: The new mask + * RETURN: Lower 32-bit mask. + */ + DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114, + + /** + * DESC: Updates the trace buffer mask bit48~bit63. + * ARGS: The new mask + * RETURN: Lower 32-bit mask. + */ + DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115, + + /** + * DESC: Read the trace buffer mask bit0~bit15. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116, + + /** + * DESC: Read the trace buffer mask bit16~bit31. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117, + + /** + * DESC: Read the trace buffer mask bit32~bit47. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118, + + /** + * DESC: Read the trace buffer mask bit48~bit63. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119, + + /** + * DESC: Enable measurements for various task durations + * ARGS: 0 - Disable measurement + * 1 - Enable measurement + */ + DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123, }; /** @@ -1303,6 +1345,10 @@ enum dmub_cmd_cab_type { * Fit surfaces in CAB (i.e. CAB enable) */ DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2, + /** + * Do not fit surfaces in CAB (i.e. no CAB) + */ + DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3, }; /** @@ -2840,6 +2886,14 @@ enum dmub_cmd_replay_type { * Set power opt and coasting vtotal. */ DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL = 4, + /** + * Set disabled timing sync. + */ + DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED = 5, + /** + * Set Residency Frameupdate Timer. 
+ */ + DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6, }; /** @@ -3003,6 +3057,26 @@ struct dmub_cmd_replay_set_power_opt_data { }; /** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command. + */ +struct dmub_cmd_replay_set_timing_sync_data { + /** + * Panel Instance. + * Panel instance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * REPLAY set_timing_sync + */ + uint8_t timing_sync_supported; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; +}; + +/** * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. */ struct dmub_rb_cmd_replay_set_power_opt { @@ -3069,6 +3143,73 @@ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal { }; /** + * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command. + */ +struct dmub_rb_cmd_replay_set_timing_sync { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data. + */ + struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; +}; + +/** + * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. + */ +struct dmub_cmd_replay_frameupdate_timer_data { + /** + * Panel Instance. + * Panel instance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Replay Frameupdate Timer Enable or not + */ + uint8_t enable; + /** + * REPLAY force refresh frame update number + */ + uint16_t frameupdate_count; +}; +/** + * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER + */ +struct dmub_rb_cmd_replay_set_frameupdate_timer { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. + */ + struct dmub_cmd_replay_frameupdate_timer_data data; +}; + +/** + * Definition union of replay command set + */ +union dmub_replay_cmd_set { + /** + * Panel Instance. + * Panel instance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data. + */ + struct dmub_cmd_replay_set_timing_sync_data sync_data; + /** + * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. + */ + struct dmub_cmd_replay_frameupdate_timer_data timer_data; +}; + +/** * Set of HW components that can be locked. * * Note: If updating with more HW components, fields @@ -3357,6 +3498,16 @@ struct dmub_cmd_abm_set_pipe_data { * TODO: Remove. */ uint8_t ramping_boundary; + + /** + * PwrSeq HW Instance. + */ + uint8_t pwrseq_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[3]; }; /** @@ -3737,7 +3888,7 @@ enum dmub_cmd_panel_cntl_type { * struct dmub_cmd_panel_cntl_data - Panel control data. 
*/ struct dmub_cmd_panel_cntl_data { - uint32_t inst; /**< panel instance */ + uint32_t pwrseq_inst; /**< pwrseq instance */ uint32_t current_backlight; /* in/out */ uint32_t bl_pwm_cntl; /* in/out */ uint32_t bl_pwm_period_cntl; /* in/out */ @@ -3796,7 +3947,7 @@ struct dmub_cmd_lvtma_control_data { uint8_t uc_pwr_action; /**< LVTMA_ACTION */ uint8_t bypass_panel_control_wait; uint8_t reserved_0[2]; /**< For future use */ - uint8_t panel_inst; /**< LVTMA control instance */ + uint8_t pwrseq_inst; /**< LVTMA control instance */ uint8_t reserved_1[3]; /**< For future use */ }; @@ -4201,6 +4352,12 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command. */ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal; + + struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync; + /** + * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. + */ + struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 38360adc53d9..9ad738805320 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -64,7 +64,7 @@ /* Default scratch mem size. */ -#define DMUB_SCRATCH_MEM_SIZE (256) +#define DMUB_SCRATCH_MEM_SIZE (1024) /* Number of windows in use. */ #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL) @@ -713,6 +713,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, dmub->hw_funcs.reset_release(dmub); dmub->hw_init = true; + dmub->power_state = DMUB_POWER_STATE_D0; return DMUB_STATUS_OK; } @@ -766,6 +767,9 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { return DMUB_STATUS_HW_FAILURE; @@ -784,6 +788,9 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + /** * Read back all the queued commands to ensure that they've * been flushed to framebuffer memory. 
Otherwise DMCUB might @@ -1100,3 +1107,11 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ subvp_index); } } + +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +{ + if (!dmub || !dmub->hw_init) + return; + + dmub->power_state = dmub_srv_power_state; +} diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h index 42229b4effdc..eced9ad91f1d 100644 --- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h +++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h @@ -69,6 +69,11 @@ enum hdcp_message_id { HDCP_MESSAGE_ID_READ_RXSTATUS, HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE, + /* PS175 chip */ + + HDCP_MESSAGE_ID_WRITE_PS175_CMD, + HDCP_MESSAGE_ID_READ_PS175_RSP, + HDCP_MESSAGE_ID_MAX }; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ccecddafeb05..3955b7e4b2e2 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -81,6 +81,7 @@ fail_alloc_context: void mod_freesync_destroy(struct mod_freesync *mod_freesync) { struct core_freesync *core_freesync = NULL; + if (mod_freesync == NULL) return; core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync); @@ -278,9 +279,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync, } } else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) { /* Enter Below the Range */ - if (!in_out_vrr->btr.btr_active) { + if (!in_out_vrr->btr.btr_active) in_out_vrr->btr.btr_active = true; - } } /* BTR set to "not active" so disengage */ @@ -693,10 +693,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, if (app_tf != TRANSFER_FUNC_UNKNOWN) { infopacket->valid = true; - if (app_tf != TRANSFER_FUNC_PQ2084) { + if (app_tf == TRANSFER_FUNC_PQ2084) + infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active] + else { infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] if (app_tf == TRANSFER_FUNC_GAMMA_22) - infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] + infopacket->sb[9] |= 0x04; // PB9 = [Bit 2 = Gamma 2.2 EOTF Active] } } } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1ddb4f5eac8e..182e7532dda8 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; + if (is_dp_hdcp(hdcp)) { status = (hdcp->auth.msg.hdcp1.bstatus & DP_BSTATUS_R0_PRIME_READY) ? @@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { /* Avoid device count == 0 to do authentication */ - if (0 == get_device_count(hdcp)) { + if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; - } /* Some MST display may choose to report the internal panel as an HDCP RX. 
* To update this condition with 1(because the immediate repeater's internal diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 91c22b96ebde..733f22bed021 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -208,9 +208,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { /* Avoid device count == 0 to do authentication */ - if (0 == get_device_count(hdcp)) { + if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; - } /* Some MST display may choose to report the internal panel as an HDCP RX. */ /* To update this condition with 1(because the immediate repeater's internal */ @@ -689,9 +688,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp, if (is_hdmi_dvi_sl_hdcp(hdcp)) { if (!process_rxstatus(hdcp, event_ctx, input, &status)) goto out; - if (event_ctx->rx_id_list_ready) { + if (event_ctx->rx_id_list_ready) goto out; - } } if (is_hdmi_dvi_sl_hdcp(hdcp)) if (!mod_hdcp_execute_and_set(check_stream_ready_available, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h index c62df3bcc7cb..1d83c1b9da10 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h @@ -86,10 +86,12 @@ #define HDCP_CPIRQ_TRACE(hdcp) \ HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index) #define HDCP_EVENT_TRACE(hdcp, event) \ - if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \ - HDCP_TIMEOUT_TRACE(hdcp); \ - else if (event == MOD_HDCP_EVENT_CPIRQ) \ - HDCP_CPIRQ_TRACE(hdcp) + do { \ + if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \ + HDCP_TIMEOUT_TRACE(hdcp); \ + else if (event == MOD_HDCP_EVENT_CPIRQ) \ + HDCP_CPIRQ_TRACE(hdcp); \ + } while (0) /* TODO: find some way to tell if logging is off to save time */ #define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \ mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index ee67a35c2a8e..8c137d7c032e 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -443,7 +443,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) { if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE) - continue; + continue; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); @@ -926,7 +926,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) { if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE) - continue; + continue; hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index; hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id; diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h index 5b71bc96b98c..7844ea91650b 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h @@ -98,9 +98,9 @@ enum ta_dtm_encoder_type { * This enum 
defines software value for dio_output_type */ typedef enum { - TA_DTM_DIO_OUTPUT_TYPE__INVALID, - TA_DTM_DIO_OUTPUT_TYPE__DIRECT, - TA_DTM_DIO_OUTPUT_TYPE__DPIA + TA_DTM_DIO_OUTPUT_TYPE__INVALID, + TA_DTM_DIO_OUTPUT_TYPE__DIRECT, + TA_DTM_DIO_OUTPUT_TYPE__DPIA } ta_dtm_dio_output_type; struct ta_dtm_topology_update_input_v3 { @@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size { #define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127 #define TA_HDCP__HDCP1_V_PRIME_SIZE 20 #define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE \ - TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6 + (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6) // 64 bits boundaries #define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE \ - TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4 + (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4) enum ta_hdcp_status { TA_HDCP_STATUS__SUCCESS = 0x00, diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index afe1f6cce528..cc3dc9b589f6 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -23,34 +23,6 @@ * */ - - - -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - #ifndef MOD_FREESYNC_H_ #define MOD_FREESYNC_H_ diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 1675314a3ff2..ad98e504c00d 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -31,7 +31,7 @@ #define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b)) #define bswap16_based_on_endian(big_endian, value) \ - (big_endian) ? cpu_to_be16(value) : cpu_to_le16(value) + ((big_endian) ? 
cpu_to_be16(value) : cpu_to_le16(value)) /* Possible Min Reduction config from least aggressive to most aggressive * 0 1 2 3 4 5 6 7 8 9 10 11 12 @@ -973,6 +973,34 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, return true; } +void set_replay_coasting_vtotal(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint16_t vtotal) +{ + link->replay_settings.coasting_vtotal_table[type] = vtotal; +} + +void calculate_replay_link_off_frame_count(struct dc_link *link, + uint16_t vtotal, uint16_t htotal) +{ + uint8_t max_link_off_frame_count = 0; + uint16_t max_deviation_line = 0, pixel_deviation_per_line = 0; + + max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line; + pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line; + + if (htotal != 0 && vtotal != 0) + max_link_off_frame_count = htotal * max_deviation_line / (pixel_deviation_per_line * vtotal); + else + ASSERT(0); + + link->replay_settings.link_off_frame_count_level = + max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? PR_LINK_OFF_FRAME_COUNT_BEST : + max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? PR_LINK_OFF_FRAME_COUNT_GOOD : + PR_LINK_OFF_FRAME_COUNT_FAIL; + +} + bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps) { unsigned int data_points_size; diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index d9e0d67d67f7..c17bbc6fb38c 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -54,6 +54,11 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, unsigned int inst); void init_replay_config(struct dc_link *link, struct replay_config *pr_config); +void set_replay_coasting_vtotal(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint16_t vtotal); +void calculate_replay_link_off_frame_count(struct dc_link *link, + uint16_t vtotal, uint16_t htotal); bool is_psr_su_specific_panel(struct dc_link *link); void mod_power_calc_psr_configs(struct psr_config *psr_config, diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 7f98394338c2..1dc5dd9b7bf7 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -244,7 +244,6 @@ enum DC_FEATURE_MASK { DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default - DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 }; enum DC_DEBUG_MASK { @@ -255,8 +254,10 @@ enum DC_DEBUG_MASK { DC_DISABLE_PSR = 0x10, DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, DC_DISABLE_MPO = 0x40, - DC_DISABLE_REPLAY = 0x50, DC_ENABLE_DPIA_TRACE = 0x80, + DC_ENABLE_DML2 = 0x100, + DC_DISABLE_PSR_SU = 0x200, + DC_DISABLE_REPLAY = 0x400, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h new file mode 100644 index 000000000000..be519c8edf49 --- /dev/null +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_REG_STATE_H__ +#define __AMDGPU_REG_STATE_H__ + +enum amdgpu_reg_state { + AMDGPU_REG_STATE_TYPE_INVALID = 0, + AMDGPU_REG_STATE_TYPE_XGMI = 1, + AMDGPU_REG_STATE_TYPE_WAFL = 2, + AMDGPU_REG_STATE_TYPE_PCIE = 3, + AMDGPU_REG_STATE_TYPE_USR = 4, + AMDGPU_REG_STATE_TYPE_USR_1 = 5 +}; + +enum amdgpu_sysfs_reg_offset { + AMDGPU_SYS_REG_STATE_XGMI = 0x0000, + AMDGPU_SYS_REG_STATE_WAFL = 0x1000, + AMDGPU_SYS_REG_STATE_PCIE = 0x2000, + AMDGPU_SYS_REG_STATE_USR = 0x3000, + AMDGPU_SYS_REG_STATE_USR_1 = 0x4000, + AMDGPU_SYS_REG_STATE_END = 0x5000, +}; + +struct amdgpu_reg_state_header { + uint16_t structure_size; + uint8_t format_revision; + uint8_t content_revision; + uint8_t state_type; + uint8_t num_instances; + uint16_t pad; +}; + +enum amdgpu_reg_inst_state { + AMDGPU_INST_S_OK, + AMDGPU_INST_S_EDISABLED, + AMDGPU_INST_S_EACCESS, +}; + +struct amdgpu_smn_reg_data { + uint64_t addr; + uint32_t value; + uint32_t pad; +}; + +struct amdgpu_reg_inst_header { + uint16_t instance; + uint16_t state; + uint16_t num_smn_regs; + uint16_t pad; +}; + + +struct amdgpu_regs_xgmi_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_xgmi_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_xgmi_v1_0 xgmi_state_regs[]; +}; + +struct amdgpu_regs_wafl_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_wafl_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_wafl_v1_0 wafl_state_regs[]; +}; + +struct amdgpu_regs_pcie_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + uint16_t device_status; + uint16_t link_status; + uint32_t sub_bus_number_latency; + uint32_t pcie_corr_err_status; + uint32_t pcie_uncorr_err_status; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_pcie_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_pcie_v1_0 pci_state_regs[]; +}; + +struct amdgpu_regs_usr_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_usr_v1_0 { + /* 
common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_usr_v1_0 usr_state_regs[]; +}; + +static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, + uint16_t num_regs) +{ + return num_inst * + (inst_size + num_regs * sizeof(struct amdgpu_smn_reg_data)); +} + +#define amdgpu_asic_get_reg_state_supported(adev) \ + ((adev)->asic_funcs->get_reg_state ? 1 : 0) + +#define amdgpu_asic_get_reg_state(adev, state, buf, size) \ + ((adev)->asic_funcs->get_reg_state ? \ + (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \ + (size)) : \ + 0) + + +int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev); +void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h index b64664879211..fca72e2ec929 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h @@ -6220,12 +6220,20 @@ #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE__SHIFT 0x3 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE__SHIFT 0x4 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE__SHIFT 0x11 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE__SHIFT 0x17 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE__SHIFT 0x18 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE__SHIFT 0x19 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE__SHIFT 0x1a #define DCCG_GATE_DISABLE_CNTL4__PHYA_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000001L #define DCCG_GATE_DISABLE_CNTL4__PHYB_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000002L #define DCCG_GATE_DISABLE_CNTL4__PHYC_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000004L #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000008L #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000010L #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE_MASK 0x00020000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE_MASK 0x00800000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE_MASK 0x01000000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE_MASK 0x02000000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE_MASK 0x04000000L #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_SRC_SEL__SHIFT 0x0 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_EN__SHIFT 0x3 #define DPSTREAMCLK_CNTL__DPSTREAMCLK1_SRC_SEL__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h new file mode 100644 index 000000000000..a4dd372c0541 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _smuio_10_0_2_OFFSET_HEADER + +// addressBlock: smuio_smuio_misc_SmuSmuioDec +// base address: 0x5a000 +#define mmSMUIO_MCM_CONFIG 0x0023 +#define mmSMUIO_MCM_CONFIG_BASE_IDX 0 +#define mmIP_DISCOVERY_VERSION 0x0000 +#define mmIP_DISCOVERY_VERSION_BASE_IDX 1 +#define mmIO_SMUIO_PINSTRAP 0x01b1 +#define mmIO_SMUIO_PINSTRAP_BASE_IDX 1 +#define mmSCRATCH_REGISTER0 0x01b2 +#define mmSCRATCH_REGISTER0_BASE_IDX 1 +#define mmSCRATCH_REGISTER1 0x01b3 +#define mmSCRATCH_REGISTER1_BASE_IDX 1 +#define mmSCRATCH_REGISTER2 0x01b4 +#define mmSCRATCH_REGISTER2_BASE_IDX 1 +#define mmSCRATCH_REGISTER3 0x01b5 +#define mmSCRATCH_REGISTER3_BASE_IDX 1 +#define mmSCRATCH_REGISTER4 0x01b6 +#define mmSCRATCH_REGISTER4_BASE_IDX 1 +#define mmSCRATCH_REGISTER5 0x01b7 +#define mmSCRATCH_REGISTER5_BASE_IDX 1 +#define mmSCRATCH_REGISTER6 0x01b8 +#define mmSCRATCH_REGISTER6_BASE_IDX 1 +#define mmSCRATCH_REGISTER7 0x01b9 +#define mmSCRATCH_REGISTER7_BASE_IDX 1 + + +// addressBlock: smuio_smuio_reset_SmuSmuioDec +// base address: 0x5a300 +#define mmSMUIO_MP_RESET_INTR 0x00c1 +#define mmSMUIO_MP_RESET_INTR_BASE_IDX 0 +#define mmSMUIO_SOC_HALT 0x00c2 +#define mmSMUIO_SOC_HALT_BASE_IDX 0 +#define mmSMUIO_GFX_MISC_CNTL 0x00c8 +#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX 0 + + +// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec +// base address: 0x5a000 +#define mmPWROK_REFCLK_GAP_CYCLES 0x0001 +#define mmPWROK_REFCLK_GAP_CYCLES_BASE_IDX 1 +#define mmGOLDEN_TSC_INCREMENT_UPPER 0x0004 +#define mmGOLDEN_TSC_INCREMENT_UPPER_BASE_IDX 1 +#define mmGOLDEN_TSC_INCREMENT_LOWER 0x0005 +#define mmGOLDEN_TSC_INCREMENT_LOWER_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_UPPER 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_BASE_IDX 1 +#define mmGFX_GOLDEN_TSC_SHADOW_UPPER 0x0029 +#define mmGFX_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX 1 +#define mmGFX_GOLDEN_TSC_SHADOW_LOWER 0x002a +#define mmGFX_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX 1 +#define mmSOC_GOLDEN_TSC_SHADOW_UPPER 0x002b +#define mmSOC_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX 1 +#define mmSOC_GOLDEN_TSC_SHADOW_LOWER 0x002c +#define mmSOC_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX 1 +#define mmSOC_GAP_PWROK 0x002d +#define mmSOC_GAP_PWROK_BASE_IDX 1 + +// addressBlock: smuio_smuio_swtimer_SmuSmuioDec +// base address: 0x5ac40 +#define mmPWR_VIRT_RESET_REQ 0x0110 +#define mmPWR_VIRT_RESET_REQ_BASE_IDX 1 +#define mmPWR_DISP_TIMER_CONTROL 0x0111 +#define mmPWR_DISP_TIMER_CONTROL_BASE_IDX 1 +#define mmPWR_DISP_TIMER2_CONTROL 0x0113 +#define mmPWR_DISP_TIMER2_CONTROL_BASE_IDX 1 +#define mmPWR_DISP_TIMER_GLOBAL_CONTROL 0x0115 +#define mmPWR_DISP_TIMER_GLOBAL_CONTROL_BASE_IDX 1 +#define mmPWR_IH_CONTROL 0x0116 +#define mmPWR_IH_CONTROL_BASE_IDX 1 + +// addressBlock: smuio_smuio_svi0_SmuSmuioDec +// base address: 0x6f000 +#define mmSMUSVI0_TEL_PLANE0 0x520e +#define mmSMUSVI0_TEL_PLANE0_BASE_IDX 1 +#define mmSMUSVI0_PLANE0_CURRENTVID 0x5217 +#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX 1 + +#endif diff --git 
a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h new file mode 100644 index 000000000000..d10ae61c346b --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _smuio_10_0_2_SH_MASK_HEADER + +// addressBlock: smuio_smuio_misc_SmuSmuioDec +//SMUIO_MCM_CONFIG +#define SMUIO_MCM_CONFIG__DIE_ID__SHIFT 0x0 +#define SMUIO_MCM_CONFIG__PKG_TYPE__SHIFT 0x2 +#define SMUIO_MCM_CONFIG__SOCKET_ID__SHIFT 0x5 +#define SMUIO_MCM_CONFIG__PKG_SUBTYPE__SHIFT 0x6 +#define SMUIO_MCM_CONFIG__CONSOLE_K__SHIFT 0x10 +#define SMUIO_MCM_CONFIG__CONSOLE_A__SHIFT 0x11 +#define SMUIO_MCM_CONFIG__DIE_ID_MASK 0x00000003L +#define SMUIO_MCM_CONFIG__PKG_TYPE_MASK 0x0000001CL +#define SMUIO_MCM_CONFIG__SOCKET_ID_MASK 0x00000020L +#define SMUIO_MCM_CONFIG__PKG_SUBTYPE_MASK 0x000000C0L +#define SMUIO_MCM_CONFIG__CONSOLE_K_MASK 0x00010000L +#define SMUIO_MCM_CONFIG__CONSOLE_A_MASK 0x00020000L +//IP_DISCOVERY_VERSION +#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION__SHIFT 0x0 +#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION_MASK 0xFFFFFFFFL +//IO_SMUIO_PINSTRAP +#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN__SHIFT 0x0 +#define IO_SMUIO_PINSTRAP__AUD__SHIFT 0x3 +#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN_MASK 0x00000007L +#define IO_SMUIO_PINSTRAP__AUD_MASK 0x00000018L +//SCRATCH_REGISTER0 +#define SCRATCH_REGISTER0__ScratchPad0__SHIFT 0x0 +#define SCRATCH_REGISTER0__ScratchPad0_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER1 +#define SCRATCH_REGISTER1__ScratchPad1__SHIFT 0x0 +#define SCRATCH_REGISTER1__ScratchPad1_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER2 +#define SCRATCH_REGISTER2__ScratchPad2__SHIFT 0x0 +#define SCRATCH_REGISTER2__ScratchPad2_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER3 +#define SCRATCH_REGISTER3__ScratchPad3__SHIFT 0x0 +#define SCRATCH_REGISTER3__ScratchPad3_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER4 +#define SCRATCH_REGISTER4__ScratchPad4__SHIFT 0x0 +#define SCRATCH_REGISTER4__ScratchPad4_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER5 +#define SCRATCH_REGISTER5__ScratchPad5__SHIFT 0x0 +#define SCRATCH_REGISTER5__ScratchPad5_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER6 +#define SCRATCH_REGISTER6__ScratchPad6__SHIFT 0x0 +#define SCRATCH_REGISTER6__ScratchPad6_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER7 +#define SCRATCH_REGISTER7__ScratchPad7__SHIFT 0x0 +#define SCRATCH_REGISTER7__ScratchPad7_MASK 
0xFFFFFFFFL + +// addressBlock: smuio_smuio_reset_SmuSmuioDec +//SMUIO_MP_RESET_INTR +#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT 0x0 +#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK 0x00000001L +//SMUIO_SOC_HALT +#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN__SHIFT 0x2 +#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN__SHIFT 0x3 +#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN_MASK 0x00000004L +#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN_MASK 0x00000008L +//SMUIO_GFX_MISC_CNTL +#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff__SHIFT 0x0 +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1 +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH__SHIFT 0x3 +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN__SHIFT 0x4 +#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff_MASK 0x00000001L +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x00000006L +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH_MASK 0x00000008L +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN_MASK 0x00000010L + +// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec +//PWROK_REFCLK_GAP_CYCLES +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles__SHIFT 0x0 +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles__SHIFT 0x8 +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles_MASK 0x000000FFL +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles_MASK 0x0000FF00L +//GOLDEN_TSC_INCREMENT_UPPER +#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper__SHIFT 0x0 +#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper_MASK 0x00FFFFFFL +//GOLDEN_TSC_INCREMENT_LOWER +#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower__SHIFT 0x0 +#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower_MASK 0xFFFFFFFFL +//GOLDEN_TSC_COUNT_UPPER +#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper__SHIFT 0x0 +#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper_MASK 0x00FFFFFFL +//GOLDEN_TSC_COUNT_LOWER +#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower__SHIFT 0x0 +#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower_MASK 0xFFFFFFFFL +//GFX_GOLDEN_TSC_SHADOW_UPPER +#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper__SHIFT 0x0 +#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper_MASK 0x00FFFFFFL +//GFX_GOLDEN_TSC_SHADOW_LOWER +#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower__SHIFT 0x0 +#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower_MASK 0xFFFFFFFFL +//SOC_GOLDEN_TSC_SHADOW_UPPER +#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper__SHIFT 0x0 +#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper_MASK 0x00FFFFFFL +//SOC_GOLDEN_TSC_SHADOW_LOWER +#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower__SHIFT 0x0 +#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower_MASK 0xFFFFFFFFL +//SOC_GAP_PWROK +#define SOC_GAP_PWROK__soc_gap_pwrok__SHIFT 0x0 +#define SOC_GAP_PWROK__soc_gap_pwrok_MASK 0x00000001L + +// addressBlock: smuio_smuio_swtimer_SmuSmuioDec +//PWR_VIRT_RESET_REQ +#define PWR_VIRT_RESET_REQ__VF_FLR__SHIFT 0x0 +#define PWR_VIRT_RESET_REQ__PF_FLR__SHIFT 0x1f +#define PWR_VIRT_RESET_REQ__VF_FLR_MASK 0x7FFFFFFFL +#define PWR_VIRT_RESET_REQ__PF_FLR_MASK 0x80000000L +//PWR_DISP_TIMER_CONTROL +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT__SHIFT 0x0 +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT 0x19 +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT 0x1a +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK__SHIFT 0x1b +#define 
PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT 0x1c +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE__SHIFT 0x1d +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE__SHIFT 0x1e +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT_MASK 0x01FFFFFFL +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE_MASK 0x02000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE_MASK 0x04000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK_MASK 0x08000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK_MASK 0x10000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE_MASK 0x20000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE_MASK 0x40000000L +//PWR_DISP_TIMER2_CONTROL +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT__SHIFT 0x0 +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT 0x19 +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT 0x1a +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK__SHIFT 0x1b +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT 0x1c +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE__SHIFT 0x1d +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE__SHIFT 0x1e +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT_MASK 0x01FFFFFFL +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE_MASK 0x02000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE_MASK 0x04000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK_MASK 0x08000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK_MASK 0x10000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE_MASK 0x20000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE_MASK 0x40000000L +//PWR_DISP_TIMER_GLOBAL_CONTROL +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH__SHIFT 0x0 +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN__SHIFT 0xa +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH_MASK 0x000003FFL +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN_MASK 0x00000400L +//PWR_IH_CONTROL +#define PWR_IH_CONTROL__MAX_CREDIT__SHIFT 0x0 +#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK__SHIFT 0x5 +#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK__SHIFT 0x6 +#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN__SHIFT 0x1f +#define PWR_IH_CONTROL__MAX_CREDIT_MASK 0x0000001FL +#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK_MASK 0x00000020L +#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK_MASK 0x00000040L +#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN_MASK 0x80000000L + +// addressBlock: smuio_smuio_svi0_SmuSmuioDec +//SMUSVI0_TEL_PLANE0 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR__SHIFT 0x0 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT 0x10 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR_MASK 0x000000FFL +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK 0x01FF0000L +//SMUSVI0_PLANE0_CURRENTVID +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT 0x18 +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK 0xFF000000L + +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index c2ccf3724e37..edcb85560ced 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -422,7 +422,7 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); - int (*get_asic_baco_capability)(void 
*handle, bool *cap); + bool (*get_asic_baco_capability)(void *handle); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); int (*get_ppfeature_status)(void *handle, char *buf); @@ -432,6 +432,7 @@ struct amd_pm_funcs { int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); ssize_t (*get_gpu_metrics)(void *handle, void **table); + ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size); int (*set_watermarks_for_clock_ranges)(void *handle, struct pp_smu_wm_range_sets *ranges); int (*display_disable_memory_clock_switch)(void *handle, @@ -1225,4 +1226,19 @@ struct gpu_metrics_v3_0 { /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; + +struct amdgpu_pmmetrics_header { + uint16_t structure_size; + uint16_t pad; + uint32_t mp1_ip_discovery_version; + uint32_t pmfw_version; + uint32_t pmmetrics_version; +}; + +struct amdgpu_pm_metrics { + struct amdgpu_pmmetrics_header common_header; + + uint8_t data[]; +}; + #endif diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b1db2b190187..ec5b9ab67c5e 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -232,6 +232,7 @@ union MESAPI_SET_HW_RESOURCES { }; uint32_t oversubscription_timer; uint64_t doorbell_info; + uint64_t event_intr_history_gpu_mc_ptr; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -571,7 +572,8 @@ struct SET_SHADER_DEBUGGER { struct { uint32_t single_memop : 1; /* SQ_DEBUG.single_memop */ uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */ - uint32_t reserved : 30; + uint32_t reserved : 29; + uint32_t process_ctx_flush : 1; }; uint32_t u32all; } flags; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 8ec11da0319f..6627ee07d52d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -203,8 +203,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; void *pp_handle = adev->powerplay.pp_handle; - bool baco_cap; - int ret = 0; + bool ret; if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; @@ -222,12 +221,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) mutex_lock(&adev->pm.mutex); - ret = pp_funcs->get_asic_baco_capability(pp_handle, - &baco_cap); + ret = pp_funcs->get_asic_baco_capability(pp_handle); mutex_unlock(&adev->pm.mutex); - return ret ? false : baco_cap; + return ret; } int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) @@ -618,6 +616,16 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) enable ? "enable" : "disable", ret); } +void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable) +{ + int ret = 0; + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable); + if (ret) + DRM_ERROR("Dpm %s vpe failed, ret = %d.\n", + enable ? 
"enable" : "disable", ret); +} + int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; @@ -1319,6 +1327,23 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table) return ret; } +ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics, + size_t size) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (!pp_funcs->get_pm_metrics) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_pm_metrics(adev->powerplay.pp_handle, pm_metrics, + size); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode) { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 20c53eefd680..f3cb490fe79b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1799,6 +1799,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev, return count; } +static int amdgpu_pm_metrics_attr_update(struct amdgpu_device *adev, + struct amdgpu_device_attr *attr, + uint32_t mask, + enum amdgpu_device_attr_states *states) +{ + if (amdgpu_dpm_get_pm_metrics(adev, NULL, 0) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + + return 0; +} + +static ssize_t amdgpu_get_pm_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size = 0; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + size = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + /** * DOC: gpu_metrics * @@ -2096,6 +2134,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(smartshift_bias, ATTR_FLAG_BASIC, .attr_update = ss_bias_attr_update), AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pm_metrics, ATTR_FLAG_BASIC, + .attr_update = amdgpu_pm_metrics_attr_update), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 482ea30147ab..3047ffe7f244 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -445,6 +445,7 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable); int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable); int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size); @@ -513,6 +514,18 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, long *input, uint32_t size); int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); + +/** + * 
@get_pm_metrics: Get one snapshot of power management metrics from PMFW. The + * sample is copied to the pm_metrics buffer, which is expected to be allocated + * by the caller; the size of the allocated buffer is passed in. The maximum + * size expected for a metrics sample is 4096 bytes. + * + * Return: Actual size of the metrics sample + */ +ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics, + size_t size); + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode); int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 5d28c951a319..5cb4725c773f 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -2735,10 +2735,8 @@ static int kv_parse_power_table(struct amdgpu_device *adev) non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; k = 0; idx = (u8 *)&power_state->v2.clockInfoIndex[0]; diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 81fb4e5dd804..60377747bab4 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -272,10 +272,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddcDependencyOnSCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_sclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usVddciDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -283,10 +281,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddciDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddci_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usVddcDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -294,10 +290,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddcDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usMvddDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -305,10 +299,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usMaxClockVoltageOnDCOffset) { ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v = @@ -339,10 +331,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) kcalloc(psl->ucNumEntries, sizeof(struct amdgpu_phase_shedding_limits_entry), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) { -
amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) return -ENOMEM; - } entry = &psl->entries[0]; for (i = 0; i < psl->ucNumEntries; i++) { @@ -383,10 +373,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ATOM_PPLIB_CAC_Leakage_Record *entry; u32 size = cac_table->ucNumEntries * sizeof(struct amdgpu_cac_leakage_table); adev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) return -ENOMEM; - } entry = &cac_table->entries[0]; for (i = 0; i < cac_table->ucNumEntries; i++) { if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { @@ -438,10 +426,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_vce_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -493,10 +479,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_uvd_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -525,10 +509,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -548,10 +530,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(ext_hdr->usPPMTableOffset)); adev->pm.dpm.dyn_state.ppm_table = kzalloc(sizeof(struct amdgpu_ppm_table), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.ppm_table) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.ppm_table) return -ENOMEM; - } adev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign; adev->pm.dpm.dyn_state.ppm_table->cpu_core_number = le16_to_cpu(ppm->usCpuCoreNumber); @@ -583,10 +563,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count = limits->numEntries; entry = 
&limits->entries[0]; @@ -606,10 +584,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ATOM_PowerTune_Table *pt; adev->pm.dpm.dyn_state.cac_tdp_table = kzalloc(sizeof(struct amdgpu_cac_tdp_table), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.cac_tdp_table) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.cac_tdp_table) return -ENOMEM; - } if (rev > 0) { ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *) (mode_info->atom_context->bios + data_offset + @@ -645,10 +621,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ret = amdgpu_parse_clk_voltage_dep_table( &adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk, dep_table); - if (ret) { - kfree(adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk.entries); + if (ret) return ret; - } } } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index fc8e4ac6c8e7..df4f20293c16 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7379,10 +7379,9 @@ static int si_dpm_init(struct amdgpu_device *adev) kcalloc(4, sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) return -ENOMEM; - } + adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 914c15387157..aed0e2cefbf9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1371,21 +1371,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return phm_set_active_display_count(hwmgr, count); } -static int pp_get_asic_baco_capability(void *handle, bool *cap) +static bool pp_get_asic_baco_capability(void *handle) { struct pp_hwmgr *hwmgr = handle; - *cap = false; if (!hwmgr) - return -EINVAL; + return false; if (!(hwmgr->not_vf && amdgpu_dpm) || !hwmgr->hwmgr_func->get_asic_baco_capability) - return 0; + return false; - hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap); - - return 0; + return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr); } static int pp_get_asic_baco_state(void *handle, int *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c index 044cda005aed..e8a9471c1898 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c @@ -33,21 +33,20 @@ #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) return 0; reg = RREG32(mmCC_BIF_BX_FUSESTRAP0); if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK) - *cap = true; + return true; - return 0; + return false; } int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h index be0d98abb536..73a773f4ce2e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr); extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 11372fcc59c8..b1a8799e2dee 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -2974,6 +2974,8 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) result = smu7_get_evv_voltages(hwmgr); if (result) { pr_info("Get EVV Voltage Failed. Abort Driver loading!\n"); + kfree(hwmgr->backend); + hwmgr->backend = NULL; return -EINVAL; } } else { @@ -3019,8 +3021,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) } result = smu7_update_edc_leakage_table(hwmgr); - if (result) + if (result) { + smu7_hwmgr_backend_fini(hwmgr); return result; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c index de0a37f7c632..c66ef9741535 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c @@ -28,14 +28,13 @@ #include "vega10_inc.h" #include "smu9_baco.h" -int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg, data; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return 0; + return false; WREG32(0x12074, 0xFFF0003B); data = RREG32(0x12075); @@ -44,10 +43,10 @@ int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - *cap = true; + return true; } - return 0; + return false; } int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h index 84e90f801ac3..9ff7c2ea1b58 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr); extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); #endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c index 994c0d374bfa..dad4c80aee58 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c @@ -36,23 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = { {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, }; -int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device 
*)(hwmgr->adev); uint32_t reg; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return 0; + return false; if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) { reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - *cap = true; + return true; } - return 0; + return false; } int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h index f06471e712dc..bdad9c915631 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr); extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 81650727a5de..6f536159df4d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -351,7 +351,7 @@ struct pp_hwmgr_func { int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); - int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap); + bool (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr); int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf); diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 9e4228232f02..ad1fd3150d03 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -2298,6 +2298,7 @@ static uint32_t ci_get_mac_definition(uint32_t value) case SMU_MAX_ENTRIES_SMIO: return SMU7_MAX_ENTRIES_SMIO; case SMU_MAX_LEVELS_VDDC: + case SMU_MAX_LEVELS_VDDGFX: return SMU7_MAX_LEVELS_VDDC; case SMU_MAX_LEVELS_VDDCI: return SMU7_MAX_LEVELS_VDDCI; diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 97d9802fe673..17d2f5bff4a7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2263,6 +2263,7 @@ static uint32_t iceland_get_mac_definition(uint32_t value) case SMU_MAX_ENTRIES_SMIO: return SMU71_MAX_ENTRIES_SMIO; case SMU_MAX_LEVELS_VDDC: + case SMU_MAX_LEVELS_VDDGFX: return SMU71_MAX_LEVELS_VDDC; case SMU_MAX_LEVELS_VDDCI: return SMU71_MAX_LEVELS_VDDCI; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index e1a5ee911dbb..c16703868e5c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1322,6 +1322,187 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu) return ret; } +/** + * smu_wbrf_handle_exclusion_ranges - consume the 
wbrf exclusion ranges + * + * @smu: smu_context pointer + * + * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling. + * Returns 0 on success, error on failure. + */ +static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu) +{ + struct wbrf_ranges_in_out wbrf_exclusion = {0}; + struct freq_band_range *wifi_bands = wbrf_exclusion.band_list; + struct amdgpu_device *adev = smu->adev; + uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES; + uint64_t start, end; + int ret, i, j; + + ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion); + if (ret) { + dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n"); + return ret; + } + + /* + * The exclusion ranges array we got might be filled with holes and duplicate + * entries. For example: + * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...} + * We need to do some sorting to eliminate those holes and duplicate entries. + * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...} + */ + for (i = 0; i < num_of_wbrf_ranges; i++) { + start = wifi_bands[i].start; + end = wifi_bands[i].end; + + /* get the last valid entry to fill the intermediate hole */ + if (!start && !end) { + for (j = num_of_wbrf_ranges - 1; j > i; j--) + if (wifi_bands[j].start && wifi_bands[j].end) + break; + + /* no valid entry left */ + if (j <= i) + break; + + start = wifi_bands[i].start = wifi_bands[j].start; + end = wifi_bands[i].end = wifi_bands[j].end; + wifi_bands[j].start = 0; + wifi_bands[j].end = 0; + num_of_wbrf_ranges = j; + } + + /* eliminate duplicate entries */ + for (j = i + 1; j < num_of_wbrf_ranges; j++) { + if ((wifi_bands[j].start == start) && (wifi_bands[j].end == end)) { + wifi_bands[j].start = 0; + wifi_bands[j].end = 0; + } + } + } + + /* Send the sorted wifi_bands to PMFW */ + ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands); + /* Try to set the wifi_bands again */ + if (unlikely(ret == -EBUSY)) { + mdelay(5); + ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands); + } + + return ret; +} + +/** + * smu_wbrf_event_handler - handle notify events + * + * @nb: notifier block + * @action: event type + * @_arg: event data + * + * Calls the relevant amdgpu function in response to a wbrf event + * notification from the kernel. + */ +static int smu_wbrf_event_handler(struct notifier_block *nb, + unsigned long action, void *_arg) +{ + struct smu_context *smu = container_of(nb, struct smu_context, wbrf_notifier); + + switch (action) { + case WBRF_CHANGED: + schedule_delayed_work(&smu->wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +/** + * smu_wbrf_delayed_work_handler - callback on delayed work timer expired + * + * @work: struct work_struct pointer + * + * The event flood is over and the driver will consume the latest exclusion ranges. + */ +static void smu_wbrf_delayed_work_handler(struct work_struct *work) +{ + struct smu_context *smu = container_of(work, struct smu_context, wbrf_delayed_work.work); + + smu_wbrf_handle_exclusion_ranges(smu); +} + +/** + * smu_wbrf_support_check - check wbrf support + * + * @smu: smu_context pointer + * + * Verifies via the ACPI interface whether wbrf is supported.
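+ * + * As implemented in the body below, support requires all three conditions to + * hold: the ASIC-level PMFW capability check (smu_is_asic_wbrf_supported), the + * amdgpu_wbrf module option, and the ACPI consumer check + * (acpi_amd_wbrf_supported_consumer).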
+ */ +static void smu_wbrf_support_check(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->wbrf_supported = smu_is_asic_wbrf_supported(smu) && amdgpu_wbrf && + acpi_amd_wbrf_supported_consumer(adev->dev); + + if (smu->wbrf_supported) + dev_info(adev->dev, "RF interference mitigation is supported\n"); +} + +/** + * smu_wbrf_init - init driver wbrf support + * + * @smu: smu_context pointer + * + * Verifies the AMD ACPI interfaces and registers with the wbrf + * notifier chain if the wbrf feature is supported. + * Returns 0 on success, error on failure. + */ +static int smu_wbrf_init(struct smu_context *smu) +{ + int ret; + + if (!smu->wbrf_supported) + return 0; + + INIT_DELAYED_WORK(&smu->wbrf_delayed_work, smu_wbrf_delayed_work_handler); + + smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler; + ret = amd_wbrf_register_notifier(&smu->wbrf_notifier); + if (ret) + return ret; + + /* + * Some wifiband exclusion ranges may already be in place + * before our driver loads. Schedule the work to make sure + * our driver is aware of those exclusion ranges. + */ + schedule_delayed_work(&smu->wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); + + return 0; +} + +/** + * smu_wbrf_fini - tear down driver wbrf support + * + * @smu: smu_context pointer + * + * Unregisters with the wbrf notifier chain. + */ +static void smu_wbrf_fini(struct smu_context *smu) +{ + if (!smu->wbrf_supported) + return; + + amd_wbrf_unregister_notifier(&smu->wbrf_notifier); + + cancel_delayed_work_sync(&smu->wbrf_delayed_work); +} + static int smu_smc_hw_setup(struct smu_context *smu) { struct smu_feature *feature = &smu->smu_feature; @@ -1414,6 +1595,15 @@ static int smu_smc_hw_setup(struct smu_context *smu) if (ret) return ret; + /* Enable UclkShadow when wbrf is supported */ + if (smu->wbrf_supported) { + ret = smu_enable_uclk_shadow(smu, true); + if (ret) { + dev_err(adev->dev, "Failed to enable UclkShadow feature to support wbrf!\n"); + return ret; + } + } + /* * With SCPM enabled, these actions(and relevant messages) are * not needed and permitted. @@ -1512,6 +1702,15 @@ static int smu_smc_hw_setup(struct smu_context *smu) */ ret = smu_set_min_dcef_deep_sleep(smu, smu->smu_table.boot_values.dcefclk / 100); + if (ret) { + dev_err(adev->dev, "Error setting min deepsleep dcefclk\n"); + return ret; + } + + /* Init wbrf support. Properly set up the notifier */ + ret = smu_wbrf_init(smu); + if (ret) + dev_err(adev->dev, "Error during wbrf init call\n"); return ret; } @@ -1567,6 +1766,13 @@ static int smu_hw_init(void *handle) return ret; } + /* + * Check whether wbrf is supported. This needs to be done + * before SMU setup starts since part of SMU configuration + * relies on this.
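+ * (In particular, smu_smc_hw_setup() enables the UclkShadow feature + * only when smu->wbrf_supported has been set here.)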
+ */ + smu_wbrf_support_check(smu); + if (smu->is_apu) { ret = smu_set_gfx_imu_enable(smu); if (ret) @@ -1733,6 +1939,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; + smu_wbrf_fini(smu); + cancel_work_sync(&smu->throttling_logging_work); cancel_work_sync(&smu->interrupt_work); @@ -3015,19 +3223,17 @@ static int smu_set_xgmi_pstate(void *handle, return ret; } -static int smu_get_baco_capability(void *handle, bool *cap) +static bool smu_get_baco_capability(void *handle) { struct smu_context *smu = handle; - *cap = false; - if (!smu->pm_enabled) - return 0; + return false; - if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support) - *cap = smu->ppt_funcs->baco_is_support(smu); + if (!smu->ppt_funcs || !smu->ppt_funcs->baco_is_support) + return false; - return 0; + return smu->ppt_funcs->baco_is_support(smu); } static int smu_baco_set_state(void *handle, int state) @@ -3201,6 +3407,20 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table) return smu->ppt_funcs->get_gpu_metrics(smu, table); } +static ssize_t smu_sys_get_pm_metrics(void *handle, void *pm_metrics, + size_t size) +{ + struct smu_context *smu = handle; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->ppt_funcs->get_pm_metrics) + return -EOPNOTSUPP; + + return smu->ppt_funcs->get_pm_metrics(smu, pm_metrics, size); +} + static int smu_enable_mgpu_fan_boost(void *handle) { struct smu_context *smu = handle; @@ -3342,6 +3562,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .set_df_cstate = smu_set_df_cstate, .set_xgmi_pstate = smu_set_xgmi_pstate, .get_gpu_metrics = smu_sys_get_gpu_metrics, + .get_pm_metrics = smu_sys_get_pm_metrics, .set_watermarks_for_clock_ranges = smu_set_watermarks_for_clock_ranges, .display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch, .get_max_sustainable_clocks_by_dc = smu_get_max_sustainable_clocks_by_dc, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f8b2e6cc2568..2aa4fea87314 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -22,6 +22,9 @@ #ifndef __AMDGPU_SMU_H__ #define __AMDGPU_SMU_H__ +#include <linux/acpi_amd_wbrf.h> +#include <linux/units.h> + #include "amdgpu.h" #include "kgd_pp_interface.h" #include "dm_pp_interface.h" @@ -253,6 +256,7 @@ struct smu_table { uint64_t mc_address; void *cpu_addr; struct amdgpu_bo *bo; + uint32_t version; }; enum smu_perf_level_designation { @@ -317,6 +321,7 @@ enum smu_table_id { SMU_TABLE_PACE, SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, + SMU_TABLE_WIFIBAND, SMU_TABLE_COUNT, }; @@ -470,6 +475,12 @@ struct stb_context { #define WORKLOAD_POLICY_MAX 7 +/* + * Configure wbrf event handling pace as there can be only one + * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms. + */ +#define SMU_WBRF_EVENT_HANDLING_PACE 10 + struct smu_context { struct amdgpu_device *adev; struct amdgpu_irq_src irq_source; @@ -569,6 +580,11 @@ struct smu_context { struct delayed_work swctf_delayed_work; enum pp_xgmi_plpd_mode plpd_mode; + + /* data structures for wbrf feature support */ + bool wbrf_supported; + struct notifier_block wbrf_notifier; + struct delayed_work wbrf_delayed_work; }; struct i2c_adapter; @@ -1253,6 +1269,15 @@ struct pptable_funcs { ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table); /** + * @get_pm_metrics: Get one snapshot of power management metrics from + * PMFW. 
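+ * The snapshot is written to the caller-allocated pm_metrics buffer, + * whose capacity is given by the size argument.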
+ * + * Return: Size of the metrics sample + */ + ssize_t (*get_pm_metrics)(struct smu_context *smu, void *pm_metrics, + size_t size); + + /** * @enable_mgpu_fan_boost: Enable multi-GPU fan boost. */ int (*enable_mgpu_fan_boost)(struct smu_context *smu); @@ -1365,6 +1390,22 @@ struct pptable_funcs { * @notify_rlc_state: Notify RLC power state to SMU. */ int (*notify_rlc_state)(struct smu_context *smu, bool en); + + /** + * @is_asic_wbrf_supported: check whether PMFW supports the wbrf feature + */ + bool (*is_asic_wbrf_supported)(struct smu_context *smu); + + /** + * @enable_uclk_shadow: Enable the uclk shadow feature on wbrf supported + */ + int (*enable_uclk_shadow)(struct smu_context *smu, bool enable); + + /** + * @set_wbrf_exclusion_ranges: notify SMU the wifi bands occupied + */ + int (*set_wbrf_exclusion_ranges)(struct smu_context *smu, + struct freq_band_range *exclusion_ranges); }; typedef enum { @@ -1491,6 +1532,17 @@ enum smu_baco_seq { __dst_size); \ }) +typedef struct { + uint16_t LowFreq; + uint16_t HighFreq; +} WifiOneBand_t; + +typedef struct { + uint32_t WifiBandEntryNum; + WifiOneBand_t WifiBandEntry[11]; + uint32_t MmHubPadding[8]; +} WifiBandEntryTable_t; + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 9dd1ed5b8940..b114d14fc053 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -1615,7 +1615,8 @@ typedef struct { #define TABLE_I2C_COMMANDS 9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND 12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 62b7c0daff68..8b1496f8ce58 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -1605,7 +1605,8 @@ typedef struct { #define TABLE_I2C_COMMANDS 9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND 12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 8f42771e1f0a..5bb7a63c0602 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -24,11 +24,6 @@ #ifndef SMU14_DRIVER_IF_V14_0_0_H #define SMU14_DRIVER_IF_V14_0_0_H -// *** IMPORTANT *** -// SMU TEAM: Always increment the interface version if -// any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 7 - typedef struct { int32_t value; uint32_t numFractionalBits; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index e2ee855c7748..e862d323caab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -138,10 +138,9 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 
0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 0x4C - #define PPSMC_MSG_DALNotPresent 0x4E - -#define PPSMC_Message_Count 0x4F +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h index 6aaefca9b595..a6bf9cdd130e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h @@ -134,6 +134,7 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 9dd47d91093e..953a767613b1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -259,7 +259,9 @@ __SMU_DUMMY_MAP(PowerUpUmsch), \ __SMU_DUMMY_MAP(PowerDownUmsch), \ __SMU_DUMMY_MAP(SetSoftMaxVpe), \ - __SMU_DUMMY_MAP(SetSoftMinVpe), + __SMU_DUMMY_MAP(SetSoftMinVpe), \ + __SMU_DUMMY_MAP(GetMetricsVersion), \ + __SMU_DUMMY_MAP(EnableUCLKShadow), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 95cb919718ae..fbd57fa1a004 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -210,15 +210,8 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); -int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_baco_seq baco_seq); - bool smu_v13_0_baco_is_support(struct smu_context *smu); -enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu); - -int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state); - int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); @@ -301,5 +294,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, int smu_v13_0_disable_pmfw_state(struct smu_context *smu); +int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable); + +int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, + struct freq_band_range *exclusion_ranges); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index a5b569976f19..3f7463c1c1a9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -26,8 +26,8 @@ #include "amdgpu_smu.h" #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF +#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1 -#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x6 #define FEATURE_MASK(feature) (1ULL << feature) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 2cb6b68222ba..4cd43bbec910 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2407,8 +2407,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { 
.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = smu_v11_0_baco_enter, .baco_exit = smu_v11_0_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index a38233cc5b7f..8d1d29ffb0f1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -3537,8 +3537,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = navi10_baco_enter, .baco_exit = navi10_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 1de9f8b5cc5f..21fc033528fa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -4428,8 +4428,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = sienna_cichlid_baco_enter, .baco_exit = sienna_cichlid_baco_exit, .mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cf1b84060bc3..771a3d457c33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2199,7 +2199,7 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu, return ret; } -int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, +static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, enum smu_baco_seq baco_seq) { struct smu_baco_context *smu_baco = &smu->smu_baco; @@ -2221,33 +2221,14 @@ int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, return 0; } -bool smu_v13_0_baco_is_support(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - - if (amdgpu_sriov_vf(smu->adev) || - !smu_baco->platform_support) - return false; - - /* return true if ASIC is in BACO state already */ - if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) - return true; - - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && - !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) - return false; - - return true; -} - -enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu) +static enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; return smu_baco->state; } -int smu_v13_0_baco_set_state(struct smu_context *smu, +static int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) { struct smu_baco_context *smu_baco = &smu->smu_baco; @@ -2281,24 +2262,60 @@ int smu_v13_0_baco_set_state(struct 
smu_context *smu, return ret; } -int smu_v13_0_baco_enter(struct smu_context *smu) +bool smu_v13_0_baco_is_support(struct smu_context *smu) { - int ret = 0; + struct smu_baco_context *smu_baco = &smu->smu_baco; - ret = smu_v13_0_baco_set_state(smu, - SMU_BACO_STATE_ENTER); - if (ret) - return ret; + if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) + return false; + + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; - msleep(10); + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && + !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) + return false; - return ret; + return true; +} + +int smu_v13_0_baco_enter(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + int ret; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + return smu_v13_0_baco_set_armd3_sequence(smu, + (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? + BACO_SEQ_BAMACO : BACO_SEQ_BACO); + } else { + ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_ENTER); + if (!ret) + usleep_range(10000, 11000); + + return ret; + } } int smu_v13_0_baco_exit(struct smu_context *smu) { - return smu_v13_0_baco_set_state(smu, - SMU_BACO_STATE_EXIT); + struct amdgpu_device *adev = smu->adev; + int ret; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu) @@ -2490,3 +2507,51 @@ int smu_v13_0_disable_pmfw_state(struct smu_context *smu) return ret == 0 ? 0 : -EINVAL; } + +int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable) +{ + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableUCLKShadow, enable, NULL); +} + +int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, + struct freq_band_range *exclusion_ranges) +{ + WifiBandEntryTable_t wifi_bands; + int valid_entries = 0; + int ret, i; + + memset(&wifi_bands, 0, sizeof(wifi_bands)); + for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) { + if (!exclusion_ranges[i].start && !exclusion_ranges[i].end) + break; + + /* PMFW expects the inputs to be in MHz */ + wifi_bands.WifiBandEntry[valid_entries].LowFreq = + DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_PER_MHZ); + wifi_bands.WifiBandEntry[valid_entries++].HighFreq = + DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_PER_MHZ); + } + wifi_bands.WifiBandEntryNum = valid_entries; + + /* + * Per confirmation with the PMFW team, WifiBandEntryNum = 0 + * is a valid setting. + * + * Consider the scenarios below: + * - At first the wifi device adds an exclusion range e.g. (2400,2500) to + * BIOS and our driver gets notified. We will set WifiBandEntryNum = 1 + * and pass the WifiBandEntry (2400, 2500) to PMFW. + * + * - Later the wifi device removes the wifiband list added above and + * our driver gets notified again. At this time, the driver will set + * WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW. + * + * - PMFW may still need to do some uclk shadow update (e.g. switching + * from the shadow clock back to the primary clock) on receiving this.
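+ * + * For example, with the MHz conversion above: a range reported in Hz as + * (2400000000, 2495999999) is sent to PMFW as LowFreq = 2400 and + * HighFreq = 2496, since DIV_ROUND_DOWN_ULL rounds the start down and + * DIV_ROUND_UP_ULL rounds the end up to whole MHz.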
+ */ + ret = smu_cmn_update_table(smu, SMU_TABLE_WIFIBAND, 0, &wifi_bands, true); + if (ret) + dev_warn(smu->adev->dev, "Failed to set wifiband!"); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 82c4e1f1c6f0..231122622a9c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -169,6 +169,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0), MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), MSG_MAP(DALNotPresent, PPSMC_MSG_DALNotPresent, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -253,6 +254,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), TAB_MAP(OVERDRIVE), + TAB_MAP(WIFIBAND), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -498,6 +500,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -2540,16 +2545,19 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask = 1 << workload_type; - /* Add optimizations for SMU13.0.0. Reuse the power saving profile */ - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) && - ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7400))) { - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_POWERSAVING); - if (workload_type >= 0) - workload_mask |= 1 << workload_type; + /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) { + if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || + (smu->adev->pm.fw_version >= 0x004e7300))) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + smu->adev->pm.fw_version >= 0x00504500)) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_POWERSAVING); + if (workload_type >= 0) + workload_mask |= 1 << workload_type; + } } return smu_cmn_send_smc_msg_with_param(smu, @@ -2558,38 +2566,6 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, NULL); } -static int smu_v13_0_0_baco_enter(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - struct amdgpu_device *adev = smu->adev; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) - return smu_v13_0_baco_set_armd3_sequence(smu, - (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? 
- BACO_SEQ_BAMACO : BACO_SEQ_BACO); - else - return smu_v13_0_baco_enter(smu); -} - -static int smu_v13_0_0_baco_exit(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { - /* Wait for PMFW handling for the Dstate change */ - usleep_range(10000, 11000); - ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); - } else { - ret = smu_v13_0_baco_exit(smu); - } - - if (!ret) - adev->gfx.is_poweron = false; - - return ret; -} - static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -2970,6 +2946,20 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, return ret; } +static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + return smu->smc_fw_version >= 0x004e6300; + case IP_VERSION(13, 0, 10): + return smu->smc_fw_version >= 0x00503300; + default: + return false; + } +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3035,10 +3025,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .deep_sleep_control = smu_v13_0_deep_sleep_control, .gfx_ulv_control = smu_v13_0_gfx_ulv_control, .baco_is_support = smu_v13_0_baco_is_support, - .baco_get_state = smu_v13_0_baco_get_state, - .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_0_baco_enter, - .baco_exit = smu_v13_0_0_baco_exit, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_0_mode1_reset, .mode2_reset = smu_v13_0_0_mode2_reset, @@ -3050,6 +3038,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .gpo_control = smu_v13_0_gpo_control, .get_ecc_info = smu_v13_0_0_get_ecc_info, .notify_display_change = smu_v13_0_notify_display_change, + .is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check, + .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, + .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index b64e07b75937..4ebc6b421c2c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -120,6 +120,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +// clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0), MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1), + MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1), MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -171,6 +173,7 @@ static const 
struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), }; +// clang-format on static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { CLK_MAP(SOCCLK, PPCLK_SOCCLK), CLK_MAP(FCLK, PPCLK_FCLK), @@ -432,6 +435,41 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, return 0; } +static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, + void *metrics, size_t max_size) +{ + struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; + uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version; + uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size; + struct amdgpu_pm_metrics *pm_metrics = metrics; + uint32_t pmfw_version; + int ret; + + if (!pm_metrics || !max_size) + return -EINVAL; + + if (max_size < (table_size + sizeof(pm_metrics->common_header))) + return -EOVERFLOW; + + /* Don't use cached metrics data */ + ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true); + if (ret) + return ret; + + smu_cmn_get_smc_version(smu, NULL, &pmfw_version); + + memset(&pm_metrics->common_header, 0, + sizeof(pm_metrics->common_header)); + pm_metrics->common_header.mp1_ip_discovery_version = + IP_VERSION(13, 0, 6); + pm_metrics->common_header.pmfw_version = pmfw_version; + pm_metrics->common_header.pmmetrics_version = table_version; + pm_metrics->common_header.structure_size = + sizeof(pm_metrics->common_header) + table_size; + + return pm_metrics->common_header.structure_size; +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; @@ -441,6 +479,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) (struct PPTable_t *)smu_table->driver_pptable; struct amdgpu_device *adev = smu->adev; int ret, i, retry = 100; + uint32_t table_version; /* Store one-time values in driver PPTable */ if (!pptable->Init) { @@ -459,6 +498,13 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) if (!retry) return -ETIME; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion, + &table_version); + if (ret) + return ret; + smu_table->tables[SMU_TABLE_SMU_METRICS].version = + table_version; + pptable->MaxSocketPowerLimit = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); pptable->MaxGfxclkFrequency = @@ -1477,7 +1523,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) if (smu->smc_fw_version < 0x554800) return 0; - amdgpu_ras_set_mca_debug_mode(smu->adev, enable); return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, enable ? 
0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK, NULL); @@ -2329,16 +2374,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, return ret; } -static int smu_v13_0_6_post_init(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, false); - - return 0; -} - static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -2421,8 +2456,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) { - uint64_t ipid = entry->regs[MCA_REG_IDX_IPID]; - uint32_t insthi; + u64 ipid = entry->regs[MCA_REG_IDX_IPID]; + u32 instidhi, instid; /* NOTE: All MCA IPID register share the same format, * so the driver can share the MCMP1 register header file. @@ -2431,9 +2466,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_ info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); - insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); - info->aid = ((insthi >> 2) & 0x03); - info->socket_id = insthi & 0x03; + /* + * Unified DieID Format: SAASS. A:AID, S:Socket. + * Unified DieID[4] = InstanceId[0] + * Unified DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); + instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo); + info->aid = ((instidhi >> 2) & 0x03); + info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03); } static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -2512,9 +2553,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0)) *count = 1; - else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0)) *count = 1; return 0; @@ -2525,13 +2566,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st uint32_t *count) { u32 ext_error_code; + u32 err_cnt; ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); + err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) - *count = 1; + *count = err_cnt; else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) - *count = 1; + *count = err_cnt; return 0; } @@ -2607,6 +2650,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct uint32_t instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); switch (instlo) { case 0x36430400: /* SMNAID XCD 0 */ case 0x38430400: /* SMNAID XCD 1 */ @@ -2626,6 +2670,7 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); if (instlo != 0x03b30400) return false; @@ -2848,6 +2893,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, return ret; } +static ssize_t
smu_v13_0_6_get_ecc_info(struct smu_context *smu, + void *table) +{ + /* Support ecc info by default */ + return 0; +} + static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, @@ -2892,6 +2944,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, + .get_pm_metrics = smu_v13_0_6_get_pm_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, @@ -2901,7 +2954,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, - .post_init = smu_v13_0_6_post_init, + .get_ecc_info = smu_v13_0_6_get_ecc_info, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 81eafed76045..59606a19e3d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -140,6 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -222,6 +223,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(OVERDRIVE), + TAB_MAP(WIFIBAND), }; static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -512,6 +514,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu) AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -2515,38 +2520,6 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, return ret; } -static int smu_v13_0_7_baco_enter(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - struct amdgpu_device *adev = smu->adev; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) - return smu_v13_0_baco_set_armd3_sequence(smu, - (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? 
- BACO_SEQ_BAMACO : BACO_SEQ_BACO); - else - return smu_v13_0_baco_enter(smu); -} - -static int smu_v13_0_7_baco_exit(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { - /* Wait for PMFW handling for the Dstate change */ - usleep_range(10000, 11000); - ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); - } else { - ret = smu_v13_0_baco_exit(smu); - } - - if (!ret) - adev->gfx.is_poweron = false; - - return ret; -} - static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -2567,6 +2540,11 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu, NULL); } +static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) +{ + return smu->smc_fw_version > 0x00524600; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2626,15 +2604,16 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .baco_is_support = smu_v13_0_baco_is_support, - .baco_get_state = smu_v13_0_baco_get_state, - .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_7_baco_enter, - .baco_exit = smu_v13_0_7_baco_exit, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, .gpo_control = smu_v13_0_gpo_control, + .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, + .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, + .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index d8f8ad0e7137..4894f7ee737b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -224,7 +224,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) if (smu->is_apu) adev->pm.fw_version = smu_version; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(14, 0, 2): smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; @@ -235,7 +235,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) break; default: dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n", - adev->ip_versions[MP1_HWIP][0]); + amdgpu_ip_version(adev, MP1_HWIP, 0)); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_INV; break; } @@ -733,7 +733,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable) int ret = 0; struct amdgpu_device *adev = smu->adev; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 0): if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 94ccdbfd7090..47fdbae4adfc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -1085,6 +1085,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct 
smu_context *smu, 0, NULL); } +static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table) +{ + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + uint8_t idx; + + /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */ + for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) { + clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0; + clock_table->SocClocks[idx].Vol = 0; + } + + for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) { + clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0; + clock_table->VPEClocks[idx].Vol = 0; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .check_fw_version = smu_v14_0_check_fw_version, @@ -1115,6 +1134,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu, .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, + .get_dpm_clock_table = smu_14_0_0_get_dpm_table, }; static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 64766ac69c53..6f4d212607d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -98,6 +98,9 @@ #define smu_set_config_table(smu, config_table) smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table) #define smu_init_pptable_microcode(smu) smu_ppt_funcs(init_pptable_microcode, 0, smu) #define smu_notify_rlc_state(smu, en) smu_ppt_funcs(notify_rlc_state, 0, smu, en) +#define smu_is_asic_wbrf_supported(smu) smu_ppt_funcs(is_asic_wbrf_supported, false, smu) +#define smu_enable_uclk_shadow(smu, enable) smu_ppt_funcs(enable_uclk_shadow, 0, smu, enable) +#define smu_set_wbrf_exclusion_ranges(smu, freq_band_range) smu_ppt_funcs(set_wbrf_exclusion_ranges, -EOPNOTSUPP, smu, freq_band_range) #endif #endif diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index dc01c43f6193..d72c22dcf685 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, /* * The size of the ctm is checked in - * drm_atomic_replace_property_blob_from_id. + * drm_property_replace_blob_from_id. 
*/ ctm = (struct drm_color_ctm *)state->ctm->data; for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 15dd667aa2e7..c78687c755a8 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -7,8 +7,9 @@ #include <linux/clk.h> #include <linux/component.h> #include <linux/module.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -1012,26 +1013,17 @@ armada_lcd_bind(struct device *dev, struct device *master, void *data) int irq = platform_get_irq(pdev, 0); const struct armada_variant *variant; struct device_node *port = NULL; + struct device_node *np, *parent = dev->of_node; if (irq < 0) return irq; - if (!dev->of_node) { - const struct platform_device_id *id; - id = platform_get_device_id(pdev); - if (!id) - return -ENXIO; - - variant = (const struct armada_variant *)id->driver_data; - } else { - const struct of_device_id *match; - struct device_node *np, *parent = dev->of_node; - - match = of_match_device(dev->driver->of_match_table, dev); - if (!match) - return -ENXIO; + variant = device_get_match_data(dev); + if (!variant) + return -ENXIO; + if (parent) { np = of_get_child_by_name(parent, "ports"); if (np) parent = np; @@ -1041,8 +1033,6 @@ armada_lcd_bind(struct device *dev, struct device *master, void *data) dev_err(dev, "no port node found in %pOF\n", parent); return -ENXIO; } - - variant = match->data; } return armada_drm_crtc_create(drm, dev, res, irq, variant, port); @@ -1066,10 +1056,9 @@ static int armada_lcd_probe(struct platform_device *pdev) return component_add(&pdev->dev, &armada_lcd_ops); } -static int armada_lcd_remove(struct platform_device *pdev) +static void armada_lcd_remove(struct platform_device *pdev) { component_del(&pdev->dev, &armada_lcd_ops); - return 0; } static const struct of_device_id armada_lcd_of_match[] = { @@ -1095,7 +1084,7 @@ MODULE_DEVICE_TABLE(platform, armada_lcd_platform_ids); struct platform_driver armada_lcd_platform_driver = { .probe = armada_lcd_probe, - .remove = armada_lcd_remove, + .remove_new = armada_lcd_remove, .driver = { .name = "armada-lcd", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index fa1c67598706..e51ecc4f7ef4 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -226,10 +226,9 @@ static int armada_drm_probe(struct platform_device *pdev) match); } -static int armada_drm_remove(struct platform_device *pdev) +static void armada_drm_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &armada_master_ops); - return 0; } static void armada_drm_shutdown(struct platform_device *pdev) @@ -249,7 +248,7 @@ MODULE_DEVICE_TABLE(platform, armada_drm_platform_ids); static struct platform_driver armada_drm_platform_driver = { .probe = armada_drm_probe, - .remove = armada_drm_remove, + .remove_new = armada_drm_remove, .shutdown = armada_drm_shutdown, .driver = { .name = "armada-drm", diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c index 78122b35a0cb..a7a6b70220eb 100644 --- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c +++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c @@ -6,10 +6,10 @@ #include <linux/irq.h> #include <linux/mfd/syscon.h> #include <linux/module.h> -#include <linux/of.h> -#include <linux/of_device.h> +#include 
<linux/mod_devicetable.h> #include <linux/of_reserved_mem.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/reset.h> @@ -143,7 +143,6 @@ static int aspeed_gfx_load(struct drm_device *drm) struct aspeed_gfx *priv = to_aspeed_gfx(drm); struct device_node *np = pdev->dev.of_node; const struct aspeed_gfx_config *config; - const struct of_device_id *match; struct resource *res; int ret; @@ -152,10 +151,9 @@ static int aspeed_gfx_load(struct drm_device *drm) if (IS_ERR(priv->base)) return PTR_ERR(priv->base); - match = of_match_device(aspeed_gfx_match, &pdev->dev); - if (!match) + config = device_get_match_data(&pdev->dev); + if (!config) return -EINVAL; - config = match->data; priv->dac_reg = config->dac_reg; priv->int_clr_reg = config->int_clear_reg; diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index cf5b754f044c..90bcb1eb9cd9 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -89,11 +89,194 @@ static const struct pci_device_id ast_pciidlist[] = { MODULE_DEVICE_TABLE(pci, ast_pciidlist); +static bool ast_is_vga_enabled(void __iomem *ioregs) +{ + u8 vgaer = __ast_read8(ioregs, AST_IO_VGAER); + + return vgaer & AST_IO_VGAER_VGA_ENABLE; +} + +static void ast_enable_vga(void __iomem *ioregs) +{ + __ast_write8(ioregs, AST_IO_VGAER, AST_IO_VGAER_VGA_ENABLE); + __ast_write8(ioregs, AST_IO_VGAMR_W, AST_IO_VGAMR_IOSEL); +} + +/* + * Run this function as part of the HW device cleanup; not + * when the DRM device gets released. + */ +static void ast_enable_mmio_release(void *data) +{ + void __iomem *ioregs = (void __force __iomem *)data; + + /* enable standard VGA decode */ + __ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1, AST_IO_VGACRA1_MMIO_ENABLED); +} + +static int ast_enable_mmio(struct device *dev, void __iomem *ioregs) +{ + void *data = (void __force *)ioregs; + + __ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1, + AST_IO_VGACRA1_MMIO_ENABLED | + AST_IO_VGACRA1_VGAIO_DISABLED); + + return devm_add_action_or_reset(dev, ast_enable_mmio_release, data); +} + +static void ast_open_key(void __iomem *ioregs) +{ + __ast_write8_i(ioregs, AST_IO_VGACRI, 0x80, AST_IO_VGACR80_PASSWORD); +} + +static int ast_detect_chip(struct pci_dev *pdev, + void __iomem *regs, void __iomem *ioregs, + enum ast_chip *chip_out, + enum ast_config_mode *config_mode_out) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + enum ast_config_mode config_mode = ast_use_defaults; + uint32_t scu_rev = 0xffffffff; + enum ast_chip chip; + u32 data; + u8 vgacrd0, vgacrd1; + + /* + * Find configuration mode and read SCU revision + */ + + /* Check if we have device-tree properties */ + if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", &data)) { + /* We do, disable P2A access */ + config_mode = ast_use_dt; + scu_rev = data; + } else if (pdev->device == PCI_CHIP_AST2000) { // Not all families have a P2A bridge + /* + * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge + * is disabled. We force using P2A if VGA only mode bit + * is set D[7] + */ + vgacrd0 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd0); + vgacrd1 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd1); + if (!(vgacrd0 & 0x80) || !(vgacrd1 & 0x10)) { + + /* + * We have a P2A bridge and it is enabled. 
+ */ + + /* Patch AST2500/AST2510 */ + if ((pdev->revision & 0xf0) == 0x40) { + if (!(vgacrd0 & AST_VRAM_INIT_STATUS_MASK)) + ast_patch_ahb_2500(regs); + } + + /* Double check that it's actually working */ + data = __ast_read32(regs, 0xf004); + if ((data != 0xffffffff) && (data != 0x00)) { + config_mode = ast_use_p2a; + + /* Read SCU7c (silicon revision register) */ + __ast_write32(regs, 0xf004, 0x1e6e0000); + __ast_write32(regs, 0xf000, 0x1); + scu_rev = __ast_read32(regs, 0x1207c); + } + } + } + + switch (config_mode) { + case ast_use_defaults: + dev_info(dev, "Using default configuration\n"); + break; + case ast_use_dt: + dev_info(dev, "Using device-tree for configuration\n"); + break; + case ast_use_p2a: + dev_info(dev, "Using P2A bridge for configuration\n"); + break; + } + + /* + * Identify chipset + */ + + if (pdev->revision >= 0x50) { + chip = AST2600; + dev_info(dev, "AST 2600 detected\n"); + } else if (pdev->revision >= 0x40) { + switch (scu_rev & 0x300) { + case 0x0100: + chip = AST2510; + dev_info(dev, "AST 2510 detected\n"); + break; + default: + chip = AST2500; + dev_info(dev, "AST 2500 detected\n"); + break; + } + } else if (pdev->revision >= 0x30) { + switch (scu_rev & 0x300) { + case 0x0100: + chip = AST1400; + dev_info(dev, "AST 1400 detected\n"); + break; + default: + chip = AST2400; + dev_info(dev, "AST 2400 detected\n"); + break; + } + } else if (pdev->revision >= 0x20) { + switch (scu_rev & 0x300) { + case 0x0000: + chip = AST1300; + dev_info(dev, "AST 1300 detected\n"); + break; + default: + chip = AST2300; + dev_info(dev, "AST 2300 detected\n"); + break; + } + } else if (pdev->revision >= 0x10) { + switch (scu_rev & 0x0300) { + case 0x0200: + chip = AST1100; + dev_info(dev, "AST 1100 detected\n"); + break; + case 0x0100: + chip = AST2200; + dev_info(dev, "AST 2200 detected\n"); + break; + case 0x0000: + chip = AST2150; + dev_info(dev, "AST 2150 detected\n"); + break; + default: + chip = AST2100; + dev_info(dev, "AST 2100 detected\n"); + break; + } + } else { + chip = AST2000; + dev_info(dev, "AST 2000 detected\n"); + } + + *chip_out = chip; + *config_mode_out = config_mode; + + return 0; +} + static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct ast_device *ast; - struct drm_device *dev; + struct device *dev = &pdev->dev; int ret; + void __iomem *regs; + void __iomem *ioregs; + enum ast_config_mode config_mode; + enum ast_chip chip; + struct drm_device *drm; + bool need_post = false; ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &ast_driver); if (ret) @@ -103,16 +286,80 @@ static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) return ret; - ast = ast_device_create(&ast_driver, pdev, ent->driver_data); - if (IS_ERR(ast)) - return PTR_ERR(ast); - dev = &ast->base; + regs = pcim_iomap(pdev, 1, 0); + if (!regs) + return -EIO; + + if (pdev->revision >= 0x40) { + /* + * On AST2500 and later models, MMIO is enabled by + * default. Adopt it to be compatible with ARM. + */ + resource_size_t len = pci_resource_len(pdev, 1); + + if (len < AST_IO_MM_OFFSET) + return -EIO; + if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH) + return -EIO; + ioregs = regs + AST_IO_MM_OFFSET; + } else if (pci_resource_flags(pdev, 2) & IORESOURCE_IO) { + /* + * Map I/O registers if we have a PCI BAR for I/O. 
+ */ + resource_size_t len = pci_resource_len(pdev, 2); + + if (len < AST_IO_MM_LENGTH) + return -EIO; + ioregs = pcim_iomap(pdev, 2, 0); + if (!ioregs) + return -EIO; + } else { + /* + * Anything else is best effort. + */ + resource_size_t len = pci_resource_len(pdev, 1); + + if (len < AST_IO_MM_OFFSET) + return -EIO; + if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH) + return -EIO; + ioregs = regs + AST_IO_MM_OFFSET; + + dev_info(dev, "Platform has no I/O space, using MMIO\n"); + } + + if (!ast_is_vga_enabled(ioregs)) { + dev_info(dev, "VGA not enabled on entry, requesting chip POST\n"); + need_post = true; + } + + /* + * If VGA isn't enabled, we need to enable now or subsequent + * access to the scratch registers will fail. + */ + if (need_post) + ast_enable_vga(ioregs); + /* Enable extended register access */ + ast_open_key(ioregs); + + ret = ast_enable_mmio(dev, ioregs); + if (ret) + return ret; + + ret = ast_detect_chip(pdev, regs, ioregs, &chip, &config_mode); + if (ret) + return ret; + + drm = ast_device_create(pdev, &ast_driver, chip, config_mode, regs, ioregs, need_post); + if (IS_ERR(drm)) + return PTR_ERR(drm); + pci_set_drvdata(pdev, drm); - ret = drm_dev_register(dev, ent->driver_data); + ret = drm_dev_register(drm, ent->driver_data); if (ret) return ret; - drm_fbdev_generic_setup(dev, 32); + drm_fbdev_generic_setup(drm, 32); return 0; } diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 772f3b049c16..3be5ccf1f5f4 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -98,6 +98,12 @@ enum ast_tx_chip { #define AST_TX_DP501_BIT BIT(AST_TX_DP501) #define AST_TX_ASTDP_BIT BIT(AST_TX_ASTDP) +enum ast_config_mode { + ast_use_p2a, + ast_use_dt, + ast_use_defaults +}; + #define AST_DRAM_512Mx16 0 #define AST_DRAM_1Gx16 1 #define AST_DRAM_512Mx32 2 @@ -192,12 +198,13 @@ to_ast_bmc_connector(struct drm_connector *connector) struct ast_device { struct drm_device base; - struct mutex ioregs_lock; /* Protects access to I/O registers in ioregs */ void __iomem *regs; void __iomem *ioregs; void __iomem *dp501_fw_buf; + enum ast_config_mode config_mode; enum ast_chip chip; + uint32_t dram_bus_width; uint32_t dram_type; uint32_t mclk; @@ -207,6 +214,8 @@ struct ast_device { unsigned long vram_size; unsigned long vram_fb_available; + struct mutex modeset_lock; /* Protects access to modeset I/O registers in ioregs */ + struct ast_plane primary_plane; struct ast_plane cursor_plane; struct drm_crtc crtc; @@ -234,11 +243,6 @@ struct ast_device { } output; bool support_wide_screen; - enum { - ast_use_p2a, - ast_use_dt, - ast_use_defaults - } config_mode; unsigned long tx_chip_types; /* bitfield of enum ast_chip_type */ u8 *dp501_fw_addr; @@ -250,9 +254,13 @@ static inline struct ast_device *to_ast_device(struct drm_device *dev) return container_of(dev, struct ast_device, base); } -struct ast_device *ast_device_create(const struct drm_driver *drv, - struct pci_dev *pdev, - unsigned long flags); +struct drm_device *ast_device_create(struct pci_dev *pdev, + const struct drm_driver *drv, + enum ast_chip chip, + enum ast_config_mode config_mode, + void __iomem *regs, + void __iomem *ioregs, + bool need_post); static inline unsigned long __ast_gen(struct ast_device *ast) { @@ -272,55 +280,94 @@ static inline bool __ast_gen_is_eq(struct ast_device *ast, unsigned long gen) #define IS_AST_GEN6(__ast) __ast_gen_is_eq(__ast, 6) #define IS_AST_GEN7(__ast) __ast_gen_is_eq(__ast, 7) +static inline u8 __ast_read8(const void __iomem *addr, u32 reg) +{ + 
return ioread8(addr + reg); +} + +static inline u32 __ast_read32(const void __iomem *addr, u32 reg) +{ + return ioread32(addr + reg); +} + +static inline void __ast_write8(void __iomem *addr, u32 reg, u8 val) +{ + iowrite8(val, addr + reg); +} + +static inline void __ast_write32(void __iomem *addr, u32 reg, u32 val) +{ + iowrite32(val, addr + reg); +} + +static inline u8 __ast_read8_i(void __iomem *addr, u32 reg, u8 index) +{ + __ast_write8(addr, reg, index); + return __ast_read8(addr, reg + 1); +} + +static inline u8 __ast_read8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask) +{ + u8 val = __ast_read8_i(addr, reg, index); + + return val & read_mask; +} + +static inline void __ast_write8_i(void __iomem *addr, u32 reg, u8 index, u8 val) +{ + __ast_write8(addr, reg, index); + __ast_write8(addr, reg + 1, val); +} + +static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask, + u8 val) +{ + u8 tmp = __ast_read8_i_masked(addr, reg, index, read_mask); + + tmp |= val; + __ast_write8_i(addr, reg, index, tmp); +} + static inline u32 ast_read32(struct ast_device *ast, u32 reg) { - return ioread32(ast->regs + reg); + return __ast_read32(ast->regs, reg); } static inline void ast_write32(struct ast_device *ast, u32 reg, u32 val) { - iowrite32(val, ast->regs + reg); + __ast_write32(ast->regs, reg, val); } static inline u8 ast_io_read8(struct ast_device *ast, u32 reg) { - return ioread8(ast->ioregs + reg); + return __ast_read8(ast->ioregs, reg); } static inline void ast_io_write8(struct ast_device *ast, u32 reg, u8 val) { - iowrite8(val, ast->ioregs + reg); + __ast_write8(ast->ioregs, reg, val); } static inline u8 ast_get_index_reg(struct ast_device *ast, u32 base, u8 index) { - ast_io_write8(ast, base, index); - ++base; - return ast_io_read8(ast, base); + return __ast_read8_i(ast->ioregs, base, index); } static inline u8 ast_get_index_reg_mask(struct ast_device *ast, u32 base, u8 index, u8 preserve_mask) { - u8 val = ast_get_index_reg(ast, base, index); - - return val & preserve_mask; + return __ast_read8_i_masked(ast->ioregs, base, index, preserve_mask); } static inline void ast_set_index_reg(struct ast_device *ast, u32 base, u8 index, u8 val) { - ast_io_write8(ast, base, index); - ++base; - ast_io_write8(ast, base, val); + __ast_write8_i(ast->ioregs, base, index, val); } static inline void ast_set_index_reg_mask(struct ast_device *ast, u32 base, u8 index, u8 preserve_mask, u8 val) { - u8 tmp = ast_get_index_reg_mask(ast, base, index, preserve_mask); - - tmp |= val; - ast_set_index_reg(ast, base, index, tmp); + __ast_write8_i_masked(ast->ioregs, base, index, preserve_mask, val); } #define AST_VIDMEM_SIZE_8M 0x00800000 @@ -442,7 +489,7 @@ int ast_mm_init(struct ast_device *ast); void ast_post_gpu(struct drm_device *dev); u32 ast_mindwm(struct ast_device *ast, u32 r); void ast_moutdwm(struct ast_device *ast, u32 r, u32 v); -void ast_patch_ahb_2500(struct ast_device *ast); +void ast_patch_ahb_2500(void __iomem *regs); /* ast dp501 */ void ast_set_dp501_video_output(struct drm_device *dev, u8 mode); bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index f4ab40e22cea..2f3ad5f949fc 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -35,180 +35,6 @@ #include "ast_drv.h" -static bool ast_is_vga_enabled(struct drm_device *dev) -{ - struct ast_device *ast = to_ast_device(dev); - u8 ch; - - ch = ast_io_read8(ast, AST_IO_VGAER); - - return 
!!(ch & 0x01); -} - -static void ast_enable_vga(struct drm_device *dev) -{ - struct ast_device *ast = to_ast_device(dev); - - ast_io_write8(ast, AST_IO_VGAER, 0x01); - ast_io_write8(ast, AST_IO_VGAMR_W, 0x01); -} - -/* - * Run this function as part of the HW device cleanup; not - * when the DRM device gets released. - */ -static void ast_enable_mmio_release(void *data) -{ - struct ast_device *ast = data; - - /* enable standard VGA decode */ - ast_set_index_reg(ast, AST_IO_VGACRI, 0xa1, 0x04); -} - -static int ast_enable_mmio(struct ast_device *ast) -{ - struct drm_device *dev = &ast->base; - - ast_set_index_reg(ast, AST_IO_VGACRI, 0xa1, 0x06); - - return devm_add_action_or_reset(dev->dev, ast_enable_mmio_release, ast); -} - -static void ast_open_key(struct ast_device *ast) -{ - ast_set_index_reg(ast, AST_IO_VGACRI, 0x80, 0xA8); -} - -static int ast_device_config_init(struct ast_device *ast) -{ - struct drm_device *dev = &ast->base; - struct pci_dev *pdev = to_pci_dev(dev->dev); - struct device_node *np = dev->dev->of_node; - uint32_t scu_rev = 0xffffffff; - u32 data; - u8 jregd0, jregd1; - - /* - * Find configuration mode and read SCU revision - */ - - ast->config_mode = ast_use_defaults; - - /* Check if we have device-tree properties */ - if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", &data)) { - /* We do, disable P2A access */ - ast->config_mode = ast_use_dt; - scu_rev = data; - } else if (pdev->device == PCI_CHIP_AST2000) { // Not all families have a P2A bridge - /* - * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge - * is disabled. We force using P2A if VGA only mode bit - * is set D[7] - */ - jregd0 = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - jregd1 = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd1, 0xff); - if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) { - - /* - * We have a P2A bridge and it is enabled. 
- */ - - /* Patch AST2500/AST2510 */ - if ((pdev->revision & 0xf0) == 0x40) { - if (!(jregd0 & AST_VRAM_INIT_STATUS_MASK)) - ast_patch_ahb_2500(ast); - } - - /* Double check that it's actually working */ - data = ast_read32(ast, 0xf004); - if ((data != 0xffffffff) && (data != 0x00)) { - ast->config_mode = ast_use_p2a; - - /* Read SCU7c (silicon revision register) */ - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - scu_rev = ast_read32(ast, 0x1207c); - } - } - } - - switch (ast->config_mode) { - case ast_use_defaults: - drm_info(dev, "Using default configuration\n"); - break; - case ast_use_dt: - drm_info(dev, "Using device-tree for configuration\n"); - break; - case ast_use_p2a: - drm_info(dev, "Using P2A bridge for configuration\n"); - break; - } - - /* - * Identify chipset - */ - - if (pdev->revision >= 0x50) { - ast->chip = AST2600; - drm_info(dev, "AST 2600 detected\n"); - } else if (pdev->revision >= 0x40) { - switch (scu_rev & 0x300) { - case 0x0100: - ast->chip = AST2510; - drm_info(dev, "AST 2510 detected\n"); - break; - default: - ast->chip = AST2500; - drm_info(dev, "AST 2500 detected\n"); - } - } else if (pdev->revision >= 0x30) { - switch (scu_rev & 0x300) { - case 0x0100: - ast->chip = AST1400; - drm_info(dev, "AST 1400 detected\n"); - break; - default: - ast->chip = AST2400; - drm_info(dev, "AST 2400 detected\n"); - } - } else if (pdev->revision >= 0x20) { - switch (scu_rev & 0x300) { - case 0x0000: - ast->chip = AST1300; - drm_info(dev, "AST 1300 detected\n"); - break; - default: - ast->chip = AST2300; - drm_info(dev, "AST 2300 detected\n"); - break; - } - } else if (pdev->revision >= 0x10) { - switch (scu_rev & 0x0300) { - case 0x0200: - ast->chip = AST1100; - drm_info(dev, "AST 1100 detected\n"); - break; - case 0x0100: - ast->chip = AST2200; - drm_info(dev, "AST 2200 detected\n"); - break; - case 0x0000: - ast->chip = AST2150; - drm_info(dev, "AST 2150 detected\n"); - break; - default: - ast->chip = AST2100; - drm_info(dev, "AST 2100 detected\n"); - break; - } - } else { - ast->chip = AST2000; - drm_info(dev, "AST 2000 detected\n"); - } - - return 0; -} - static void ast_detect_widescreen(struct ast_device *ast) { u8 jreg; @@ -424,69 +250,27 @@ static int ast_get_dram_info(struct drm_device *dev) return 0; } -struct ast_device *ast_device_create(const struct drm_driver *drv, - struct pci_dev *pdev, - unsigned long flags) +struct drm_device *ast_device_create(struct pci_dev *pdev, + const struct drm_driver *drv, + enum ast_chip chip, + enum ast_config_mode config_mode, + void __iomem *regs, + void __iomem *ioregs, + bool need_post) { struct drm_device *dev; struct ast_device *ast; - bool need_post = false; - int ret = 0; + int ret; ast = devm_drm_dev_alloc(&pdev->dev, drv, struct ast_device, base); if (IS_ERR(ast)) - return ast; + return ERR_CAST(ast); dev = &ast->base; - pci_set_drvdata(pdev, dev); - - ret = drmm_mutex_init(dev, &ast->ioregs_lock); - if (ret) - return ERR_PTR(ret); - - ast->regs = pcim_iomap(pdev, 1, 0); - if (!ast->regs) - return ERR_PTR(-EIO); - - /* - * After AST2500, MMIO is enabled by default, and it should be adopted - * to be compatible with Arm. 
- */ - if (pdev->revision >= 0x40) { - ast->ioregs = ast->regs + AST_IO_MM_OFFSET; - } else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) { - drm_info(dev, "platform has no IO space, trying MMIO\n"); - ast->ioregs = ast->regs + AST_IO_MM_OFFSET; - } - - /* "map" IO regs if the above hasn't done so already */ - if (!ast->ioregs) { - ast->ioregs = pcim_iomap(pdev, 2, 0); - if (!ast->ioregs) - return ERR_PTR(-EIO); - } - - if (!ast_is_vga_enabled(dev)) { - drm_info(dev, "VGA not enabled on entry, requesting chip POST\n"); - need_post = true; - } - - /* - * If VGA isn't enabled, we need to enable now or subsequent - * access to the scratch registers will fail. - */ - if (need_post) - ast_enable_vga(dev); - - /* Enable extended register access */ - ast_open_key(ast); - ret = ast_enable_mmio(ast); - if (ret) - return ERR_PTR(ret); - - ret = ast_device_config_init(ast); - if (ret) - return ERR_PTR(ret); + ast->chip = chip; + ast->config_mode = config_mode; + ast->regs = regs; + ast->ioregs = ioregs; ast_detect_widescreen(ast); ast_detect_tx_chip(ast, need_post); @@ -517,5 +301,5 @@ struct ast_device *ast_device_create(const struct drm_driver *drv, if (ret) return ERR_PTR(ret); - return ast; + return dev; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c20534d0ef7c..a718646a66b8 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1358,13 +1358,13 @@ static int ast_vga_connector_helper_get_modes(struct drm_connector *connector) * Protect access to I/O registers from concurrent modesetting * by acquiring the I/O-register lock. */ - mutex_lock(&ast->ioregs_lock); + mutex_lock(&ast->modeset_lock); edid = drm_get_edid(connector, &ast_vga_connector->i2c->adapter); if (!edid) goto err_mutex_unlock; - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); count = drm_add_edid_modes(connector, edid); kfree(edid); @@ -1372,7 +1372,7 @@ static int ast_vga_connector_helper_get_modes(struct drm_connector *connector) return count; err_mutex_unlock: - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); err_drm_connector_update_edid_property: drm_connector_update_edid_property(connector, NULL); return 0; @@ -1464,13 +1464,13 @@ static int ast_sil164_connector_helper_get_modes(struct drm_connector *connector * Protect access to I/O registers from concurrent modesetting * by acquiring the I/O-register lock. */ - mutex_lock(&ast->ioregs_lock); + mutex_lock(&ast->modeset_lock); edid = drm_get_edid(connector, &ast_sil164_connector->i2c->adapter); if (!edid) goto err_mutex_unlock; - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); count = drm_add_edid_modes(connector, edid); kfree(edid); @@ -1478,7 +1478,7 @@ static int ast_sil164_connector_helper_get_modes(struct drm_connector *connector return count; err_mutex_unlock: - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); err_drm_connector_update_edid_property: drm_connector_update_edid_property(connector, NULL); return 0; @@ -1670,13 +1670,13 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector) * Protect access to I/O registers from concurrent modesetting * by acquiring the I/O-register lock. 
*/ - mutex_lock(&ast->ioregs_lock); + mutex_lock(&ast->modeset_lock); succ = ast_astdp_read_edid(connector->dev, edid); if (succ < 0) goto err_mutex_unlock; - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); drm_connector_update_edid_property(connector, edid); count = drm_add_edid_modes(connector, edid); @@ -1685,7 +1685,7 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector) return count; err_mutex_unlock: - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); kfree(edid); err_drm_connector_update_edid_property: drm_connector_update_edid_property(connector, NULL); @@ -1870,9 +1870,9 @@ static void ast_mode_config_helper_atomic_commit_tail(struct drm_atomic_state *s * display modes. Protect access to I/O registers by acquiring * the I/O-register lock. Released in atomic_flush(). */ - mutex_lock(&ast->ioregs_lock); + mutex_lock(&ast->modeset_lock); drm_atomic_helper_commit_tail_rpm(state); - mutex_unlock(&ast->ioregs_lock); + mutex_unlock(&ast->modeset_lock); } static const struct drm_mode_config_helper_funcs ast_mode_config_helper_funcs = { @@ -1910,6 +1910,10 @@ int ast_mode_config_init(struct ast_device *ast) struct drm_connector *physical_connector = NULL; int ret; + ret = drmm_mutex_init(dev, &ast->modeset_lock); + if (ret) + return ret; + ret = drmm_mode_config_init(dev); if (ret) return ret; diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 7a993a384314..22f548805dfb 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -77,28 +77,42 @@ ast_set_def_ext_reg(struct drm_device *dev) ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xff, reg); } -u32 ast_mindwm(struct ast_device *ast, u32 r) +static u32 __ast_mindwm(void __iomem *regs, u32 r) { - uint32_t data; + u32 data; - ast_write32(ast, 0xf004, r & 0xffff0000); - ast_write32(ast, 0xf000, 0x1); + __ast_write32(regs, 0xf004, r & 0xffff0000); + __ast_write32(regs, 0xf000, 0x1); do { - data = ast_read32(ast, 0xf004) & 0xffff0000; + data = __ast_read32(regs, 0xf004) & 0xffff0000; } while (data != (r & 0xffff0000)); - return ast_read32(ast, 0x10000 + (r & 0x0000ffff)); + + return __ast_read32(regs, 0x10000 + (r & 0x0000ffff)); } -void ast_moutdwm(struct ast_device *ast, u32 r, u32 v) +static void __ast_moutdwm(void __iomem *regs, u32 r, u32 v) { - uint32_t data; - ast_write32(ast, 0xf004, r & 0xffff0000); - ast_write32(ast, 0xf000, 0x1); + u32 data; + + __ast_write32(regs, 0xf004, r & 0xffff0000); + __ast_write32(regs, 0xf000, 0x1); + do { - data = ast_read32(ast, 0xf004) & 0xffff0000; + data = __ast_read32(regs, 0xf004) & 0xffff0000; } while (data != (r & 0xffff0000)); - ast_write32(ast, 0x10000 + (r & 0x0000ffff), v); + + __ast_write32(regs, 0x10000 + (r & 0x0000ffff), v); +} + +u32 ast_mindwm(struct ast_device *ast, u32 r) +{ + return __ast_mindwm(ast->regs, r); +} + +void ast_moutdwm(struct ast_device *ast, u32 r, u32 v) +{ + __ast_moutdwm(ast->regs, r, v); } /* @@ -1987,17 +2001,18 @@ static bool ast_dram_init_2500(struct ast_device *ast) return true; } -void ast_patch_ahb_2500(struct ast_device *ast) +void ast_patch_ahb_2500(void __iomem *regs) { - u32 data; + u32 data; /* Clear bus lock condition */ - ast_moutdwm(ast, 0x1e600000, 0xAEED1A03); - ast_moutdwm(ast, 0x1e600084, 0x00010000); - ast_moutdwm(ast, 0x1e600088, 0x00000000); - ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); - data = ast_mindwm(ast, 0x1e6e2070); - if (data & 0x08000000) { /* check fast reset */ + __ast_moutdwm(regs, 0x1e600000, 0xAEED1A03); 
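/*
 * Context for the __ast_mindwm()/__ast_moutdwm() helpers introduced above:
 * they implement the P2A window protocol. The upper 16 address bits are
 * latched through MMIO registers 0xf004/0xf000 and polled back, then the
 * value is accessed at BAR1 offset 0x10000 plus the lower 16 address bits.
 * A hedged sketch of a read-modify-write built on top of them follows;
 * ast_rmw_p2a() is a hypothetical helper for illustration, not part of
 * this patch.
 */
static void ast_rmw_p2a(void __iomem *regs, u32 r, u32 clear, u32 set)
{
	u32 val = __ast_mindwm(regs, r);	/* selects the window, then reads */

	val &= ~clear;
	val |= set;
	__ast_moutdwm(regs, r, val);		/* re-selects the window, then writes */
}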
+ __ast_moutdwm(regs, 0x1e600084, 0x00010000); + __ast_moutdwm(regs, 0x1e600088, 0x00000000); + __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); + + data = __ast_mindwm(regs, 0x1e6e2070); + if (data & 0x08000000) { /* check fast reset */ /* * If "Fast reset" is enabled for the ARM-ICE debugger, * then the WDT needs to be enabled, so that @@ -2009,16 +2024,18 @@ void ast_patch_ahb_2500(struct ast_device *ast) * [1]:= 1:WDT will be cleared and disabled after timeout occurs * [0]:= 1:WDT enable */ - ast_moutdwm(ast, 0x1E785004, 0x00000010); - ast_moutdwm(ast, 0x1E785008, 0x00004755); - ast_moutdwm(ast, 0x1E78500c, 0x00000033); + __ast_moutdwm(regs, 0x1E785004, 0x00000010); + __ast_moutdwm(regs, 0x1E785008, 0x00004755); + __ast_moutdwm(regs, 0x1E78500c, 0x00000033); udelay(1000); } + do { - ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); - data = ast_mindwm(ast, 0x1e6e2000); - } while (data != 1); - ast_moutdwm(ast, 0x1e6e207c, 0x08000000); /* clear fast reset */ + __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); + data = __ast_mindwm(regs, 0x1e6e2000); + } while (data != 1); + + __ast_moutdwm(regs, 0x1e6e207c, 0x08000000); /* clear fast reset */ } void ast_post_chip_2500(struct drm_device *dev) @@ -2030,7 +2047,7 @@ void ast_post_chip_2500(struct drm_device *dev) reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); if ((reg & AST_VRAM_INIT_STATUS_MASK) == 0) {/* vga only */ /* Clear bus lock condition */ - ast_patch_ahb_2500(ast); + ast_patch_ahb_2500(ast->regs); /* Disable watchdog */ ast_moutdwm(ast, 0x1E78502C, 0x00000000); diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index 555286ecf520..62dddbf3fe56 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -10,10 +10,17 @@ */ #define AST_IO_MM_OFFSET (0x380) +#define AST_IO_MM_LENGTH (128) #define AST_IO_VGAARI_W (0x40) + #define AST_IO_VGAMR_W (0x42) +#define AST_IO_VGAMR_R (0x4c) +#define AST_IO_VGAMR_IOSEL BIT(0) + #define AST_IO_VGAER (0x43) +#define AST_IO_VGAER_VGA_ENABLE BIT(0) + #define AST_IO_VGASRI (0x44) #define AST_IO_VGADRR (0x47) #define AST_IO_VGADWR (0x48) @@ -21,14 +28,15 @@ #define AST_IO_VGAGRI (0x4E) #define AST_IO_VGACRI (0x54) +#define AST_IO_VGACR80_PASSWORD (0xa8) +#define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) +#define AST_IO_VGACRA1_MMIO_ENABLED BIT(2) #define AST_IO_VGACRCB_HWC_16BPP BIT(0) /* set: ARGB4444, cleared: 2bpp palette */ #define AST_IO_VGACRCB_HWC_ENABLED BIT(1) #define AST_IO_VGAIR1_R (0x5A) #define AST_IO_VGAIR1_VREFRESH BIT(3) -#define AST_IO_VGAMR_R (0x4C) - /* * Display Transmitter Type */ diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 3e6a4e2044c0..efd996f6c138 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -12,6 +12,23 @@ config DRM_PANEL_BRIDGE help DRM bridge wrapper of DRM panels +config DRM_AUX_BRIDGE + tristate + depends on DRM_BRIDGE && OF + select AUXILIARY_BUS + select DRM_PANEL_BRIDGE + help + Simple transparent bridge that is used by several non-DRM drivers to + build a bridge chain. + +config DRM_AUX_HPD_BRIDGE + tristate + depends on DRM_BRIDGE && OF + select AUXILIARY_BUS + help + Simple bridge that terminates the bridge chain and provides HPD + support. 
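The two Kconfig entries above back the new aux-bridge.c and aux-hpd-bridge.c helpers added later in this series. For orientation, a minimal sketch of how a non-DRM consumer would use the HPD variant; the example_dp_probe()/example_dp_plug() functions are hypothetical, only drm_dp_hpd_bridge_register() and drm_aux_hpd_bridge_notify() come from this patch set:

#include <drm/drm_connector.h>
#include <drm/bridge/aux-bridge.h>

/* Hypothetical consumer: a USB-C/DP mux driver that owns no drm_bridge of
 * its own but must terminate the bridge chain and forward HPD events. */
static int example_dp_probe(struct device *dev, struct device_node *dp_np)
{
	struct device *bridge_dev;

	/* Creates the auxiliary device; torn down automatically via devm. */
	bridge_dev = drm_dp_hpd_bridge_register(dev, dp_np);
	if (IS_ERR(bridge_dev))
		return PTR_ERR(bridge_dev);

	dev_set_drvdata(dev, bridge_dev);
	return 0;
}

/* Later, from sleepable context, when the mux detects a (un)plug event: */
static void example_dp_plug(struct device *dev, bool connected)
{
	drm_aux_hpd_bridge_notify(dev_get_drvdata(dev),
				  connected ? connector_status_connected
					    : connector_status_disconnected);
}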
+ menu "Display Interface Bridges" depends on DRM && DRM_BRIDGE diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 2b892b7ed59e..017b5832733b 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DRM_AUX_BRIDGE) += aux-bridge.o +obj-$(CONFIG_DRM_AUX_HPD_BRIDGE) += aux-hpd-bridge.o obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 8f740154707d..ef31033439bc 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1298,10 +1298,32 @@ static void anx7625_config(struct anx7625_data *ctx) XTAL_FRQ_SEL, XTAL_FRQ_27M); } +static int anx7625_hpd_timer_config(struct anx7625_data *ctx) +{ + int ret; + + /* Set irq detect window to 2ms */ + ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT8_15, + (HPD_TIME >> 8) & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT16_23, + (HPD_TIME >> 16) & 0xFF); + + return ret; +} + +static int anx7625_read_hpd_gpio_config_status(struct anx7625_data *ctx) +{ + return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, GPIO_CTRL_2); +} + static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) { struct device *dev = ctx->dev; - int ret; + int ret, val; /* Reset main ocm */ ret = anx7625_reg_write(ctx, ctx->i2c.rx_p0_client, 0x88, 0x40); @@ -1315,6 +1337,19 @@ static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature fail.\n"); else DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature succeeded.\n"); + + /* + * Make sure the HPD GPIO already be configured after OCM release before + * setting HPD detect window register. 
Here we poll the status register + for at most 40ms, then configure the HPD irq detect window register + */ + readx_poll_timeout(anx7625_read_hpd_gpio_config_status, + ctx, val, + ((val & HPD_SOURCE) || (val < 0)), + 2000, 2000 * 20); + + /* Set HPD irq detect window to 2ms */ + anx7625_hpd_timer_config(ctx); } static int anx7625_ocm_loading_check(struct anx7625_data *ctx) @@ -1437,20 +1472,6 @@ static void anx7625_start_dp_work(struct anx7625_data *ctx) static int anx7625_read_hpd_status_p0(struct anx7625_data *ctx) { - int ret; - - /* Set irq detect window to 2ms */ - ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT8_15, - (HPD_TIME >> 8) & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT16_23, - (HPD_TIME >> 16) & 0xFF); - if (ret < 0) - return ret; - return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, SYSTEM_STSTUS); } @@ -1464,9 +1485,6 @@ static int _anx7625_hpd_polling(struct anx7625_data *ctx, if (ctx->pdata.intp_irq) return 0; - /* Delay 200ms for FW HPD de-bounce */ - msleep(200); - ret = readx_poll_timeout(anx7625_read_hpd_status_p0, ctx, val, ((val & HPD_STATUS) || (val < 0)), diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 5af819611ebc..66ebee7f3d83 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -259,6 +259,10 @@ #define AP_MIPI_RX_EN BIT(5) /* 1: MIPI RX input in 0: no RX in */ #define AP_DISABLE_PD BIT(6) #define AP_DISABLE_DISPLAY BIT(7) + +#define GPIO_CTRL_2 0x49 +#define HPD_SOURCE BIT(6) + /***************************************************************/ /* Register definition of device address 0x84 */ #define MIPI_PHY_CONTROL_3 0x03 diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c new file mode 100644 index 000000000000..b29980f95379 --- /dev/null +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Linaro Ltd. + * + * Author: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> + */ +#include <linux/auxiliary_bus.h> +#include <linux/module.h> +#include <linux/of.h> + +#include <drm/drm_bridge.h> +#include <drm/bridge/aux-bridge.h> + +static DEFINE_IDA(drm_aux_bridge_ida); + +static void drm_aux_bridge_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + + ida_free(&drm_aux_bridge_ida, adev->id); + + kfree(adev); +} + +static void drm_aux_bridge_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +/** + * drm_aux_bridge_register - Create a simple bridge device to link the chain + * @parent: device instance providing this bridge + * + * Creates a simple DRM bridge that doesn't implement any drm_bridge + * operations. Such bridges merely fill a place in the bridge chain linking + * surrounding DRM bridges. 
+ * + * Return: zero on success, negative error code on failure + */ +int drm_aux_bridge_register(struct device *parent) +{ + struct auxiliary_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + ret = ida_alloc(&drm_aux_bridge_ida, GFP_KERNEL); + if (ret < 0) { + kfree(adev); + return ret; + } + + adev->id = ret; + adev->name = "aux_bridge"; + adev->dev.parent = parent; + adev->dev.of_node = of_node_get(parent->of_node); + adev->dev.release = drm_aux_bridge_release; + + ret = auxiliary_device_init(adev); + if (ret) { + ida_free(&drm_aux_bridge_ida, adev->id); + kfree(adev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(parent, drm_aux_bridge_unregister_adev, adev); +} +EXPORT_SYMBOL_GPL(drm_aux_bridge_register); + +struct drm_aux_bridge_data { + struct drm_bridge bridge; + struct drm_bridge *next_bridge; + struct device *dev; +}; + +static int drm_aux_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + struct drm_aux_bridge_data *data; + + if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) + return -EINVAL; + + data = container_of(bridge, struct drm_aux_bridge_data, bridge); + + return drm_bridge_attach(bridge->encoder, data->next_bridge, bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); +} + +static const struct drm_bridge_funcs drm_aux_bridge_funcs = { + .attach = drm_aux_bridge_attach, +}; + +static int drm_aux_bridge_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *id) +{ + struct drm_aux_bridge_data *data; + + data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = &auxdev->dev; + data->next_bridge = devm_drm_of_get_bridge(&auxdev->dev, auxdev->dev.of_node, 0, 0); + if (IS_ERR(data->next_bridge)) + return dev_err_probe(&auxdev->dev, PTR_ERR(data->next_bridge), + "failed to acquire drm_bridge\n"); + + data->bridge.funcs = &drm_aux_bridge_funcs; + data->bridge.of_node = data->dev->of_node; + + return devm_drm_bridge_add(data->dev, &data->bridge); +} + +static const struct auxiliary_device_id drm_aux_bridge_table[] = { + { .name = KBUILD_MODNAME ".aux_bridge" }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, drm_aux_bridge_table); + +static struct auxiliary_driver drm_aux_bridge_drv = { + .name = "aux_bridge", + .id_table = drm_aux_bridge_table, + .probe = drm_aux_bridge_probe, +}; +module_auxiliary_driver(drm_aux_bridge_drv); + +MODULE_AUTHOR("Dmitry Baryshkov <dmitry.baryshkov@linaro.org>"); +MODULE_DESCRIPTION("DRM transparent bridge"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c new file mode 100644 index 000000000000..bb55f697a181 --- /dev/null +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Linaro Ltd. 
+ * + * Author: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> + */ +#include <linux/auxiliary_bus.h> +#include <linux/module.h> +#include <linux/of.h> + +#include <drm/drm_bridge.h> +#include <drm/bridge/aux-bridge.h> + +static DEFINE_IDA(drm_aux_hpd_bridge_ida); + +struct drm_aux_hpd_bridge_data { + struct drm_bridge bridge; + struct device *dev; +}; + +static void drm_aux_hpd_bridge_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + + ida_free(&drm_aux_hpd_bridge_ida, adev->id); + + of_node_put(adev->dev.platform_data); + + kfree(adev); +} + +static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +/** + * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Creates a simple DRM bridge with the type set to + * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is + * able to send the HPD events. + * + * Return: device instance that will handle created bridge or an error code + * encoded into the pointer. + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, + struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return ERR_PTR(-ENOMEM); + + ret = ida_alloc(&drm_aux_hpd_bridge_ida, GFP_KERNEL); + if (ret < 0) { + kfree(adev); + return ERR_PTR(ret); + } + + adev->id = ret; + adev->name = "dp_hpd_bridge"; + adev->dev.parent = parent; + adev->dev.of_node = of_node_get(parent->of_node); + adev->dev.release = drm_aux_hpd_bridge_release; + adev->dev.platform_data = of_node_get(np); + + ret = auxiliary_device_init(adev); + if (ret) { + ida_free(&drm_aux_hpd_bridge_ida, adev->id); + kfree(adev); + return ERR_PTR(ret); + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ERR_PTR(ret); + } + + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + if (ret) + return ERR_PTR(ret); + + return &adev->dev; +} +EXPORT_SYMBOL_GPL(drm_dp_hpd_bridge_register); + +/** + * drm_aux_hpd_bridge_notify - notify hot plug detection events + * @dev: device created for the HPD bridge + * @status: output connection status + * + * A wrapper around drm_bridge_hpd_notify() that is used to report hot plug + * detection events for bridges created via drm_dp_hpd_bridge_register(). + * + * This function shall be called in a context that can sleep. + */ +void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct drm_aux_hpd_bridge_data *data = auxiliary_get_drvdata(adev); + + if (!data) + return; + + drm_bridge_hpd_notify(&data->bridge, status); +} +EXPORT_SYMBOL_GPL(drm_aux_hpd_bridge_notify); + +static int drm_aux_hpd_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 
0 : -EINVAL; +} + +static const struct drm_bridge_funcs drm_aux_hpd_bridge_funcs = { + .attach = drm_aux_hpd_bridge_attach, +}; + +static int drm_aux_hpd_bridge_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *id) +{ + struct drm_aux_hpd_bridge_data *data; + + data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = &auxdev->dev; + data->bridge.funcs = &drm_aux_hpd_bridge_funcs; + data->bridge.of_node = dev_get_platdata(data->dev); + data->bridge.ops = DRM_BRIDGE_OP_HPD; + data->bridge.type = id->driver_data; + + auxiliary_set_drvdata(auxdev, data); + + return devm_drm_bridge_add(data->dev, &data->bridge); +} + +static const struct auxiliary_device_id drm_aux_hpd_bridge_table[] = { + { .name = KBUILD_MODNAME ".dp_hpd_bridge", .driver_data = DRM_MODE_CONNECTOR_DisplayPort, }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, drm_aux_hpd_bridge_table); + +static struct auxiliary_driver drm_aux_hpd_bridge_drv = { + .name = "aux_hpd_bridge", + .id_table = drm_aux_hpd_bridge_table, + .probe = drm_aux_hpd_bridge_probe, +}; +module_auxiliary_driver(drm_aux_hpd_bridge_drv); + +MODULE_AUTHOR("Dmitry Baryshkov <dmitry.baryshkov@linaro.org>"); +MODULE_DESCRIPTION("DRM HPD bridge"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index 6af565ac307a..7d470527455b 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -2596,11 +2596,10 @@ clk_disable: return ret; } -static int cdns_mhdp_remove(struct platform_device *pdev) +static void cdns_mhdp_remove(struct platform_device *pdev) { struct cdns_mhdp_device *mhdp = platform_get_drvdata(pdev); unsigned long timeout = msecs_to_jiffies(100); - bool stop_fw = false; int ret; drm_bridge_remove(&mhdp->bridge); @@ -2608,18 +2607,19 @@ static int cdns_mhdp_remove(struct platform_device *pdev) ret = wait_event_timeout(mhdp->fw_load_wq, mhdp->hw_state == MHDP_HW_READY, timeout); - if (ret == 0) - dev_err(mhdp->dev, "%s: Timeout waiting for fw loading\n", - __func__); - else - stop_fw = true; - spin_lock(&mhdp->start_lock); mhdp->hw_state = MHDP_HW_STOPPED; spin_unlock(&mhdp->start_lock); - if (stop_fw) + if (ret == 0) { + dev_err(mhdp->dev, "%s: Timeout waiting for fw loading\n", + __func__); + } else { ret = cdns_mhdp_set_firmware_active(mhdp, false); + if (ret) + dev_err(mhdp->dev, "Failed to stop firmware (%pe)\n", + ERR_PTR(ret)); + } phy_exit(mhdp->phy); @@ -2634,8 +2634,6 @@ static int cdns_mhdp_remove(struct platform_device *pdev) /* Ignoring mhdp->hdcp.check_work and mhdp->hdcp.prop_work here. 
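/*
 * Note on the reworked cdns_mhdp_remove() above: wait_event_timeout()
 * returns 0 only if the timeout elapsed with the condition still false,
 * and the remaining jiffies otherwise. The condensed sketch below is an
 * illustration of the pattern (not driver code) showing why the rework
 * can drop the intermediate stop_fw flag:
 */
static void example_mhdp_stop(struct cdns_mhdp_device *mhdp, unsigned long timeout)
{
	long ret = wait_event_timeout(mhdp->fw_load_wq,
				      mhdp->hw_state == MHDP_HW_READY, timeout);

	/* Mark the hardware stopped before acting on the wait result. */
	spin_lock(&mhdp->start_lock);
	mhdp->hw_state = MHDP_HW_STOPPED;
	spin_unlock(&mhdp->start_lock);

	if (ret == 0)		/* timed out: firmware never became ready */
		dev_err(mhdp->dev, "timeout waiting for fw loading\n");
	else			/* ready: safe to ask the firmware to stop */
		cdns_mhdp_set_firmware_active(mhdp, false);
}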
*/ clk_disable_unprepare(mhdp->clk); - - return ret; } static const struct of_device_id mhdp_ids[] = { @@ -2658,7 +2656,7 @@ static struct platform_driver mhdp_driver = { .of_match_table = mhdp_ids, }, .probe = cdns_mhdp_probe, - .remove = cdns_mhdp_remove, + .remove_new = cdns_mhdp_remove, }; module_platform_driver(mhdp_driver); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c index 946212a95598..5e3b8edcf794 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c @@ -403,7 +403,8 @@ static int _cdns_mhdp_hdcp_disable(struct cdns_mhdp_device *mhdp) static int _cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type) { - int ret, tries = 3; + int ret = -EINVAL; + int tries = 3; u32 i; for (i = 0; i < tries; i++) { diff --git a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c index 3ff30ce80c5b..2347f8dd632f 100644 --- a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c @@ -226,8 +226,8 @@ dphy_pll_get_configure_from_opts(struct imx93_dsi *dsi, unsigned long fout; unsigned long best_fout = 0; unsigned int fvco_div; - unsigned int min_n, max_n, n, best_n; - unsigned long m, best_m; + unsigned int min_n, max_n, n, best_n = UINT_MAX; + unsigned long m, best_m = 0; unsigned long min_delta = ULONG_MAX; unsigned long delta; u64 tmp; diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 03532efb893b..273157428c82 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -43,6 +43,8 @@ struct lt8912 { struct videomode mode; + struct regulator_bulk_data supplies[7]; + u8 data_lanes; bool is_power_on; }; @@ -257,6 +259,12 @@ static int lt8912_free_i2c(struct lt8912 *lt) static int lt8912_hard_power_on(struct lt8912 *lt) { + int ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(lt->supplies), lt->supplies); + if (ret) + return ret; + gpiod_set_value_cansleep(lt->gp_reset, 0); msleep(20); @@ -267,6 +275,9 @@ static void lt8912_hard_power_off(struct lt8912 *lt) { gpiod_set_value_cansleep(lt->gp_reset, 1); msleep(20); + + regulator_bulk_disable(ARRAY_SIZE(lt->supplies), lt->supplies); + lt->is_power_on = false; } @@ -634,6 +645,48 @@ static const struct drm_bridge_funcs lt8912_bridge_funcs = { .get_edid = lt8912_bridge_get_edid, }; +static int lt8912_bridge_resume(struct device *dev) +{ + struct lt8912 *lt = dev_get_drvdata(dev); + int ret; + + ret = lt8912_hard_power_on(lt); + if (ret) + return ret; + + ret = lt8912_soft_power_on(lt); + if (ret) + return ret; + + return lt8912_video_on(lt); +} + +static int lt8912_bridge_suspend(struct device *dev) +{ + struct lt8912 *lt = dev_get_drvdata(dev); + + lt8912_hard_power_off(lt); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(lt8912_bridge_pm_ops, lt8912_bridge_suspend, lt8912_bridge_resume); + +static int lt8912_get_regulators(struct lt8912 *lt) +{ + unsigned int i; + const char * const supply_names[] = { + "vdd", "vccmipirx", "vccsysclk", "vcclvdstx", + "vcchdmitx", "vcclvdspll", "vcchdmipll" + }; + + for (i = 0; i < ARRAY_SIZE(lt->supplies); i++) + lt->supplies[i].supply = supply_names[i]; + + return devm_regulator_bulk_get(lt->dev, ARRAY_SIZE(lt->supplies), + lt->supplies); +} + static int lt8912_parse_dt(struct lt8912 *lt) { struct gpio_desc *gp_reset; @@ -685,6 +738,10 @@ static int lt8912_parse_dt(struct lt8912 *lt) goto 
err_free_host_node; } + ret = lt8912_get_regulators(lt); + if (ret) + goto err_free_host_node; + of_node_put(port_node); return 0; @@ -770,6 +827,7 @@ static struct i2c_driver lt8912_i2c_driver = { .driver = { .name = "lt8912", .of_match_table = lt8912_dt_match, + .pm = pm_sleep_ptr(<8912_bridge_pm_ops), }, .probe = lt8912_probe, .remove = lt8912_remove, diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index d81920227a8a..7c0076e49953 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -54,13 +54,13 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr, int ret; ret = i2c_master_send(ptn_bridge->client, &addr, 1); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } ret = i2c_master_recv(ptn_bridge->client, buf, len); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret); return ret; } @@ -78,7 +78,7 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr, buf[1] = val; ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf)); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index ef2e373606ba..615cc8f950d7 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -2273,7 +2273,7 @@ static int tc_probe(struct i2c_client *client) } else { if (tc->hpd_pin < 0 || tc->hpd_pin > 1) { dev_err(dev, "failed to parse HPD number\n"); - return ret; + return -EINVAL; } } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index b5464199b633..62cc3893dca5 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1415,11 +1415,9 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, int ret; if (!pdata->pwm_enabled) { - ret = pm_runtime_get_sync(pdata->dev); - if (ret < 0) { - pm_runtime_put_sync(pdata->dev); + ret = pm_runtime_resume_and_get(chip->dev); + if (ret < 0) return ret; - } } if (state->enabled) { @@ -1433,7 +1431,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, SN_GPIO_MUX_MASK << (2 * SN_PWM_GPIO_IDX), SN_GPIO_MUX_SPECIAL << (2 * SN_PWM_GPIO_IDX)); if (ret) { - dev_err(pdata->dev, "failed to mux in PWM function\n"); + dev_err(chip->dev, "failed to mux in PWM function\n"); goto out; } } @@ -1509,7 +1507,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, ret = regmap_write(pdata->regmap, SN_PWM_PRE_DIV_REG, pre_div); if (ret) { - dev_err(pdata->dev, "failed to update PWM_PRE_DIV\n"); + dev_err(chip->dev, "failed to update PWM_PRE_DIV\n"); goto out; } @@ -1521,7 +1519,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, FIELD_PREP(SN_PWM_INV_MASK, state->polarity == PWM_POLARITY_INVERSED); ret = regmap_write(pdata->regmap, SN_PWM_EN_INV_REG, pwm_en_inv); if (ret) { - dev_err(pdata->dev, "failed to update PWM_EN/PWM_INV\n"); + dev_err(chip->dev, "failed to update PWM_EN/PWM_INV\n"); goto out; } @@ -1529,7 +1527,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, out: if (!pdata->pwm_enabled) - pm_runtime_put_sync(pdata->dev); + pm_runtime_put_sync(chip->dev); return ret; } @@ -1589,12 +1587,14 @@ static int ti_sn_pwm_probe(struct auxiliary_device *adev, { struct ti_sn65dsi86 *pdata = 
dev_get_drvdata(adev->dev.parent); - pdata->pchip.dev = pdata->dev; + pdata->pchip.dev = &adev->dev; pdata->pchip.ops = &ti_sn_pwm_ops; pdata->pchip.npwm = 1; pdata->pchip.of_xlate = of_pwm_single_xlate; pdata->pchip.of_pwm_n_cells = 1; + devm_pm_runtime_enable(&adev->dev); + return pwmchip_add(&pdata->pchip); } @@ -1605,7 +1605,7 @@ static void ti_sn_pwm_remove(struct auxiliary_device *adev) pwmchip_remove(&pdata->pchip); if (pdata->pwm_enabled) - pm_runtime_put_sync(pdata->dev); + pm_runtime_put_sync(&adev->dev); } static const struct auxiliary_device_id ti_sn_pwm_id_table[] = { diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c index e0e015243a60..f9fb35683a27 100644 --- a/drivers/gpu/drm/bridge/ti-tpd12s015.c +++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c @@ -179,13 +179,11 @@ static int tpd12s015_probe(struct platform_device *pdev) return 0; } -static int __exit tpd12s015_remove(struct platform_device *pdev) +static void tpd12s015_remove(struct platform_device *pdev) { struct tpd12s015_device *tpd = platform_get_drvdata(pdev); drm_bridge_remove(&tpd->bridge); - - return 0; } static const struct of_device_id tpd12s015_of_match[] = { @@ -197,7 +195,7 @@ MODULE_DEVICE_TABLE(of, tpd12s015_of_match); static struct platform_driver tpd12s015_driver = { .probe = tpd12s015_probe, - .remove = __exit_p(tpd12s015_remove), + .remove_new = tpd12s015_remove, .driver = { .name = "tpd12s015", .of_match_table = tpd12s015_of_match, diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config index b4f653417883..8dbce9919a57 100644 --- a/drivers/gpu/drm/ci/arm64.config +++ b/drivers/gpu/drm/ci/arm64.config @@ -186,6 +186,7 @@ CONFIG_HW_RANDOM_MTK=y CONFIG_MTK_DEVAPC=y CONFIG_PWM_MTK_DISP=y CONFIG_MTK_CMDQ=y +CONFIG_REGULATOR_DA9211=y # For nouveau. Note that DRM must be a module so that it's loaded after NFS is up to provide the firmware. 
CONFIG_ARCH_TEGRA=y diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh index e5c5dcedd108..f73f3471e94e 100644 --- a/drivers/gpu/drm/ci/build.sh +++ b/drivers/gpu/drm/ci/build.sh @@ -19,7 +19,7 @@ if [[ "$KERNEL_ARCH" = "arm64" ]]; then DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dtb" DEVICE_TREES+=" arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dtb" DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dtb" - DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc.dtb" + DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc-usb-host.dtb" DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8096-db820c.dtb" DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dtb" DEVICE_TREES+=" arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtb" @@ -58,6 +58,9 @@ git config --global user.email "fdo@example.com" git config --global user.name "freedesktop.org CI" git config --global pull.rebase true +# cleanup git state on the worker +rm -rf .git/rebase-merge + # Try to merge fixes from target repo if [ "$(git ls-remote --exit-code --heads ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes)" ]; then git pull ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes @@ -75,19 +78,19 @@ else fi fi -for opt in $ENABLE_KCONFIGS; do - echo CONFIG_$opt=y >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config -done -for opt in $DISABLE_KCONFIGS; do - echo CONFIG_$opt=n >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config -done - if [[ -n "${MERGE_FRAGMENT}" ]]; then ./scripts/kconfig/merge_config.sh ${DEFCONFIG} drivers/gpu/drm/ci/${MERGE_FRAGMENT} else make `basename ${DEFCONFIG}` fi +for opt in $ENABLE_KCONFIGS; do + ./scripts/config --enable CONFIG_$opt +done +for opt in $DISABLE_KCONFIGS; do + ./scripts/config --disable CONFIG_$opt +done + make ${KERNEL_IMAGE_NAME} mkdir -p /lava-files/ diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index aeb9bab1b069..dac92cc2777c 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -5,7 +5,7 @@ variables: UPSTREAM_REPO: git://anongit.freedesktop.org/drm/drm TARGET_BRANCH: drm-next - IGT_VERSION: d1db7333d9c5fbbb05e50b0804123950d9dc1c46 + IGT_VERSION: d2af13d9f5be5ce23d996e4afd3e45990f5ab977 DEQP_RUNNER_GIT_URL: https://gitlab.freedesktop.org/anholt/deqp-runner.git DEQP_RUNNER_GIT_TAG: v0.15.0 diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh index 2f815ee3a8a3..f1a08b9b146f 100755 --- a/drivers/gpu/drm/ci/igt_runner.sh +++ b/drivers/gpu/drm/ci/igt_runner.sh @@ -15,15 +15,21 @@ cat /sys/kernel/debug/device_component/* ' # Dump drm state to confirm that kernel was able to find a connected display: -# TODO this path might not exist for all drivers.. maybe run modetest instead? set +e cat /sys/kernel/debug/dri/*/state set -e case "$DRIVER_NAME" in - rockchip|mediatek|meson) + rockchip|meson) export IGT_FORCE_DRIVER="panfrost" ;; + mediatek) + if [ "$GPU_VERSION" = "mt8173" ]; then + export IGT_FORCE_DRIVER=${DRIVER_NAME} + elif [ "$GPU_VERSION" = "mt8183" ]; then + export IGT_FORCE_DRIVER="panfrost" + fi + ;; amdgpu) # Cannot use HWCI_KERNEL_MODULES as at that point we don't have the module in /lib mv /install/modules/lib/modules/* /lib/modules/. 
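The cdns-mhdp8546 and tpd12s015 hunks above both convert their platform drivers from the int-returning .remove callback to the void-returning .remove_new. The driver core ignored the old return value anyway, so the new prototype forces drivers to deal with failures themselves; this is why cdns_mhdp_remove() above now logs the firmware-stop error instead of returning it. A minimal sketch of the pattern, using hypothetical foo names that are not part of this series:

#include <linux/module.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	dev_info(&pdev->dev, "bound\n");
	return 0;
}

/* .remove_new returns void: failures can only be logged, not propagated. */
static void foo_remove(struct platform_device *pdev)
{
	dev_info(&pdev->dev, "unbound\n");
}

static struct platform_driver foo_driver = {
	.probe = foo_probe,
	.remove_new = foo_remove,
	.driver = {
		.name = "foo",
	},
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");

The tpd12s015 hunk also drops the __exit/__exit_p() wrapping as part of the same conversion.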
diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml index f285ed67eb3d..2c9a1838e728 100644 --- a/drivers/gpu/drm/ci/test.yml +++ b/drivers/gpu/drm/ci/test.yml @@ -102,15 +102,12 @@ msm:apq8016: stage: msm variables: DRIVER_NAME: msm - BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc.dtb + BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc-usb-host.dtb GPU_VERSION: apq8016 BM_CMDLINE: "ip=dhcp console=ttyMSM0,115200n8 $BM_KERNEL_EXTRA_ARGS root=/dev/nfs rw nfsrootdebug nfsroot=,tcp,nfsvers=4.2 init=/init $BM_KERNELARGS" RUNNER_TAG: google-freedreno-db410c script: - ./install/bare-metal/fastboot.sh - rules: - # TODO: current issue: it is not fiding the NFS root. Fix and remove this rule. - - when: never msm:apq8096: extends: @@ -280,9 +277,6 @@ mediatek:mt8173: DEVICE_TYPE: mt8173-elm-hana GPU_VERSION: mt8173 RUNNER_TAG: mesa-ci-x86-64-lava-mt8173-elm-hana - rules: - # TODO: current issue: device is hanging. Fix and remove this rule. - - when: never mediatek:mt8183: extends: @@ -335,11 +329,10 @@ virtio_gpu:none: script: - ln -sf $CI_PROJECT_DIR/install /install - mv install/bzImage /lava-files/bzImage + - mkdir -p $CI_PROJECT_DIR/results + - ln -sf $CI_PROJECT_DIR/results /results - install/crosvm-runner.sh install/igt_runner.sh needs: - debian/x86_64_test-gl - testing:x86_64 - igt:x86_64 - rules: - # TODO: current issue: malloc(): corrupted top size. Fix and remove this rule. - - when: never
\ No newline at end of file diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt index 671916067dba..ef0cb7c3698c 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt @@ -1,5 +1,4 @@ kms_3d,Fail -kms_addfb_basic@addfb25-bad-modifier,Fail kms_bw@linear-tiling-1-displays-1920x1080p,Fail kms_bw@linear-tiling-1-displays-2560x1440p,Fail kms_bw@linear-tiling-1-displays-3840x2160p,Fail @@ -9,13 +8,19 @@ kms_bw@linear-tiling-2-displays-3840x2160p,Fail kms_bw@linear-tiling-3-displays-1920x1080p,Fail kms_bw@linear-tiling-3-displays-2560x1440p,Fail kms_bw@linear-tiling-3-displays-3840x2160p,Fail +kms_color@invalid-gamma-lut-sizes,Fail kms_color@pipe-A-invalid-gamma-lut-sizes,Fail kms_color@pipe-B-invalid-gamma-lut-sizes,Fail -kms_force_connector_basic@force-connector-state,Fail +kms_cursor_legacy@cursor-vs-flip-atomic,Fail +kms_cursor_legacy@cursor-vs-flip-legacy,Fail +kms_flip@flip-vs-modeset-vs-hang,Fail +kms_flip@flip-vs-panning-vs-hang,Fail +kms_flip@flip-vs-suspend,Fail +kms_flip@flip-vs-suspend-interruptible,Fail kms_force_connector_basic@force-edid,Fail kms_force_connector_basic@force-load-detect,Fail kms_force_connector_basic@prune-stale-modes,Fail -kms_invalid_mode@int-max-clock,Fail +kms_hdmi_inject@inject-4k,Fail kms_plane_scaling@planes-upscale-20x20,Fail kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-25,Fail kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-5,Fail @@ -27,3 +32,5 @@ kms_properties@get_properties-sanity-atomic,Fail kms_properties@plane-properties-atomic,Fail kms_properties@plane-properties-legacy,Fail kms_rmfb@close-fd,Fail +kms_selftest@drm_format,Timeout +kms_selftest@drm_format_helper,Timeout diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt index 9981682feab2..d39d254c935e 100644 --- a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt +++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt @@ -6,10 +6,15 @@ kms_cursor_legacy@all-pipes-single-bo,Fail kms_cursor_legacy@all-pipes-single-move,Fail kms_cursor_legacy@all-pipes-torture-bo,Fail kms_cursor_legacy@all-pipes-torture-move,Fail +kms_cursor_legacy@forked-bo,Fail +kms_cursor_legacy@forked-move,Fail kms_cursor_legacy@pipe-A-forked-bo,Fail kms_cursor_legacy@pipe-A-forked-move,Fail kms_cursor_legacy@pipe-A-single-bo,Fail kms_cursor_legacy@pipe-A-single-move,Fail kms_cursor_legacy@pipe-A-torture-bo,Fail kms_cursor_legacy@pipe-A-torture-move,Fail +kms_force_connector_basic@force-edid,Fail kms_hdmi_inject@inject-4k,Fail +kms_selftest@drm_format,Timeout +kms_selftest@drm_format_helper,Timeout diff --git a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt index 9586b2339f6f..007f21e56d89 100644 --- a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt +++ b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt @@ -10,6 +10,49 @@ kms_bw@linear-tiling-1-displays-3840x2160p,Fail kms_bw@linear-tiling-2-displays-1920x1080p,Fail kms_bw@linear-tiling-2-displays-2560x1440p,Fail kms_bw@linear-tiling-2-displays-3840x2160p,Fail +kms_bw@linear-tiling-3-displays-1920x1080p,Fail +kms_bw@linear-tiling-3-displays-2560x1440p,Fail +kms_bw@linear-tiling-3-displays-3840x2160p,Fail +kms_bw@linear-tiling-4-displays-1920x1080p,Fail +kms_bw@linear-tiling-4-displays-2560x1440p,Fail +kms_bw@linear-tiling-4-displays-3840x2160p,Fail +kms_bw@linear-tiling-5-displays-1920x1080p,Fail 
+kms_bw@linear-tiling-5-displays-2560x1440p,Fail +kms_bw@linear-tiling-5-displays-3840x2160p,Fail +kms_bw@linear-tiling-6-displays-1920x1080p,Fail +kms_bw@linear-tiling-6-displays-2560x1440p,Fail +kms_bw@linear-tiling-6-displays-3840x2160p,Fail +kms_bw@linear-tiling-7-displays-1920x1080p,Fail +kms_bw@linear-tiling-7-displays-2560x1440p,Fail +kms_bw@linear-tiling-7-displays-3840x2160p,Fail +kms_bw@linear-tiling-8-displays-1920x1080p,Fail +kms_bw@linear-tiling-8-displays-2560x1440p,Fail +kms_bw@linear-tiling-8-displays-3840x2160p,Fail +kms_flip@absolute-wf_vblank,Fail +kms_flip@absolute-wf_vblank-interruptible,Fail +kms_flip@basic-flip-vs-wf_vblank,Fail +kms_flip@blocking-absolute-wf_vblank,Fail +kms_flip@blocking-absolute-wf_vblank-interruptible,Fail +kms_flip@blocking-wf_vblank,Fail +kms_flip@busy-flip,Fail +kms_flip@dpms-vs-vblank-race,Fail +kms_flip@dpms-vs-vblank-race-interruptible,Fail +kms_flip@flip-vs-absolute-wf_vblank,Fail +kms_flip@flip-vs-absolute-wf_vblank-interruptible,Fail +kms_flip@flip-vs-blocking-wf-vblank,Fail +kms_flip@flip-vs-expired-vblank,Fail +kms_flip@flip-vs-expired-vblank-interruptible,Fail +kms_flip@flip-vs-modeset-vs-hang,Fail +kms_flip@flip-vs-panning-vs-hang,Fail +kms_flip@flip-vs-wf_vblank-interruptible,Fail +kms_flip@modeset-vs-vblank-race,Fail +kms_flip@modeset-vs-vblank-race-interruptible,Fail +kms_flip@plain-flip-fb-recreate,Fail +kms_flip@plain-flip-fb-recreate-interruptible,Fail +kms_flip@plain-flip-ts-check,Fail +kms_flip@plain-flip-ts-check-interruptible,Fail +kms_flip@wf_vblank-ts-check,Fail +kms_flip@wf_vblank-ts-check-interruptible,Fail kms_invalid_mode@int-max-clock,Fail kms_plane_scaling@downscale-with-modifier-factor-0-25,Fail kms_plane_scaling@downscale-with-rotation-factor-0-25,Fail @@ -22,6 +65,9 @@ kms_plane_scaling@upscale-with-modifier-factor-0-25,Fail kms_plane_scaling@upscale-with-pixel-format-20x20,Fail kms_plane_scaling@upscale-with-pixel-format-factor-0-25,Fail kms_plane_scaling@upscale-with-rotation-20x20,Fail +kms_selftest@drm_format,Timeout +kms_selftest@drm_format_helper,Timeout +kms_setmode@basic,Fail kms_vblank@crtc-id,Fail kms_vblank@invalid,Fail kms_vblank@pipe-A-accuracy-idle,Fail diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f3680f4e6970..d72b6f9a352c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2245,6 +2245,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = { { OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) }, /* Synaptics DP1.4 MST hubs can support DSC without virtual DPCD */ { OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) }, + /* Synaptics DP1.4 MST hubs require DSC for some modes on which they apply HBLANK expansion. */ + { OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC) }, /* Apple MacBookPro 2017 15 inch eDP Retina panel reports too low DP_MAX_LINK_RATE */ { OUI(0x00, 0x10, 0xfa), DEVICE_ID(101, 68, 21, 101, 98, 97), false, BIT(DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS) }, }; @@ -2327,6 +2329,33 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, EXPORT_SYMBOL(drm_dp_read_desc); /** + * drm_dp_dsc_sink_bpp_incr() - Get bits per pixel increment + * @dsc_dpcd: DSC capabilities from DPCD + * + * Returns the bpp precision supported by the DP sink.
+ */ +u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) +{ + u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT]; + + switch (bpp_increment_dpcd) { + case DP_DSC_BITS_PER_PIXEL_1_16: + return 16; + case DP_DSC_BITS_PER_PIXEL_1_8: + return 8; + case DP_DSC_BITS_PER_PIXEL_1_4: + return 4; + case DP_DSC_BITS_PER_PIXEL_1_2: + return 2; + case DP_DSC_BITS_PER_PIXEL_1_1: + return 1; + } + + return 0; +} +EXPORT_SYMBOL(drm_dp_dsc_sink_bpp_incr); + +/** * drm_dp_dsc_sink_max_slice_count() - Get the max slice count * supported by the DSC sink. * @dsc_dpcd: DSC capabilities from DPCD @@ -3898,3 +3927,135 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) EXPORT_SYMBOL(drm_panel_dp_aux_backlight); #endif + +/* See DP Standard v2.1 2.6.4.4.1.1, 2.8.4.4, 2.8.7 */ +static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, + int symbol_size, bool is_mst) +{ + int cycles = DIV_ROUND_UP(pixels * bpp_x16, 16 * symbol_size * lane_count); + int align = is_mst ? 4 / lane_count : 1; + + return ALIGN(cycles, align); +} + +static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_count, + int bpp_x16, int symbol_size, bool is_mst) +{ + int slice_pixels = DIV_ROUND_UP(pixels, slice_count); + int slice_data_cycles = drm_dp_link_symbol_cycles(lane_count, slice_pixels, + bpp_x16, symbol_size, is_mst); + int slice_eoc_cycles = is_mst ? 4 / lane_count : 1; + + return slice_count * (slice_data_cycles + slice_eoc_cycles); +} + +/** + * drm_dp_bw_overhead - Calculate the BW overhead of a DP link stream + * @lane_count: DP link lane count + * @hactive: pixel count of the active period in one scanline of the stream + * @dsc_slice_count: DSC slice count if @flags/%DRM_DP_BW_OVERHEAD_DSC is set + * @bpp_x16: bits per pixel in .4 binary fixed point + * @flags: DRM_DP_BW_OVERHEAD_x flags + * + * Calculate the BW allocation overhead of a DP link stream, depending + * on the link's + * - @lane_count + * - SST/MST mode (@flags / %DRM_DP_BW_OVERHEAD_MST) + * - symbol size (@flags / %DRM_DP_BW_OVERHEAD_UHBR) + * - FEC mode (@flags / %DRM_DP_BW_OVERHEAD_FEC) + * - SSC/REF_CLK mode (@flags / %DRM_DP_BW_OVERHEAD_SSC_REF_CLK) + * as well as the stream's + * - @hactive timing + * - @bpp_x16 color depth + * - compression mode (@flags / %DRM_DP_BW_OVERHEAD_DSC). + * Note that this overhead doesn't account for the 8b/10b, 128b/132b + * channel coding efficiency, for that see + * drm_dp_bw_channel_coding_efficiency(). + * + * Returns the overhead as 100% + overhead% in 1ppm units. + */ +int drm_dp_bw_overhead(int lane_count, int hactive, + int dsc_slice_count, + int bpp_x16, unsigned long flags) +{ + int symbol_size = flags & DRM_DP_BW_OVERHEAD_UHBR ?
32 : 8; + bool is_mst = flags & DRM_DP_BW_OVERHEAD_MST; + u32 overhead = 1000000; + int symbol_cycles; + + /* + * DP Standard v2.1 2.6.4.1 + * SSC downspread and ref clock variation margin: + * 5300ppm + 300ppm ~ 0.6% + */ + if (flags & DRM_DP_BW_OVERHEAD_SSC_REF_CLK) + overhead += 6000; + + /* + * DP Standard v2.1 2.6.4.1.1, 3.5.1.5.4: + * FEC symbol insertions for 8b/10b channel coding: + * After each 250 data symbols on 2-4 lanes: + * 250 LL + 5 FEC_PARITY_PH + 1 CD_ADJ (256 byte FEC block) + * After each 2 x 250 data symbols on 1 lane: + * 2 * 250 LL + 11 FEC_PARITY_PH + 1 CD_ADJ (512 byte FEC block) + * After 256 (2-4 lanes) or 128 (1 lane) FEC blocks: + * 256 * 256 bytes + 1 FEC_PM + * or + * 128 * 512 bytes + 1 FEC_PM + * (256 * 6 + 1) / (256 * 250) = 2.4015625 % + */ + if (flags & DRM_DP_BW_OVERHEAD_FEC) + overhead += 24016; + + /* + * DP Standard v2.1 2.7.9, 5.9.7 + * The FEC overhead for UHBR is accounted for in its 96.71% channel + * coding efficiency. + */ + WARN_ON((flags & DRM_DP_BW_OVERHEAD_UHBR) && + (flags & DRM_DP_BW_OVERHEAD_FEC)); + + if (flags & DRM_DP_BW_OVERHEAD_DSC) + symbol_cycles = drm_dp_link_dsc_symbol_cycles(lane_count, hactive, + dsc_slice_count, + bpp_x16, symbol_size, + is_mst); + else + symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, + bpp_x16, symbol_size, + is_mst); + + return DIV_ROUND_UP_ULL(mul_u32_u32(symbol_cycles * symbol_size * lane_count, + overhead * 16), + hactive * bpp_x16); +} +EXPORT_SYMBOL(drm_dp_bw_overhead); + +/** + * drm_dp_bw_channel_coding_efficiency - Get a DP link's channel coding efficiency + * @is_uhbr: Whether the link has a 128b/132b channel coding + * + * Return the channel coding efficiency of the given DP link type, which is + * either 8b/10b or 128b/132b (aka UHBR). The corresponding overhead includes + * the 8b -> 10b, 128b -> 132b pixel data to link symbol conversion overhead + * and for 128b/132b any link or PHY level control symbol insertion overhead + * (LLCP, FEC, PHY sync, see DP Standard v2.1 3.5.2.18). For 8b/10b the + * corresponding FEC overhead is BW allocation specific, included in the value + * returned by drm_dp_bw_overhead(). + * + * Returns the efficiency in the 100%/coding-overhead% ratio in + * 1ppm units. + */ +int drm_dp_bw_channel_coding_efficiency(bool is_uhbr) +{ + if (is_uhbr) + return 967100; + else + /* + * Note that on 8b/10b MST the efficiency is only + * 78.75% due to the 1 out of 64 MTPH packet overhead, + * not accounted for here. + */ + return 800000; +} +EXPORT_SYMBOL(drm_dp_bw_channel_coding_efficiency); diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 0e0d0e76de06..8ca01a6bf645 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -43,6 +43,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> @@ -3578,16 +3579,26 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, * value is in units of PBNs/(timeslots/1 MTP). This value can be used to * convert the number of PBNs required for a given stream to the number of * timeslots this stream requires in each MTP. + * + * Returns the BW / timeslot value in 20.12 fixed point format. 
*/ -int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, - int link_rate, int link_lane_count) +fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, + int link_rate, int link_lane_count) { + int ch_coding_efficiency = + drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(link_rate)); + fixed20_12 ret; + if (link_rate == 0 || link_lane_count == 0) drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n", link_rate, link_lane_count); - /* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ - return link_rate * link_lane_count / 54000; + /* See DP v2.0 2.6.4.2, 2.7.6.3 VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ + ret.full = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_rate * link_lane_count, + ch_coding_efficiency), + (1000000ULL * 8 * 5400) >> 12); + + return ret; } EXPORT_SYMBOL(drm_dp_get_vc_payload_bw); @@ -4335,7 +4346,7 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state, } } - req_slots = DIV_ROUND_UP(pbn, topology_state->pbn_div); + req_slots = DIV_ROUND_UP(dfixed_const(pbn), topology_state->pbn_div.full); drm_dbg_atomic(mgr->dev, "[CONNECTOR:%d:%s] [MST PORT:%p] TU %d -> %d\n", port->connector->base.id, port->connector->name, @@ -4718,35 +4729,36 @@ EXPORT_SYMBOL(drm_dp_check_act_status); /** * drm_dp_calc_pbn_mode() - Calculate the PBN for a mode. - * @clock: dot clock for the mode - * @bpp: bpp for the mode. - * @dsc: DSC mode. If true, bpp has units of 1/16 of a bit per pixel + * @clock: dot clock + * @bpp: bpp as .4 binary fixed point * * This uses the formula in the spec to calculate the PBN value for a mode. */ -int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc) +int drm_dp_calc_pbn_mode(int clock, int bpp) { /* - * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on * common multiplier to render an integer PBN for all link rate/lane * counts combinations * calculate - * peak_kbps *= (1006/1000) - * peak_kbps *= (64/54) - * peak_kbps *= 8 convert to bytes - * - * If the bpp is in units of 1/16, further divide by 16. Put this - * factor in the numerator rather than the denominator to avoid - * integer overflow + * peak_kbps = clock * bpp / 16 + * peak_kbps *= SSC overhead / 1000000 + * peak_kbps /= 8 convert to Kbytes + * peak_kBps *= (64/54) / 1000 convert to PBN */ + /* + * TODO: Use the actual link and mode parameters to calculate + * the overhead. For now it's assumed that these are + * 4 link lanes, 4096 hactive pixels, which don't add any + * significant data padding overhead and that there is no DSC + * or FEC overhead. 
+ */ + int overhead = drm_dp_bw_overhead(4, 4096, 0, bpp, + DRM_DP_BW_OVERHEAD_MST | + DRM_DP_BW_OVERHEAD_SSC_REF_CLK); - if (dsc) - return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006), - 8 * 54 * 1000 * 1000); - - return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006), - 8 * 54 * 1000 * 1000); + return DIV64_U64_ROUND_UP(mul_u32_u32(clock * bpp, 64 * overhead >> 4), + 1000000ULL * 8 * 54 * 1000); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); @@ -4871,7 +4883,8 @@ void drm_dp_mst_dump_topology(struct seq_file *m, state = to_drm_dp_mst_topology_state(mgr->base.state); seq_printf(m, "\n*** Atomic state info ***\n"); seq_printf(m, "payload_mask: %x, max_payloads: %d, start_slot: %u, pbn_div: %d\n", - state->payload_mask, mgr->max_payloads, state->start_slot, state->pbn_div); + state->payload_mask, mgr->max_payloads, state->start_slot, + dfixed_trunc(state->pbn_div)); seq_printf(m, "\n| idx | port | vcpi | slots | pbn | dsc | status | sink name |\n"); for (i = 0; i < mgr->max_payloads; i++) { @@ -5136,13 +5149,67 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port, return false; } +static bool +drm_dp_mst_port_downstream_of_parent_locked(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + struct drm_dp_mst_port *parent) +{ + if (!mgr->mst_primary) + return false; + + port = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary, + port); + if (!port) + return false; + + if (!parent) + return true; + + parent = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary, + parent); + if (!parent) + return false; + + if (!parent->mstb) + return false; + + return drm_dp_mst_port_downstream_of_branch(port, parent->mstb); +} + +/** + * drm_dp_mst_port_downstream_of_parent - check if a port is downstream of a parent port + * @mgr: MST topology manager + * @port: the port being looked up + * @parent: the parent port + * + * The function returns %true if @port is downstream of @parent. If @parent is + * %NULL - denoting the root port - the function returns %true if @port is in + * @mgr's topology. 
+ */ +bool +drm_dp_mst_port_downstream_of_parent(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + struct drm_dp_mst_port *parent) +{ + bool ret; + + mutex_lock(&mgr->lock); + ret = drm_dp_mst_port_downstream_of_parent_locked(mgr, port, parent); + mutex_unlock(&mgr->lock); + + return ret; +} +EXPORT_SYMBOL(drm_dp_mst_port_downstream_of_parent); + static int drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, - struct drm_dp_mst_topology_state *state); + struct drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port); static int drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, - struct drm_dp_mst_topology_state *state) + struct drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port) { struct drm_dp_mst_atomic_payload *payload; struct drm_dp_mst_port *port; @@ -5171,7 +5238,7 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, drm_dbg_atomic(mstb->mgr->dev, "[MSTB:%p] Checking bandwidth limits\n", mstb); list_for_each_entry(port, &mstb->ports, next) { - ret = drm_dp_mst_atomic_check_port_bw_limit(port, state); + ret = drm_dp_mst_atomic_check_port_bw_limit(port, state, failing_port); if (ret < 0) return ret; @@ -5183,7 +5250,8 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, static int drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, - struct drm_dp_mst_topology_state *state) + struct drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port) { struct drm_dp_mst_atomic_payload *payload; int pbn_used = 0; @@ -5204,13 +5272,15 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, drm_dbg_atomic(port->mgr->dev, "[MSTB:%p] [MST PORT:%p] no BW available for the port\n", port->parent, port); + *failing_port = port; return -EINVAL; } pbn_used = payload->pbn; } else { pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb, - state); + state, + failing_port); if (pbn_used <= 0) return pbn_used; } @@ -5219,6 +5289,7 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, drm_dbg_atomic(port->mgr->dev, "[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n", port->parent, port, pbn_used, port->full_pbn); + *failing_port = port; return -ENOSPC; } @@ -5271,10 +5342,10 @@ drm_dp_mst_atomic_check_payload_alloc_limits(struct drm_dp_mst_topology_mgr *mgr } if (!payload_count) - mst_state->pbn_div = 0; + mst_state->pbn_div.full = dfixed_const(0); drm_dbg_atomic(mgr->dev, "[MST MGR:%p] mst state %p TU pbn_div=%d avail=%d used=%d\n", - mgr, mst_state, mst_state->pbn_div, avail_slots, + mgr, mst_state, dfixed_trunc(mst_state->pbn_div), avail_slots, mst_state->total_avail_slots - avail_slots); return 0; @@ -5397,19 +5468,81 @@ int drm_dp_mst_atomic_enable_dsc(struct drm_atomic_state *state, EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); /** + * drm_dp_mst_atomic_check_mgr - Check the atomic state of an MST topology manager + * @state: The global atomic state + * @mgr: Manager to check + * @mst_state: The MST atomic state for @mgr + * @failing_port: Returns the port with a BW limitation + * + * Checks the given MST manager's topology state for an atomic update to ensure + * that it's valid. This includes checking whether there's enough bandwidth to + * support the new timeslot allocations in the atomic update. 
+ * + * Any atomic drivers supporting DP MST must make sure to call this or + * the drm_dp_mst_atomic_check() function after checking the rest of their state + * in their &drm_mode_config_funcs.atomic_check() callback. + * + * See also: + * drm_dp_mst_atomic_check() + * drm_dp_atomic_find_time_slots() + * drm_dp_atomic_release_time_slots() + * + * Returns: + * - 0 if the new state is valid + * - %-ENOSPC, if the new state is invalid, because of BW limitation + * @failing_port is set to: + * - The non-root port where a BW limit check failed + * with all the ports downstream of @failing_port passing + * the BW limit check. + * The returned port pointer is valid until at least + * one payload downstream of it exists. + * - %NULL if the BW limit check failed at the root port + * with all the ports downstream of the root port passing + * the BW limit check. + * - %-EINVAL, if the new state is invalid, because the root port has + * too many payloads. + */ +int drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_topology_state *mst_state, + struct drm_dp_mst_port **failing_port) +{ + int ret; + + *failing_port = NULL; + + if (!mgr->mst_state) + return 0; + + mutex_lock(&mgr->lock); + ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary, + mst_state, + failing_port); + mutex_unlock(&mgr->lock); + + if (ret < 0) + return ret; + + return drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state); +} +EXPORT_SYMBOL(drm_dp_mst_atomic_check_mgr); + +/** * drm_dp_mst_atomic_check - Check that the new state of an MST topology in an * atomic update is valid * @state: Pointer to the new &struct drm_dp_mst_topology_state * * Checks the given topology state for an atomic update to ensure that it's - * valid. This includes checking whether there's enough bandwidth to support - * the new timeslot allocations in the atomic update. + * valid, calling drm_dp_mst_atomic_check_mgr() for all MST managers in the + * atomic state. This includes checking whether there's enough bandwidth to + * support the new timeslot allocations in the atomic update. * * Any atomic drivers supporting DP MST must make sure to call this after * checking the rest of their state in their * &drm_mode_config_funcs.atomic_check() callback.
* * See also: + * drm_dp_mst_atomic_check_mgr() * drm_dp_atomic_find_time_slots() * drm_dp_atomic_release_time_slots() * @@ -5424,21 +5557,11 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) int i, ret = 0; for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { - if (!mgr->mst_state) - continue; + struct drm_dp_mst_port *tmp_port; - ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state); + ret = drm_dp_mst_atomic_check_mgr(state, mgr, mst_state, &tmp_port); if (ret) break; - - mutex_lock(&mgr->lock); - ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary, - mst_state); - mutex_unlock(&mgr->lock); - if (ret < 0) - break; - else - ret = 0; } return ret; @@ -5894,6 +6017,7 @@ static bool drm_dp_mst_is_virtual_dpcd(struct drm_dp_mst_port *port) struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) { struct drm_dp_mst_port *immediate_upstream_port; + struct drm_dp_aux *immediate_upstream_aux; struct drm_dp_mst_port *fec_port; struct drm_dp_desc desc = {}; u8 endpoint_fec; @@ -5958,21 +6082,25 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) * - Port is on primary branch device * - Not a VGA adapter (DP_DWN_STRM_PORT_TYPE_ANALOG) */ - if (drm_dp_read_desc(port->mgr->aux, &desc, true)) + if (immediate_upstream_port) + immediate_upstream_aux = &immediate_upstream_port->aux; + else + immediate_upstream_aux = port->mgr->aux; + + if (drm_dp_read_desc(immediate_upstream_aux, &desc, true)) return NULL; - if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) && - port->mgr->dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14 && - port->parent == port->mgr->mst_primary) { + if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD)) { u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; - if (drm_dp_read_dpcd_caps(port->mgr->aux, dpcd_ext) < 0) + if (drm_dp_read_dpcd_caps(immediate_upstream_aux, dpcd_ext) < 0) return NULL; - if ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT) && + if (dpcd_ext[DP_DPCD_REV] >= DP_DPCD_REV_14 && + ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT) && ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) - != DP_DWN_STRM_PORT_TYPE_ANALOG)) - return port->mgr->aux; + != DP_DWN_STRM_PORT_TYPE_ANALOG))) + return immediate_upstream_aux; } /* diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c deleted file mode 100644 index a4ad6fd13abc..000000000000 --- a/drivers/gpu/drm/drm_agpsupport.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * \file drm_agpsupport.c - * DRM support for AGP/GART backend - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/slab.h> - -#if IS_ENABLED(CONFIG_AGP) -#include <asm/agp.h> -#endif - -#include <drm/drm_device.h> -#include <drm/drm_drv.h> -#include <drm/drm_file.h> -#include <drm/drm_print.h> - -#include "drm_legacy.h" - -#if IS_ENABLED(CONFIG_AGP) - -/* - * Get AGP information. - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been initialized and acquired and fills in the - * drm_agp_info structure with the information in drm_agp_head::agp_info. - */ -int drm_legacy_agp_info(struct drm_device *dev, struct drm_agp_info *info) -{ - struct agp_kern_info *kern; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - - kern = &dev->agp->agp_info; - info->agp_version_major = kern->version.major; - info->agp_version_minor = kern->version.minor; - info->mode = kern->mode; - info->aperture_base = kern->aper_base; - info->aperture_size = kern->aper_size * 1024 * 1024; - info->memory_allowed = kern->max_memory << PAGE_SHIFT; - info->memory_used = kern->current_memory << PAGE_SHIFT; - info->id_vendor = kern->device->vendor; - info->id_device = kern->device->device; - - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_info); - -int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_info *info = data; - int err; - - err = drm_legacy_agp_info(dev, info); - if (err) - return err; - - return 0; -} - -/* - * Acquire the AGP device. - * - * \param dev DRM device that is to acquire AGP. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device hasn't been acquired before and calls - * \c agp_backend_acquire. - */ -int drm_legacy_agp_acquire(struct drm_device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if (!dev->agp) - return -ENODEV; - if (dev->agp->acquired) - return -EBUSY; - dev->agp->bridge = agp_backend_acquire(pdev); - if (!dev->agp->bridge) - return -ENODEV; - dev->agp->acquired = 1; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_acquire); - -/* - * Acquire the AGP device (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device hasn't been acquired before and calls - * \c agp_backend_acquire. - */ -int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return drm_legacy_agp_acquire((struct drm_device *)file_priv->minor->dev); -} - -/* - * Release the AGP device. - * - * \param dev DRM device that is to release AGP. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been acquired and calls \c agp_backend_release. 
- */ -int drm_legacy_agp_release(struct drm_device *dev) -{ - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - agp_backend_release(dev->agp->bridge); - dev->agp->acquired = 0; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_release); - -int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return drm_legacy_agp_release(dev); -} - -/* - * Enable the AGP bus. - * - * \param dev DRM device that has previously acquired AGP. - * \param mode Requested AGP mode. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been acquired but not enabled, and calls - * \c agp_enable. - */ -int drm_legacy_agp_enable(struct drm_device *dev, struct drm_agp_mode mode) -{ - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - - dev->agp->mode = mode.mode; - agp_enable(dev->agp->bridge, mode.mode); - dev->agp->enabled = 1; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_enable); - -int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_mode *mode = data; - - return drm_legacy_agp_enable(dev, *mode); -} - -/* - * Allocate AGP memory. - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired, allocates the - * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it. - */ -int drm_legacy_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) -{ - struct drm_agp_mem *entry; - struct agp_memory *memory; - unsigned long pages; - u32 type; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - pages = DIV_ROUND_UP(request->size, PAGE_SIZE); - type = (u32) request->type; - memory = agp_allocate_memory(dev->agp->bridge, pages, type); - if (!memory) { - kfree(entry); - return -ENOMEM; - } - - entry->handle = (unsigned long)memory->key + 1; - entry->memory = memory; - entry->bound = 0; - entry->pages = pages; - list_add(&entry->head, &dev->agp->memory); - - request->handle = entry->handle; - request->physical = memory->physical; - - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_alloc); - - -int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_buffer *request = data; - - return drm_legacy_agp_alloc(dev, request); -} - -/* - * Search for the AGP memory entry associated with a handle. - * - * \param dev DRM device structure. - * \param handle AGP memory handle. - * \return pointer to the drm_agp_mem structure associated with \p handle. - * - * Walks through drm_agp_head::memory until finding a matching handle. - */ -static struct drm_agp_mem *drm_legacy_agp_lookup_entry(struct drm_device *dev, - unsigned long handle) -{ - struct drm_agp_mem *entry; - - list_for_each_entry(entry, &dev->agp->memory, head) { - if (entry->handle == handle) - return entry; - } - return NULL; -} - -/* - * Unbind AGP memory from the GATT (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and acquired, looks-up the AGP memory - * entry and passes it to the unbind_agp() function. 
- */ -int drm_legacy_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request) -{ - struct drm_agp_mem *entry; - int ret; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry || !entry->bound) - return -EINVAL; - ret = agp_unbind_memory(entry->memory); - if (ret == 0) - entry->bound = 0; - return ret; -} -EXPORT_SYMBOL(drm_legacy_agp_unbind); - - -int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_binding *request = data; - - return drm_legacy_agp_unbind(dev, request); -} - -/* - * Bind AGP memory into the GATT (ioctl) - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired and that no memory - * is currently bound into the GATT. Looks-up the AGP memory entry and passes - * it to bind_agp() function. - */ -int drm_legacy_agp_bind(struct drm_device *dev, struct drm_agp_binding *request) -{ - struct drm_agp_mem *entry; - int retcode; - int page; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry || entry->bound) - return -EINVAL; - page = DIV_ROUND_UP(request->offset, PAGE_SIZE); - retcode = agp_bind_memory(entry->memory, page); - if (retcode) - return retcode; - entry->bound = dev->agp->base + (page << PAGE_SHIFT); - DRM_DEBUG("base = 0x%lx entry->bound = 0x%lx\n", - dev->agp->base, entry->bound); - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_bind); - - -int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_binding *request = data; - - return drm_legacy_agp_bind(dev, request); -} - -/* - * Free AGP memory (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired and looks up the - * AGP memory entry. If the memory is currently bound, unbind it via - * unbind_agp(). Frees it via free_agp() as well as the entry itself - * and unlinks from the doubly linked list it's inserted in. - */ -int drm_legacy_agp_free(struct drm_device *dev, struct drm_agp_buffer *request) -{ - struct drm_agp_mem *entry; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry) - return -EINVAL; - if (entry->bound) - agp_unbind_memory(entry->memory); - - list_del(&entry->head); - - agp_free_memory(entry->memory); - kfree(entry); - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_free); - - -int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_buffer *request = data; - - return drm_legacy_agp_free(dev, request); -} - -/* - * Initialize the AGP resources. - * - * \return pointer to a drm_agp_head structure. - * - * Gets the drm_agp_t structure which is made available by the agpgart module - * via the inter_module_* functions. Creates and initializes a drm_agp_head - * structure. - * - * Note that final cleanup of the kmalloced structure is directly done in - * drm_pci_agp_destroy. 
- */ -struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - struct drm_agp_head *head = NULL; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (!head) - return NULL; - head->bridge = agp_find_bridge(pdev); - if (!head->bridge) { - head->bridge = agp_backend_acquire(pdev); - if (!head->bridge) { - kfree(head); - return NULL; - } - agp_copy_info(head->bridge, &head->agp_info); - agp_backend_release(head->bridge); - } else { - agp_copy_info(head->bridge, &head->agp_info); - } - if (head->agp_info.chipset == NOT_SUPPORTED) { - kfree(head); - return NULL; - } - INIT_LIST_HEAD(&head->memory); - head->cant_use_aperture = head->agp_info.cant_use_aperture; - head->page_mask = head->agp_info.page_mask; - head->base = head->agp_info.aper_base; - return head; -} -/* Only exported for i810.ko */ -EXPORT_SYMBOL(drm_legacy_agp_init); - -/** - * drm_legacy_agp_clear - Clear AGP resource list - * @dev: DRM device - * - * Iterate over all AGP resources and remove them. But keep the AGP head - * intact so it can still be used. It is safe to call this if AGP is disabled or - * was already removed. - * - * Cleanup is only done for drivers who have DRIVER_LEGACY set. - */ -void drm_legacy_agp_clear(struct drm_device *dev) -{ - struct drm_agp_mem *entry, *tempe; - - if (!dev->agp) - return; - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) { - if (entry->bound) - agp_unbind_memory(entry->memory); - agp_free_memory(entry->memory); - kfree(entry); - } - INIT_LIST_HEAD(&dev->agp->memory); - - if (dev->agp->acquired) - drm_legacy_agp_release(dev); - - dev->agp->acquired = 0; - dev->agp->enabled = 0; -} - -#endif diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index f1a503aafe5a..a91737adf8e7 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_get_color_encoding_name(state->color_encoding)); drm_printf(p, "\tcolor-range=%s\n", drm_get_color_range_name(state->color_range)); + drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); if (plane->funcs->atomic_print_state) plane->funcs->atomic_print_state(p, state); @@ -1773,6 +1774,7 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p, struct drm_crtc *crtc; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; + struct drm_private_obj *obj; if (!drm_drv_uses_atomic_modeset(dev)) return; @@ -1801,6 +1803,14 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p, if (take_locks) drm_modeset_unlock(&dev->mode_config.connection_mutex); drm_connector_list_iter_end(&conn_iter); + + list_for_each_entry(obj, &config->privobj_list, head) { + if (take_locks) + drm_modeset_lock(&obj->lock, NULL); + drm_atomic_private_obj_print_state(p, obj->state); + if (take_locks) + drm_modeset_unlock(&obj->lock); + } } /** diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 68ffcc0b00dc..39ef0a6addeb 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -795,9 +795,9 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, EXPORT_SYMBOL(drm_atomic_helper_check_modeset); /** - * drm_atomic_helper_check_wb_encoder_state() - Check writeback encoder state - * @encoder: encoder state to check - * @conn_state: connector state to check + * 
drm_atomic_helper_check_wb_connector_state() - Check writeback connector state + * @connector: corresponding connector + * @state: the driver state object * * Checks if the writeback connector state is valid, and returns an error if it * isn't. @@ -806,9 +806,11 @@ EXPORT_SYMBOL(drm_atomic_helper_check_modeset); * Zero for success or -errno */ int -drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder, - struct drm_connector_state *conn_state) +drm_atomic_helper_check_wb_connector_state(struct drm_connector *connector, + struct drm_atomic_state *state) { + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(state, connector); struct drm_writeback_job *wb_job = conn_state->writeback_job; struct drm_property_blob *pixel_format_blob; struct drm_framebuffer *fb; @@ -827,11 +829,11 @@ drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder, if (fb->format->format == formats[i]) return 0; - drm_dbg_kms(encoder->dev, "Invalid pixel format %p4cc\n", &fb->format->format); + drm_dbg_kms(connector->dev, "Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL; } -EXPORT_SYMBOL(drm_atomic_helper_check_wb_encoder_state); +EXPORT_SYMBOL(drm_atomic_helper_check_wb_connector_state); /** * drm_atomic_helper_check_plane_state() - Check plane state for validity @@ -2382,10 +2384,10 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, EXPORT_SYMBOL(drm_atomic_helper_setup_commit); /** - * drm_atomic_helper_wait_for_dependencies - wait for required preceeding commits + * drm_atomic_helper_wait_for_dependencies - wait for required preceding commits * @old_state: atomic state object with old state structures * - * This function waits for all preceeding commits that touch the same CRTC as + * This function waits for all preceding commits that touch the same CRTC as * @old_state to both be committed to the hardware (as signalled by * drm_atomic_helper_commit_hw_done()) and executed by the hardware (as signalled * by calling drm_crtc_send_vblank_event() on the &drm_crtc_state.event). 
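The drm_atomic_helper.c hunk above renames drm_atomic_helper_check_wb_encoder_state() to drm_atomic_helper_check_wb_connector_state() and has it take the connector plus the full atomic state, so the helper can look up the new connector state itself. A hedged sketch of a caller under the new signature; the foo structure and names are hypothetical, not from this series:

#include <linux/container_of.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_device.h>
#include <drm/drm_writeback.h>

/* Hypothetical driver structure, for illustration only. */
struct foo_device {
	struct drm_device drm;
	struct drm_writeback_connector wb;
};

static int foo_atomic_check(struct drm_device *dev,
			    struct drm_atomic_state *state)
{
	struct foo_device *foo = container_of(dev, struct foo_device, drm);
	int ret;

	ret = drm_atomic_helper_check(dev, state);
	if (ret)
		return ret;

	/* The helper now pulls the new connector state out of @state. */
	return drm_atomic_helper_check_wb_connector_state(&foo->wb.base,
							  state);
}

A function like this would be wired in as the driver's &drm_mode_config_funcs.atomic_check callback.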
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 784e63d70a42..519228eb1095 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -275,6 +275,20 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->normalized_zpos = val; } } + + if (plane->hotspot_x_property) { + if (!drm_object_property_get_default_value(&plane->base, + plane->hotspot_x_property, + &val)) + plane_state->hotspot_x = val; + } + + if (plane->hotspot_y_property) { + if (!drm_object_property_get_default_value(&plane->base, + plane->hotspot_y_property, + &val)) + plane_state->hotspot_y = val; + } } EXPORT_SYMBOL(__drm_atomic_helper_plane_state_reset); @@ -338,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; + state->color_mgmt_changed = false; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 98d3b10c08ae..29d4940188d4 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, return fence_ptr; } -static int -drm_atomic_replace_property_blob_from_id(struct drm_device *dev, - struct drm_property_blob **blob, - uint64_t blob_id, - ssize_t expected_size, - ssize_t expected_elem_size, - bool *replaced) -{ - struct drm_property_blob *new_blob = NULL; - - if (blob_id != 0) { - new_blob = drm_property_lookup_blob(dev, blob_id); - if (new_blob == NULL) { - drm_dbg_atomic(dev, - "cannot find blob ID %llu\n", blob_id); - return -EINVAL; - } - - if (expected_size > 0 && - new_blob->length != expected_size) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu different from expected %zu\n", - new_blob->base.id, new_blob->length, expected_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - if (expected_elem_size > 0 && - new_blob->length % expected_elem_size != 0) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu not divisible by element size %zu\n", - new_blob->base.id, new_blob->length, expected_elem_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - } - - *replaced |= drm_property_replace_blob(blob, new_blob); - drm_property_blob_put(new_blob); - - return 0; -} - static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) @@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), -1, @@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = 
drm_property_replace_blob_from_id(dev, &state->gamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->fb_damage_clips, val, -1, @@ -593,6 +551,22 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (plane->funcs->atomic_set_property) { return plane->funcs->atomic_set_property(plane, state, property, val); + } else if (property == plane->hotspot_x_property) { + if (plane->type != DRM_PLANE_TYPE_CURSOR) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", + plane->base.id, plane->name, val); + return -EINVAL; + } + state->hotspot_x = val; + } else if (property == plane->hotspot_y_property) { + if (plane->type != DRM_PLANE_TYPE_CURSOR) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", + plane->base.id, plane->name, val); + return -EINVAL; + } + state->hotspot_y = val; } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", @@ -653,6 +627,10 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->scaling_filter; } else if (plane->funcs->atomic_get_property) { return plane->funcs->atomic_get_property(plane, state, property, val); + } else if (property == plane->hotspot_x_property) { + *val = state->hotspot_x; + } else if (property == plane->hotspot_y_property) { + *val = state->hotspot_y; } else { drm_dbg_atomic(dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", @@ -758,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; } else if (property == config->hdr_output_metadata_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->hdr_output_metadata, val, sizeof(struct hdr_output_metadata), -1, @@ -1006,13 +984,28 @@ out: return ret; } +static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value, + struct drm_property *prop) +{ + if (ret != 0 || old_val != prop_value) { + drm_dbg_atomic(prop->dev, + "[PROP:%d:%s] No prop can be changed during async flip\n", + prop->base.id, prop->name); + return -EINVAL; + } + + return 0; +} + int drm_atomic_set_property(struct drm_atomic_state *state, struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, - uint64_t prop_value) + u64 prop_value, + bool async_flip) { struct drm_mode_object *ref; + u64 old_val; int ret; if (!drm_property_change_valid_get(prop, prop_value, &ref)) @@ -1029,6 +1022,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } + if (async_flip) { + ret = drm_atomic_connector_get_property(connector, connector_state, + prop, &old_val); + ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); + break; + } + ret = drm_atomic_connector_set_property(connector, connector_state, file_priv, prop, prop_value); @@ -1044,6 +1044,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } + if (async_flip) { + ret = drm_atomic_crtc_get_property(crtc, crtc_state, + prop, &old_val); + ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); + break; + } + ret = drm_atomic_crtc_set_property(crtc, crtc_state, prop, 
prop_value); break; @@ -1051,6 +1058,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state, case DRM_MODE_OBJECT_PLANE: { struct drm_plane *plane = obj_to_plane(obj); struct drm_plane_state *plane_state; + struct drm_mode_config *config = &plane->dev->mode_config; plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { @@ -1058,6 +1066,21 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } + if (async_flip && prop != config->prop_fb_id) { + ret = drm_atomic_plane_get_property(plane, plane_state, + prop, &old_val); + ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); + break; + } + + if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { + drm_dbg_atomic(prop->dev, + "[OBJECT:%d] Only primary planes can be changed during async flip\n", + obj->id); + ret = -EINVAL; + break; + } + ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); @@ -1323,6 +1346,18 @@ static void complete_signaling(struct drm_device *dev, kfree(fence_state); } +static void +set_async_flip(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + int i; + + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { + crtc_state->async_flip = true; + } +} + int drm_mode_atomic_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -1337,6 +1372,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, struct drm_out_fence_state *fence_state; int ret = 0; unsigned int i, j, num_fences; + bool async_flip = false; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) @@ -1363,9 +1399,13 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, } if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) { - drm_dbg_atomic(dev, - "commit failed: invalid flag DRM_MODE_PAGE_FLIP_ASYNC\n"); - return -EINVAL; + if (!dev->mode_config.async_page_flip) { + drm_dbg_atomic(dev, + "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n"); + return -EINVAL; + } + + async_flip = true; } /* can't test and expect an event at the same time. */ @@ -1450,8 +1490,8 @@ retry: goto out; } - ret = drm_atomic_set_property(state, file_priv, - obj, prop, prop_value); + ret = drm_atomic_set_property(state, file_priv, obj, + prop, prop_value, async_flip); if (ret) { drm_mode_object_put(obj); goto out; @@ -1468,6 +1508,9 @@ retry: if (ret) goto out; + if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) + set_async_flip(state); + if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) { ret = drm_atomic_check_only(state); } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 6899b3dc1f12..22aa015df387 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -37,13 +37,12 @@ #include <drm/drm_print.h> #include "drm_internal.h" -#include "drm_legacy.h" /** * DOC: master and authentication * * &struct drm_master is used to track groups of clients with open - * primary/legacy device nodes. For every &struct drm_file which has had at + * primary device nodes. For every &struct drm_file which has had at * least once successfully became the device master (either through the * SET_MASTER IOCTL, or implicitly through opening the primary device node when * no one else is the current master that time) there exists one &drm_master. 
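The drm_atomic_uapi.c hunks above wire DRM_MODE_PAGE_FLIP_ASYNC into the atomic ioctl: the flag is now accepted when the driver sets mode_config.async_page_flip, and drm_atomic_set_property() rejects the commit unless every property keeps its current value, with FB_ID on a primary plane as the one permitted change. A minimal userspace sketch of that contract, using libdrm; fd, plane_id, fb_id_prop and new_fb are illustrative names assumed to come from the usual resource and property enumeration:

	#include <errno.h>
	#include <stdint.h>
	#include <xf86drm.h>
	#include <xf86drmMode.h>

	static int async_flip(int fd, uint32_t plane_id, uint32_t fb_id_prop,
			      uint32_t new_fb)
	{
		drmModeAtomicReq *req = drmModeAtomicAlloc();
		int ret;

		if (!req)
			return -ENOMEM;

		/* Only FB_ID on a primary plane may differ from current state. */
		ret = drmModeAtomicAddProperty(req, plane_id, fb_id_prop, new_fb);
		if (ret >= 0)
			ret = drmModeAtomicCommit(fd, req,
						  DRM_MODE_PAGE_FLIP_ASYNC |
						  DRM_MODE_PAGE_FLIP_EVENT, NULL);

		drmModeAtomicFree(req);
		return ret < 0 ? ret : 0;
	}

Touching any other property, or any non-primary plane, makes drm_atomic_check_prop_changes() fail the commit with -EINVAL.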
@@ -139,7 +138,6 @@ struct drm_master *drm_master_create(struct drm_device *dev) return NULL; kref_init(&master->refcount); - drm_master_legacy_init(master); idr_init_base(&master->magic_map, 1); master->dev = dev; @@ -365,8 +363,6 @@ void drm_master_release(struct drm_file *file_priv) if (!drm_is_current_master_locked(file_priv)) goto out; - drm_legacy_lock_master_cleanup(dev, master); - if (dev->master == file_priv->master) drm_drop_master(dev, file_priv); out: @@ -429,8 +425,6 @@ static void drm_master_destroy(struct kref *kref) if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_lease_destroy(master); - drm_legacy_master_rmmaps(dev, master); - idr_destroy(&master->magic_map); idr_destroy(&master->leases); idr_destroy(&master->lessee_idr); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 30d66bee0ec6..cee3188adf3d 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1347,50 +1347,6 @@ struct drm_bridge *of_drm_find_bridge(struct device_node *np) EXPORT_SYMBOL(of_drm_find_bridge); #endif -#ifdef CONFIG_DEBUG_FS -static int drm_bridge_chains_info(struct seq_file *m, void *data) -{ - struct drm_debugfs_entry *entry = m->private; - struct drm_device *dev = entry->dev; - struct drm_printer p = drm_seq_file_printer(m); - struct drm_mode_config *config = &dev->mode_config; - struct drm_encoder *encoder; - unsigned int bridge_idx = 0; - - list_for_each_entry(encoder, &config->encoder_list, head) { - struct drm_bridge *bridge; - - drm_printf(&p, "encoder[%u]\n", encoder->base.id); - - drm_for_each_bridge_in_chain(encoder, bridge) { - drm_printf(&p, "\tbridge[%u] type: %u, ops: %#x", - bridge_idx, bridge->type, bridge->ops); - -#ifdef CONFIG_OF - if (bridge->of_node) - drm_printf(&p, ", OF: %pOFfc", bridge->of_node); -#endif - - drm_printf(&p, "\n"); - - bridge_idx++; - } - } - - return 0; -} - -static const struct drm_debugfs_info drm_bridge_debugfs_list[] = { - { "bridge_chains", drm_bridge_chains_info, 0 }, -}; - -void drm_bridge_debugfs_init(struct drm_device *dev) -{ - drm_debugfs_add_files(dev, drm_bridge_debugfs_list, - ARRAY_SIZE(drm_bridge_debugfs_list)); -} -#endif - MODULE_AUTHOR("Ajay Kumar <ajaykumar.rs@samsung.com>"); MODULE_DESCRIPTION("DRM bridge infrastructure"); MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 8239ad43aed5..3acd67021ec6 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -198,12 +198,6 @@ static void drm_bridge_connector_destroy(struct drm_connector *connector) struct drm_bridge_connector *bridge_connector = to_drm_bridge_connector(connector); - if (bridge_connector->bridge_hpd) { - struct drm_bridge *hpd = bridge_connector->bridge_hpd; - - drm_bridge_hpd_disable(hpd); - } - drm_connector_unregister(connector); drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c deleted file mode 100644 index 86700560fea2..000000000000 --- a/drivers/gpu/drm/drm_bufs.c +++ /dev/null @@ -1,1627 +0,0 @@ -/* - * Legacy: Generic DRM Buffer Management - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Author: Rickard E. 
(Rik) Faith <faith@valinux.com> - * Author: Gareth Hughes <gareth@valinux.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/export.h> -#include <linux/log2.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/nospec.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> - -#include <asm/shmparam.h> - -#include <drm/drm_device.h> -#include <drm/drm_drv.h> -#include <drm/drm_file.h> -#include <drm/drm_print.h> - -#include "drm_legacy.h" - - -static struct drm_map_list *drm_find_matching_map(struct drm_device *dev, - struct drm_local_map *map) -{ - struct drm_map_list *entry; - - list_for_each_entry(entry, &dev->maplist, head) { - /* - * Because the kernel-userspace ABI is fixed at a 32-bit offset - * while PCI resources may live above that, we only compare the - * lower 32 bits of the map offset for maps of type - * _DRM_FRAMEBUFFER or _DRM_REGISTERS. - * It is assumed that if a driver have more than one resource - * of each type, the lower 32 bits are different. - */ - if (!entry->map || - map->type != entry->map->type || - entry->master != dev->master) - continue; - switch (map->type) { - case _DRM_SHM: - if (map->flags != _DRM_CONTAINS_LOCK) - break; - return entry; - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - if ((entry->map->offset & 0xffffffff) == - (map->offset & 0xffffffff)) - return entry; - break; - default: /* Make gcc happy */ - break; - } - if (entry->map->offset == map->offset) - return entry; - } - - return NULL; -} - -static int drm_map_handle(struct drm_device *dev, struct drm_hash_item *hash, - unsigned long user_token, int hashed_handle, int shm) -{ - int use_hashed_handle, shift; - unsigned long add; - -#if (BITS_PER_LONG == 64) - use_hashed_handle = ((user_token & 0xFFFFFFFF00000000UL) || hashed_handle); -#elif (BITS_PER_LONG == 32) - use_hashed_handle = hashed_handle; -#else -#error Unsupported long size. Neither 64 nor 32 bits. -#endif - - if (!use_hashed_handle) { - int ret; - - hash->key = user_token >> PAGE_SHIFT; - ret = drm_ht_insert_item(&dev->map_hash, hash); - if (ret != -EINVAL) - return ret; - } - - shift = 0; - add = DRM_MAP_HASH_OFFSET >> PAGE_SHIFT; - if (shm && (SHMLBA > PAGE_SIZE)) { - int bits = ilog2(SHMLBA >> PAGE_SHIFT) + 1; - - /* For shared memory, we have to preserve the SHMLBA - * bits of the eventual vma->vm_pgoff value during - * mmap(). 
Otherwise we run into cache aliasing problems - * on some platforms. On these platforms, the pgoff of - * a mmap() request is used to pick a suitable virtual - * address for the mmap() region such that it will not - * cause cache aliasing problems. - * - * Therefore, make sure the SHMLBA relevant bits of the - * hash value we use are equal to those in the original - * kernel virtual address. - */ - shift = bits; - add |= ((user_token >> PAGE_SHIFT) & ((1UL << bits) - 1UL)); - } - - return drm_ht_just_insert_please(&dev->map_hash, hash, - user_token, 32 - PAGE_SHIFT - 3, - shift, add); -} - -/* - * Core function to create a range of memory available for mapping by a - * non-root process. - * - * Adjusts the memory offset to its absolute value according to the mapping - * type. Adds the map to the map list drm_device::maplist. Adds MTRR's where - * applicable and if supported by the kernel. - */ -static int drm_addmap_core(struct drm_device *dev, resource_size_t offset, - unsigned int size, enum drm_map_type type, - enum drm_map_flags flags, - struct drm_map_list **maplist) -{ - struct drm_local_map *map; - struct drm_map_list *list; - unsigned long user_token; - int ret; - - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return -ENOMEM; - - map->offset = offset; - map->size = size; - map->flags = flags; - map->type = type; - - /* Only allow shared memory to be removable since we only keep enough - * book keeping information about shared memory to allow for removal - * when processes fork. - */ - if ((map->flags & _DRM_REMOVABLE) && map->type != _DRM_SHM) { - kfree(map); - return -EINVAL; - } - DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n", - (unsigned long long)map->offset, map->size, map->type); - - /* page-align _DRM_SHM maps. They are allocated here so there is no security - * hole created by that and it works around various broken drivers that use - * a non-aligned quantity to map the SAREA. --BenH - */ - if (map->type == _DRM_SHM) - map->size = PAGE_ALIGN(map->size); - - if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) { - kfree(map); - return -EINVAL; - } - map->mtrr = -1; - map->handle = NULL; - - switch (map->type) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: -#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) && !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__arm__) - if (map->offset + (map->size-1) < map->offset || - map->offset < virt_to_phys(high_memory)) { - kfree(map); - return -EINVAL; - } -#endif - /* Some drivers preinitialize some maps, without the X Server - * needing to be aware of it. Therefore, we just return success - * when the server tries to create a duplicate map. 
- */ - list = drm_find_matching_map(dev, map); - if (list != NULL) { - if (list->map->size != map->size) { - DRM_DEBUG("Matching maps of type %d with " - "mismatched sizes, (%ld vs %ld)\n", - map->type, map->size, - list->map->size); - list->map->size = map->size; - } - - kfree(map); - *maplist = list; - return 0; - } - - if (map->type == _DRM_FRAME_BUFFER || - (map->flags & _DRM_WRITE_COMBINING)) { - map->mtrr = - arch_phys_wc_add(map->offset, map->size); - } - if (map->type == _DRM_REGISTERS) { - if (map->flags & _DRM_WRITE_COMBINING) - map->handle = ioremap_wc(map->offset, - map->size); - else - map->handle = ioremap(map->offset, map->size); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - } - - break; - case _DRM_SHM: - list = drm_find_matching_map(dev, map); - if (list != NULL) { - if (list->map->size != map->size) { - DRM_DEBUG("Matching maps of type %d with " - "mismatched sizes, (%ld vs %ld)\n", - map->type, map->size, list->map->size); - list->map->size = map->size; - } - - kfree(map); - *maplist = list; - return 0; - } - map->handle = vmalloc_user(map->size); - DRM_DEBUG("%lu %d %p\n", - map->size, order_base_2(map->size), map->handle); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - map->offset = (unsigned long)map->handle; - if (map->flags & _DRM_CONTAINS_LOCK) { - /* Prevent a 2nd X Server from creating a 2nd lock */ - if (dev->master->lock.hw_lock != NULL) { - vfree(map->handle); - kfree(map); - return -EBUSY; - } - dev->sigdata.lock = dev->master->lock.hw_lock = map->handle; /* Pointer to lock */ - } - break; - case _DRM_AGP: { - struct drm_agp_mem *entry; - int valid = 0; - - if (!dev->agp) { - kfree(map); - return -EINVAL; - } -#ifdef __alpha__ - map->offset += dev->hose->mem_space->start; -#endif - /* In some cases (i810 driver), user space may have already - * added the AGP base itself, because dev->agp->base previously - * only got set during AGP enable. So, only add the base - * address if the map's offset isn't already within the - * aperture. - */ - if (map->offset < dev->agp->base || - map->offset > dev->agp->base + - dev->agp->agp_info.aper_size * 1024 * 1024 - 1) { - map->offset += dev->agp->base; - } - map->mtrr = dev->agp->agp_mtrr; /* for getmap */ - - /* This assumes the DRM is in total control of AGP space. - * It's not always the case as AGP can be in the control - * of user space (i.e. i810 driver). So this loop will get - * skipped and we double check that dev->agp->memory is - * actually set as well as being invalid before EPERM'ing - */ - list_for_each_entry(entry, &dev->agp->memory, head) { - if ((map->offset >= entry->bound) && - (map->offset + map->size <= entry->bound + entry->pages * PAGE_SIZE)) { - valid = 1; - break; - } - } - if (!list_empty(&dev->agp->memory) && !valid) { - kfree(map); - return -EPERM; - } - DRM_DEBUG("AGP offset = 0x%08llx, size = 0x%08lx\n", - (unsigned long long)map->offset, map->size); - - break; - } - case _DRM_SCATTER_GATHER: - if (!dev->sg) { - kfree(map); - return -EINVAL; - } - map->offset += (unsigned long)dev->sg->virtual; - break; - case _DRM_CONSISTENT: - /* dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G, - * As we're limiting the address to 2^32-1 (or less), - * casting it down to 32 bits is no problem, but we - * need to point to a 64bit variable first. 
- */ - map->handle = dma_alloc_coherent(dev->dev, - map->size, - &map->offset, - GFP_KERNEL); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - break; - default: - kfree(map); - return -EINVAL; - } - - list = kzalloc(sizeof(*list), GFP_KERNEL); - if (!list) { - if (map->type == _DRM_REGISTERS) - iounmap(map->handle); - kfree(map); - return -EINVAL; - } - list->map = map; - - mutex_lock(&dev->struct_mutex); - list_add(&list->head, &dev->maplist); - - /* Assign a 32-bit handle */ - /* We do it here so that dev->struct_mutex protects the increment */ - user_token = (map->type == _DRM_SHM) ? (unsigned long)map->handle : - map->offset; - ret = drm_map_handle(dev, &list->hash, user_token, 0, - (map->type == _DRM_SHM)); - if (ret) { - if (map->type == _DRM_REGISTERS) - iounmap(map->handle); - kfree(map); - kfree(list); - mutex_unlock(&dev->struct_mutex); - return ret; - } - - list->user_token = list->hash.key << PAGE_SHIFT; - mutex_unlock(&dev->struct_mutex); - - if (!(map->flags & _DRM_DRIVER)) - list->master = dev->master; - *maplist = list; - return 0; -} - -int drm_legacy_addmap(struct drm_device *dev, resource_size_t offset, - unsigned int size, enum drm_map_type type, - enum drm_map_flags flags, struct drm_local_map **map_ptr) -{ - struct drm_map_list *list; - int rc; - - rc = drm_addmap_core(dev, offset, size, type, flags, &list); - if (!rc) - *map_ptr = list->map; - return rc; -} -EXPORT_SYMBOL(drm_legacy_addmap); - -struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, - unsigned int token) -{ - struct drm_map_list *_entry; - - list_for_each_entry(_entry, &dev->maplist, head) - if (_entry->user_token == token) - return _entry->map; - return NULL; -} -EXPORT_SYMBOL(drm_legacy_findmap); - -/* - * Ioctl to specify a range of memory that is available for mapping by a - * non-root process. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_map structure. - * \return zero on success or a negative value on error. - * - */ -int drm_legacy_addmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *map = data; - struct drm_map_list *maplist; - int err; - - if (!(capable(CAP_SYS_ADMIN) || map->type == _DRM_AGP || map->type == _DRM_SHM)) - return -EPERM; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - err = drm_addmap_core(dev, map->offset, map->size, map->type, - map->flags, &maplist); - - if (err) - return err; - - /* avoid a warning on 64-bit, this casting isn't very nice, but the API is set so too late */ - map->handle = (void *)(unsigned long)maplist->user_token; - - /* - * It appears that there are no users of this value whatsoever -- - * drmAddMap just discards it. Let's not encourage its use. - * (Keeping drm_addmap_core's returned mtrr value would be wrong -- - * it's not a real mtrr index anymore.) - */ - map->mtrr = -1; - - return 0; -} - -/* - * Get a mapping information. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_map structure. - * - * \return zero on success or a negative number on failure. 
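For context on how the drm_legacy_addmap() helper exported above was consumed: legacy (DRI1) drivers called it at load time to publish their MMIO and SAREA ranges. A sketch under the assumption of a PCI driver with its registers in BAR 0 (pdev and dev being the driver's PCI and DRM devices):

	struct drm_local_map *regs;
	int ret;

	/* Register BAR 0 as a write-combined MMIO mapping. */
	ret = drm_legacy_addmap(dev, pci_resource_start(pdev, 0),
				pci_resource_len(pdev, 0),
				_DRM_REGISTERS, _DRM_WRITE_COMBINING, &regs);
	if (ret)
		return ret;

On success, regs->handle holds the ioremap_wc()ed kernel address of the registers (per the _DRM_REGISTERS branch of drm_addmap_core() above) until drm_legacy_rmmap() tears the mapping down.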
- * - * Searches for the mapping with the specified offset and copies its information - * into userspace - */ -int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *map = data; - struct drm_map_list *r_list = NULL; - struct list_head *list; - int idx; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - idx = map->offset; - if (idx < 0) - return -EINVAL; - - i = 0; - mutex_lock(&dev->struct_mutex); - list_for_each(list, &dev->maplist) { - if (i == idx) { - r_list = list_entry(list, struct drm_map_list, head); - break; - } - i++; - } - if (!r_list || !r_list->map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - map->offset = r_list->map->offset; - map->size = r_list->map->size; - map->type = r_list->map->type; - map->flags = r_list->map->flags; - map->handle = (void *)(unsigned long) r_list->user_token; - map->mtrr = arch_phys_wc_index(r_list->map->mtrr); - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -/* - * Remove a map private from list and deallocate resources if the mapping - * isn't in use. - * - * Searches the map on drm_device::maplist, removes it from the list, see if - * it's being used, and free any associated resource (such as MTRR's) if it's not - * being on use. - * - * \sa drm_legacy_addmap - */ -int drm_legacy_rmmap_locked(struct drm_device *dev, struct drm_local_map *map) -{ - struct drm_map_list *r_list = NULL, *list_t; - int found = 0; - struct drm_master *master; - - /* Find the list entry for the map and remove it */ - list_for_each_entry_safe(r_list, list_t, &dev->maplist, head) { - if (r_list->map == map) { - master = r_list->master; - list_del(&r_list->head); - drm_ht_remove_key(&dev->map_hash, - r_list->user_token >> PAGE_SHIFT); - kfree(r_list); - found = 1; - break; - } - } - - if (!found) - return -EINVAL; - - switch (map->type) { - case _DRM_REGISTERS: - iounmap(map->handle); - fallthrough; - case _DRM_FRAME_BUFFER: - arch_phys_wc_del(map->mtrr); - break; - case _DRM_SHM: - vfree(map->handle); - if (master) { - if (dev->sigdata.lock == master->lock.hw_lock) - dev->sigdata.lock = NULL; - master->lock.hw_lock = NULL; /* SHM removed */ - master->lock.file_priv = NULL; - wake_up_interruptible_all(&master->lock.lock_queue); - } - break; - case _DRM_AGP: - case _DRM_SCATTER_GATHER: - break; - case _DRM_CONSISTENT: - dma_free_coherent(dev->dev, - map->size, - map->handle, - map->offset); - break; - } - kfree(map); - - return 0; -} -EXPORT_SYMBOL(drm_legacy_rmmap_locked); - -void drm_legacy_rmmap(struct drm_device *dev, struct drm_local_map *map) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - drm_legacy_rmmap_locked(dev, map); - mutex_unlock(&dev->struct_mutex); -} -EXPORT_SYMBOL(drm_legacy_rmmap); - -void drm_legacy_master_rmmaps(struct drm_device *dev, struct drm_master *master) -{ - struct drm_map_list *r_list, *list_temp; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) { - if (r_list->master == master) { - drm_legacy_rmmap_locked(dev, r_list->map); - r_list = NULL; - } - } - mutex_unlock(&dev->struct_mutex); -} - -void drm_legacy_rmmaps(struct drm_device *dev) -{ - struct drm_map_list *r_list, *list_temp; - - list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) - drm_legacy_rmmap(dev, r_list->map); -} - -/* The rmmap ioctl appears to be unnecessary. 
All mappings are torn down on - * the last close of the device, and this is necessary for cleanup when things - * exit uncleanly. Therefore, having userland manually remove mappings seems - * like a pointless exercise since they're going away anyway. - * - * One use case might be after addmap is allowed for normal users for SHM and - * gets used by drivers that the server doesn't need to care about. This seems - * unlikely. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a struct drm_map structure. - * \return zero on success or a negative value on error. - */ -int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *request = data; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list; - int ret; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map && - r_list->user_token == (unsigned long)request->handle && - r_list->map->flags & _DRM_REMOVABLE) { - map = r_list->map; - break; - } - } - - /* List has wrapped around to the head pointer, or it's empty we didn't - * find anything. - */ - if (list_empty(&dev->maplist) || !map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - /* Register and framebuffer maps are permanent */ - if ((map->type == _DRM_REGISTERS) || (map->type == _DRM_FRAME_BUFFER)) { - mutex_unlock(&dev->struct_mutex); - return 0; - } - - ret = drm_legacy_rmmap_locked(dev, map); - - mutex_unlock(&dev->struct_mutex); - - return ret; -} - -/* - * Cleanup after an error on one of the addbufs() functions. - * - * \param dev DRM device. - * \param entry buffer entry where the error occurred. - * - * Frees any pages and buffers associated with the given entry. - */ -static void drm_cleanup_buf_error(struct drm_device *dev, - struct drm_buf_entry *entry) -{ - drm_dma_handle_t *dmah; - int i; - - if (entry->seg_count) { - for (i = 0; i < entry->seg_count; i++) { - if (entry->seglist[i]) { - dmah = entry->seglist[i]; - dma_free_coherent(dev->dev, - dmah->size, - dmah->vaddr, - dmah->busaddr); - kfree(dmah); - } - } - kfree(entry->seglist); - - entry->seg_count = 0; - } - - if (entry->buf_count) { - for (i = 0; i < entry->buf_count; i++) { - kfree(entry->buflist[i].dev_private); - } - kfree(entry->buflist); - - entry->buf_count = 0; - } -} - -#if IS_ENABLED(CONFIG_AGP) -/* - * Add AGP buffers for DMA transfers. - * - * \param dev struct drm_device to which the buffers are to be added. - * \param request pointer to a struct drm_buf_desc describing the request. - * \return zero on success or a negative number on failure. - * - * After some sanity checks creates a drm_buf structure for each buffer and - * reallocates the buffer list of the same size order to accommodate the new - * buffers. - */ -int drm_legacy_addbufs_agp(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_entry *entry; - struct drm_agp_mem *agp_entry; - struct drm_buf *buf; - unsigned long offset; - unsigned long agp_offset; - int count; - int order; - int size; - int alignment; - int page_order; - int total; - int byte_count; - int i, valid; - struct drm_buf **temp_buflist; - - if (!dma) - return -EINVAL; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? 
PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - byte_count = 0; - agp_offset = dev->agp->base + request->agp_start; - - DRM_DEBUG("count: %d\n", count); - DRM_DEBUG("order: %d\n", order); - DRM_DEBUG("size: %d\n", size); - DRM_DEBUG("agp_offset: %lx\n", agp_offset); - DRM_DEBUG("alignment: %d\n", alignment); - DRM_DEBUG("page_order: %d\n", page_order); - DRM_DEBUG("total: %d\n", total); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - /* Make sure buffers are located in AGP memory that we own */ - valid = 0; - list_for_each_entry(agp_entry, &dev->agp->memory, head) { - if ((agp_offset >= agp_entry->bound) && - (agp_offset + total * count <= agp_entry->bound + agp_entry->pages * PAGE_SIZE)) { - valid = 1; - break; - } - } - if (!list_empty(&dev->agp->memory) && !valid) { - DRM_DEBUG("zone invalid\n"); - return -EINVAL; - } - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->buf_size = size; - entry->page_order = page_order; - - offset = 0; - - while (entry->buf_count < count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - - buf->offset = (dma->byte_count + offset); - buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset); - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. 
*/ - entry->buf_count = count; - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address); - - offset += alignment; - entry->buf_count++; - byte_count += PAGE_SIZE << page_order; - } - - DRM_DEBUG("byte_count: %d\n", byte_count); - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += byte_count >> PAGE_SHIFT; - dma->byte_count += byte_count; - - DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count); - DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - dma->flags = _DRM_DMA_USE_AGP; - - atomic_dec(&dev->buf_alloc); - return 0; -} -EXPORT_SYMBOL(drm_legacy_addbufs_agp); -#endif /* CONFIG_AGP */ - -int drm_legacy_addbufs_pci(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - int count; - int order; - int size; - int total; - int page_order; - struct drm_buf_entry *entry; - drm_dma_handle_t *dmah; - struct drm_buf *buf; - int alignment; - unsigned long offset; - int i; - int byte_count; - int page_count; - unsigned long *temp_pagelist; - struct drm_buf **temp_buflist; - - if (!drm_core_check_feature(dev, DRIVER_PCI_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - DRM_DEBUG("count=%d, size=%d (%d), order=%d\n", - request->count, request->size, size, order); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->seglist = kcalloc(count, sizeof(*entry->seglist), GFP_KERNEL); - if (!entry->seglist) { - kfree(entry->buflist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - /* Keep the original pagelist until we know all the allocations - * have succeeded - */ - temp_pagelist = kmalloc_array(dma->page_count + (count << page_order), - sizeof(*dma->pagelist), - GFP_KERNEL); - if (!temp_pagelist) { - kfree(entry->buflist); - kfree(entry->seglist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - memcpy(temp_pagelist, - dma->pagelist, dma->page_count * sizeof(*dma->pagelist)); - DRM_DEBUG("pagelist: %d entries\n", - dma->page_count + (count << page_order)); - - entry->buf_size = size; - entry->page_order = page_order; - byte_count = 0; - page_count = 0; - - while (entry->buf_count < count) { - dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL); - if (!dmah) { - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - dmah->size = total; - dmah->vaddr = dma_alloc_coherent(dev->dev, - dmah->size, - &dmah->busaddr, - GFP_KERNEL); - if (!dmah->vaddr) { - kfree(dmah); - - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - entry->seglist[entry->seg_count++] = dmah; - for (i = 0; i < (1 << page_order); i++) { - DRM_DEBUG("page %d @ 0x%08lx\n", - dma->page_count + page_count, - (unsigned long)dmah->vaddr + PAGE_SIZE * i); - temp_pagelist[dma->page_count + page_count++] - = (unsigned long)dmah->vaddr + PAGE_SIZE * i; - } - for (offset = 0; - offset + size <= total && entry->buf_count < count; - offset += alignment, ++entry->buf_count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - buf->offset = (dma->byte_count + byte_count + offset); - buf->address = (void *)(dmah->vaddr + offset); - buf->bus_address = dmah->busaddr + offset; - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, - GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. 
*/ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", - entry->buf_count, buf->address); - } - byte_count += PAGE_SIZE << page_order; - } - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - /* No allocations failed, so now we can replace the original pagelist - * with the new one. - */ - if (dma->page_count) { - kfree(dma->pagelist); - } - dma->pagelist = temp_pagelist; - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += entry->seg_count << page_order; - dma->byte_count += PAGE_SIZE * (entry->seg_count << page_order); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - if (request->flags & _DRM_PCI_BUFFER_RO) - dma->flags = _DRM_DMA_USE_PCI_RO; - - atomic_dec(&dev->buf_alloc); - return 0; - -} -EXPORT_SYMBOL(drm_legacy_addbufs_pci); - -static int drm_legacy_addbufs_sg(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_entry *entry; - struct drm_buf *buf; - unsigned long offset; - unsigned long agp_offset; - int count; - int order; - int size; - int alignment; - int page_order; - int total; - int byte_count; - int i; - struct drm_buf **temp_buflist; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - byte_count = 0; - agp_offset = request->agp_start; - - DRM_DEBUG("count: %d\n", count); - DRM_DEBUG("order: %d\n", order); - DRM_DEBUG("size: %d\n", size); - DRM_DEBUG("agp_offset: %lu\n", agp_offset); - DRM_DEBUG("alignment: %d\n", alignment); - DRM_DEBUG("page_order: %d\n", page_order); - DRM_DEBUG("total: %d\n", total); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->buf_size = size; - entry->page_order = page_order; - - offset = 0; - - while (entry->buf_count < count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - - buf->offset = (dma->byte_count + offset); - buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset - + (unsigned long)dev->sg->virtual); - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address); - - offset += alignment; - entry->buf_count++; - byte_count += PAGE_SIZE << page_order; - } - - DRM_DEBUG("byte_count: %d\n", byte_count); - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += byte_count >> PAGE_SHIFT; - dma->byte_count += byte_count; - - DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count); - DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - dma->flags = _DRM_DMA_USE_SG; - - atomic_dec(&dev->buf_alloc); - return 0; -} - -/* - * Add buffers for DMA transfers (ioctl). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a struct drm_buf_desc request. - * \return zero on success or a negative number on failure. 
- * - * According with the memory type specified in drm_buf_desc::flags and the - * build options, it dispatches the call either to addbufs_agp(), - * addbufs_sg() or addbufs_pci() for AGP, scatter-gather or consistent - * PCI memory respectively. - */ -int drm_legacy_addbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_desc *request = data; - int ret; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - -#if IS_ENABLED(CONFIG_AGP) - if (request->flags & _DRM_AGP_BUFFER) - ret = drm_legacy_addbufs_agp(dev, request); - else -#endif - if (request->flags & _DRM_SG_BUFFER) - ret = drm_legacy_addbufs_sg(dev, request); - else if (request->flags & _DRM_FB_BUFFER) - ret = -EINVAL; - else - ret = drm_legacy_addbufs_pci(dev, request); - - return ret; -} - -/* - * Get information about the buffer mappings. - * - * This was originally mean for debugging purposes, or by a sophisticated - * client library to determine how best to use the available buffers (e.g., - * large buffers can be used for image transfer). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_info structure. - * \return zero on success or a negative number on failure. - * - * Increments drm_device::buf_use while holding the drm_device::buf_lock - * lock, preventing of allocating more buffers after this call. Information - * about each requested buffer is then copied into user space. - */ -int __drm_legacy_infobufs(struct drm_device *dev, - void *data, int *p, - int (*f)(void *, int, struct drm_buf_entry *)) -{ - struct drm_device_dma *dma = dev->dma; - int i; - int count; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - ++dev->buf_use; /* Can't allocate more after this call */ - spin_unlock(&dev->buf_lock); - - for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) { - if (dma->bufs[i].buf_count) - ++count; - } - - DRM_DEBUG("count = %d\n", count); - - if (*p >= count) { - for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) { - struct drm_buf_entry *from = &dma->bufs[i]; - - if (from->buf_count) { - if (f(data, count, from) < 0) - return -EFAULT; - DRM_DEBUG("%d %d %d %d %d\n", - i, - dma->bufs[i].buf_count, - dma->bufs[i].buf_size, - dma->bufs[i].low_mark, - dma->bufs[i].high_mark); - ++count; - } - } - } - *p = count; - - return 0; -} - -static int copy_one_buf(void *data, int count, struct drm_buf_entry *from) -{ - struct drm_buf_info *request = data; - struct drm_buf_desc __user *to = &request->list[count]; - struct drm_buf_desc v = {.count = from->buf_count, - .size = from->buf_size, - .low_mark = from->low_mark, - .high_mark = from->high_mark}; - - if (copy_to_user(to, &v, offsetof(struct drm_buf_desc, flags))) - return -EFAULT; - return 0; -} - -int drm_legacy_infobufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_info *request = data; - - return __drm_legacy_infobufs(dev, data, &request->count, copy_one_buf); -} - -/* - * Specifies a low and high water mark for buffer allocation - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. 
- * \param arg a pointer to a drm_buf_desc structure. - * \return zero on success or a negative number on failure. - * - * Verifies that the size order is bounded between the admissible orders and - * updates the respective drm_device_dma::bufs entry low and high water mark. - * - * \note This ioctl is deprecated and mostly never used. - */ -int drm_legacy_markbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_desc *request = data; - int order; - struct drm_buf_entry *entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - DRM_DEBUG("%d, %d, %d\n", - request->size, request->low_mark, request->high_mark); - order = order_base_2(request->size); - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - entry = &dma->bufs[order]; - - if (request->low_mark < 0 || request->low_mark > entry->buf_count) - return -EINVAL; - if (request->high_mark < 0 || request->high_mark > entry->buf_count) - return -EINVAL; - - entry->low_mark = request->low_mark; - entry->high_mark = request->high_mark; - - return 0; -} - -/* - * Unreserve the buffers in list, previously reserved using drmDMA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_free structure. - * \return zero on success or a negative number on failure. - * - * Calls free_buffer() for each used buffer. - * This function is primarily used for debugging. - */ -int drm_legacy_freebufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_free *request = data; - int i; - int idx; - struct drm_buf *buf; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - DRM_DEBUG("%d\n", request->count); - for (i = 0; i < request->count; i++) { - if (copy_from_user(&idx, &request->list[i], sizeof(idx))) - return -EFAULT; - if (idx < 0 || idx >= dma->buf_count) { - DRM_ERROR("Index %d (of %d max)\n", - idx, dma->buf_count - 1); - return -EINVAL; - } - idx = array_index_nospec(idx, dma->buf_count); - buf = dma->buflist[idx]; - if (buf->file_priv != file_priv) { - DRM_ERROR("Process %d freeing buffer not owned\n", - task_pid_nr(current)); - return -EINVAL; - } - drm_legacy_free_buffer(dev, buf); - } - - return 0; -} - -/* - * Maps all of the DMA buffers into client-virtual space (ioctl). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_map structure. - * \return zero on success or a negative number on failure. - * - * Maps the AGP, SG or PCI buffer region with vm_mmap(), and copies information - * about each buffer into user space. For PCI buffers, it calls vm_mmap() with - * offset equal to 0, which drm_mmap() interprets as PCI buffers and calls - * drm_mmap_dma(). 
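On the userspace side, the addbufs/mapbufs pair was driven through legacy libdrm. A brief sketch, with error handling elided:

	#include <xf86drm.h>

	/* Ask for 16 PCI DMA buffers of 64 KiB each, then map them all. */
	int added = drmAddBufs(fd, 16, 0x10000, 0, 0);
	drmBufMapPtr bufs = drmMapBufs(fd);	/* wraps the mapbufs ioctl */

	/* Work is then queued against bufs->list[i].address ... */
	drmUnmapBufs(bufs);

drmMapBufs() performs the vm_mmap() dance described above and hands each buffer's client-virtual address back in the returned list.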
- */ -int __drm_legacy_mapbufs(struct drm_device *dev, void *data, int *p, - void __user **v, - int (*f)(void *, int, unsigned long, - struct drm_buf *), - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - int retcode = 0; - unsigned long virtual; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - dev->buf_use++; /* Can't allocate more after this call */ - spin_unlock(&dev->buf_lock); - - if (*p >= dma->buf_count) { - if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP)) - || (drm_core_check_feature(dev, DRIVER_SG) - && (dma->flags & _DRM_DMA_USE_SG))) { - struct drm_local_map *map = dev->agp_buffer_map; - unsigned long token = dev->agp_buffer_token; - - if (!map) { - retcode = -EINVAL; - goto done; - } - virtual = vm_mmap(file_priv->filp, 0, map->size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - token); - } else { - virtual = vm_mmap(file_priv->filp, 0, dma->byte_count, - PROT_READ | PROT_WRITE, - MAP_SHARED, 0); - } - if (virtual > -1024UL) { - /* Real error */ - retcode = (signed long)virtual; - goto done; - } - *v = (void __user *)virtual; - - for (i = 0; i < dma->buf_count; i++) { - if (f(data, i, virtual, dma->buflist[i]) < 0) { - retcode = -EFAULT; - goto done; - } - } - } - done: - *p = dma->buf_count; - DRM_DEBUG("%d buffers, retcode = %d\n", *p, retcode); - - return retcode; -} - -static int map_one_buf(void *data, int idx, unsigned long virtual, - struct drm_buf *buf) -{ - struct drm_buf_map *request = data; - unsigned long address = virtual + buf->offset; /* *** */ - - if (copy_to_user(&request->list[idx].idx, &buf->idx, - sizeof(request->list[0].idx))) - return -EFAULT; - if (copy_to_user(&request->list[idx].total, &buf->total, - sizeof(request->list[0].total))) - return -EFAULT; - if (clear_user(&request->list[idx].used, sizeof(int))) - return -EFAULT; - if (copy_to_user(&request->list[idx].address, &address, - sizeof(address))) - return -EFAULT; - return 0; -} - -int drm_legacy_mapbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_map *request = data; - - return __drm_legacy_mapbufs(dev, data, &request->count, - &request->virtual, map_one_buf, - file_priv); -} - -int drm_legacy_dma_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (dev->driver->dma_ioctl) - return dev->driver->dma_ioctl(dev, data, file_priv); - else - return -EINVAL; -} - -struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev) -{ - struct drm_map_list *entry; - - list_for_each_entry(entry, &dev->maplist, head) { - if (entry->map && entry->map->type == _DRM_SHM && - (entry->map->flags & _DRM_CONTAINS_LOCK)) { - return entry->map; - } - } - return NULL; -} -EXPORT_SYMBOL(drm_legacy_getsarea); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index c3027115d055..9403b3f576f7 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -5,7 +5,6 @@ #include <linux/iosys-map.h> #include <linux/list.h> -#include <linux/module.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -84,16 +83,13 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, if (!drm_core_check_feature(dev, 
DRIVER_MODESET) || !dev->driver->dumb_create) return -EOPNOTSUPP; - if (funcs && !try_module_get(funcs->owner)) - return -ENODEV; - client->dev = dev; client->name = name; client->funcs = funcs; ret = drm_client_modeset_create(client); if (ret) - goto err_put_module; + return ret; ret = drm_client_open(client); if (ret) @@ -105,10 +101,6 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, err_free: drm_client_modeset_free(client); -err_put_module: - if (funcs) - module_put(funcs->owner); - return ret; } EXPORT_SYMBOL(drm_client_init); @@ -177,8 +169,6 @@ void drm_client_release(struct drm_client_dev *client) drm_client_modeset_free(client); drm_client_close(client); drm_dev_put(dev); - if (client->funcs) - module_put(client->funcs->owner); } EXPORT_SYMBOL(drm_client_release); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index c3725086f413..b0516505f7ae 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1198,6 +1198,12 @@ static const u32 dp_colorspaces = * drm_connector_set_path_property(), in the case of DP MST with the * path property the MST manager created. Userspace cannot change this * property. + * + * In the case of DP MST, the property has the format + * ``mst:<parent>-<ports>`` where ``<parent>`` is the KMS object ID of the + * parent connector and ``<ports>`` is a hyphen-separated list of DP MST + * port numbers. Note, KMS object IDs are not guaranteed to be stable + * across reboots. * TILE: * Connector tile group property to indicate how a set of DRM connector * compose together into one logical screen. This is used by both high-res diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c deleted file mode 100644 index a0fc779e5e1e..000000000000 --- a/drivers/gpu/drm/drm_context.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Legacy: Generic DRM Contexts - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Author: Rickard E. (Rik) Faith <faith@valinux.com> - * Author: Gareth Hughes <gareth@valinux.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
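Returning to the PATH property format documented in the drm_connector.c hunk above: a hypothetical userspace helper for splitting such a value, with names and error handling illustrative only:

	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>

	/* Parse "mst:<parent>-<port>[-<port>...]", e.g. "mst:42-1-3". */
	static int parse_mst_path(const char *path, unsigned long *parent_id,
				  const char **ports)
	{
		char *end;

		if (strncmp(path, "mst:", 4))
			return -EINVAL;		/* not a DP MST path */

		*parent_id = strtoul(path + 4, &end, 10);
		if (*end != '-')
			return -EINVAL;

		*ports = end + 1;		/* hyphen-separated port list */
		return 0;
	}

Since the parent is a KMS object ID, the caveat in the hunk applies: a parsed value like this must not be persisted across reboots.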
- */ - -#include <linux/slab.h> -#include <linux/uaccess.h> - -#include <drm/drm_drv.h> -#include <drm/drm_file.h> -#include <drm/drm_print.h> - -#include "drm_legacy.h" - -struct drm_ctx_list { - struct list_head head; - drm_context_t handle; - struct drm_file *tag; -}; - -/******************************************************************/ -/** \name Context bitmap support */ -/*@{*/ - -/* - * Free a handle from the context bitmap. - * - * \param dev DRM device. - * \param ctx_handle context handle. - * - * Clears the bit specified by \p ctx_handle in drm_device::ctx_bitmap and the entry - * in drm_device::ctx_idr, while holding the drm_device::struct_mutex - * lock. - */ -void drm_legacy_ctxbitmap_free(struct drm_device * dev, int ctx_handle) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - idr_remove(&dev->ctx_idr, ctx_handle); - mutex_unlock(&dev->struct_mutex); -} - -/* - * Context bitmap allocation. - * - * \param dev DRM device. - * \return (non-negative) context handle on success or a negative number on failure. - * - * Allocate a new idr from drm_device::ctx_idr while holding the - * drm_device::struct_mutex lock. - */ -static int drm_legacy_ctxbitmap_next(struct drm_device * dev) -{ - int ret; - - mutex_lock(&dev->struct_mutex); - ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0, - GFP_KERNEL); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -/* - * Context bitmap initialization. - * - * \param dev DRM device. - * - * Initialise the drm_device::ctx_idr - */ -void drm_legacy_ctxbitmap_init(struct drm_device * dev) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - idr_init(&dev->ctx_idr); -} - -/* - * Context bitmap cleanup. - * - * \param dev DRM device. - * - * Free all idr members using drm_ctx_sarea_free helper function - * while holding the drm_device::struct_mutex lock. - */ -void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - idr_destroy(&dev->ctx_idr); - mutex_unlock(&dev->struct_mutex); -} - -/** - * drm_legacy_ctxbitmap_flush() - Flush all contexts owned by a file - * @dev: DRM device to operate on - * @file: Open file to flush contexts for - * - * This iterates over all contexts on @dev and drops them if they're owned by - * @file. Note that after this call returns, new contexts might be added if - * the file is still alive. - */ -void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) -{ - struct drm_ctx_list *pos, *tmp; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->ctxlist_mutex); - - list_for_each_entry_safe(pos, tmp, &dev->ctxlist, head) { - if (pos->tag == file && - pos->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_dtor) - dev->driver->context_dtor(dev, pos->handle); - - drm_legacy_ctxbitmap_free(dev, pos->handle); - list_del(&pos->head); - kfree(pos); - } - } - - mutex_unlock(&dev->ctxlist_mutex); -} - -/*@}*/ - -/******************************************************************/ -/** \name Per Context SAREA Support */ -/*@{*/ - -/* - * Get per-context SAREA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx_priv_map structure. - * \return zero on success or a negative number on failure. - * - * Gets the map from drm_device::ctx_idr with the handle specified and - * returns its handle. 
- */ -int drm_legacy_getsareactx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_priv_map *request = data; - struct drm_local_map *map; - struct drm_map_list *_entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - - map = idr_find(&dev->ctx_idr, request->ctx_id); - if (!map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - request->handle = NULL; - list_for_each_entry(_entry, &dev->maplist, head) { - if (_entry->map == map) { - request->handle = - (void *)(unsigned long)_entry->user_token; - break; - } - } - - mutex_unlock(&dev->struct_mutex); - - if (request->handle == NULL) - return -EINVAL; - - return 0; -} - -/* - * Set per-context SAREA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx_priv_map structure. - * \return zero on success or a negative number on failure. - * - * Searches the mapping specified in \p arg and update the entry in - * drm_device::ctx_idr with it. - */ -int drm_legacy_setsareactx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_priv_map *request = data; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list = NULL; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map - && r_list->user_token == (unsigned long) request->handle) - goto found; - } - bad: - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - - found: - map = r_list->map; - if (!map) - goto bad; - - if (IS_ERR(idr_replace(&dev->ctx_idr, map, request->ctx_id))) - goto bad; - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -/*@}*/ - -/******************************************************************/ -/** \name The actual DRM context handling routines */ -/*@{*/ - -/* - * Switch context. - * - * \param dev DRM device. - * \param old old context handle. - * \param new new context handle. - * \return zero on success or a negative number on failure. - * - * Attempt to set drm_device::context_flag. - */ -static int drm_context_switch(struct drm_device * dev, int old, int new) -{ - if (test_and_set_bit(0, &dev->context_flag)) { - DRM_ERROR("Reentering -- FIXME\n"); - return -EBUSY; - } - - DRM_DEBUG("Context switch from %d to %d\n", old, new); - - if (new == dev->last_context) { - clear_bit(0, &dev->context_flag); - return 0; - } - - return 0; -} - -/* - * Complete context switch. - * - * \param dev DRM device. - * \param new new context handle. - * \return zero on success or a negative number on failure. - * - * Updates drm_device::last_context and drm_device::last_switch. Verifies the - * hardware lock is held, clears the drm_device::context_flag and wakes up - * drm_device::context_wait. - */ -static int drm_context_switch_complete(struct drm_device *dev, - struct drm_file *file_priv, int new) -{ - dev->last_context = new; /* PRE/POST: This is the _only_ writer. */ - - if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock)) { - DRM_ERROR("Lock isn't held after context switch\n"); - } - - /* If a context switch is ever initiated - when the kernel holds the lock, release - that lock here. - */ - clear_bit(0, &dev->context_flag); - - return 0; -} - -/* - * Reserve contexts. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. 
- * \param arg user argument pointing to a drm_ctx_res structure. - * \return zero on success or a negative number on failure. - */ -int drm_legacy_resctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_res *res = data; - struct drm_ctx ctx; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (res->count >= DRM_RESERVED_CONTEXTS) { - memset(&ctx, 0, sizeof(ctx)); - for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) { - ctx.handle = i; - if (copy_to_user(&res->contexts[i], &ctx, sizeof(ctx))) - return -EFAULT; - } - } - res->count = DRM_RESERVED_CONTEXTS; - - return 0; -} - -/* - * Add context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Get a new handle for the context and copy to userspace. - */ -int drm_legacy_addctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_list *ctx_entry; - struct drm_ctx *ctx = data; - int tmp_handle; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - tmp_handle = drm_legacy_ctxbitmap_next(dev); - if (tmp_handle == DRM_KERNEL_CONTEXT) { - /* Skip kernel's context and get a new one. */ - tmp_handle = drm_legacy_ctxbitmap_next(dev); - } - DRM_DEBUG("%d\n", tmp_handle); - if (tmp_handle < 0) { - DRM_DEBUG("Not enough free contexts.\n"); - /* Should this return -EBUSY instead? */ - return tmp_handle; - } - - ctx->handle = tmp_handle; - - ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL); - if (!ctx_entry) { - DRM_DEBUG("out of memory\n"); - return -ENOMEM; - } - - INIT_LIST_HEAD(&ctx_entry->head); - ctx_entry->handle = ctx->handle; - ctx_entry->tag = file_priv; - - mutex_lock(&dev->ctxlist_mutex); - list_add(&ctx_entry->head, &dev->ctxlist); - mutex_unlock(&dev->ctxlist_mutex); - - return 0; -} - -/* - * Get context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - */ -int drm_legacy_getctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - /* This is 0, because we don't handle any context flags */ - ctx->flags = 0; - - return 0; -} - -/* - * Switch context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Calls context_switch(). - */ -int drm_legacy_switchctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - return drm_context_switch(dev, dev->last_context, ctx->handle); -} - -/* - * New context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Calls context_switch_complete(). 
- */ -int drm_legacy_newctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - drm_context_switch_complete(dev, file_priv, ctx->handle); - - return 0; -} - -/* - * Remove context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * If not the special kernel context, calls ctxbitmap_free() to free the specified context. - */ -int drm_legacy_rmctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - if (ctx->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_dtor) - dev->driver->context_dtor(dev, ctx->handle); - drm_legacy_ctxbitmap_free(dev, ctx->handle); - } - - mutex_lock(&dev->ctxlist_mutex); - if (!list_empty(&dev->ctxlist)) { - struct drm_ctx_list *pos, *n; - - list_for_each_entry_safe(pos, n, &dev->ctxlist, head) { - if (pos->handle == ctx->handle) { - list_del(&pos->head); - kfree(pos); - } - } - } - mutex_unlock(&dev->ctxlist_mutex); - - return 0; -} - -/*@}*/ diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index a209659a996c..2dafc39a27cb 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -439,11 +439,8 @@ EXPORT_SYMBOL(drm_crtc_helper_set_mode); * @state: atomic state object * * Provides a default CRTC-state check handler for CRTCs that only have - * one primary plane attached to it. - * - * This is often the case for the CRTC of simple framebuffers. See also - * drm_plane_helper_atomic_check() for the respective plane-state check - * helper function. + * one primary plane attached to it. This is often the case for the CRTC + * of simple framebuffers. * * RETURNS: * Zero on success, or an errno code otherwise. 
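The drm_crtc_helper_atomic_check() kernel-doc trimmed above is the complete atomic_check story for hardware whose CRTC drives exactly one primary plane. A minimal wiring sketch, assuming a hypothetical single-plane driver (the myfb_ names are illustrative and not part of this patch):

	#include <drm/drm_modeset_helper_vtables.h>

	/* The default check suffices when the CRTC has only a primary plane. */
	static const struct drm_crtc_helper_funcs myfb_crtc_helper_funcs = {
		.atomic_check = drm_crtc_helper_atomic_check,
	};

	/* Called once at driver init, after the CRTC is created. */
	drm_crtc_helper_add(&myfb->crtc, &myfb_crtc_helper_funcs);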
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 8556c3b3ff88..a514d5207e41 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -222,6 +222,8 @@ int drm_mode_addfb2_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_mode_rmfb_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int drm_mode_closefb_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); int drm_mode_getfb(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_mode_getfb2_ioctl(struct drm_device *dev, @@ -251,7 +253,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state, struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, - uint64_t prop_value); + u64 prop_value, bool async_flip); int drm_atomic_get_property(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val); diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index d8b2955e88fd..afb02aae707b 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -241,7 +241,8 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF); iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF); - if (!iter->clips || !drm_rect_equals(&state->src, &old_state->src)) { + if (!iter->clips || state->ignore_damage_clips || + !drm_rect_equals(&state->src, &old_state->src)) { iter->clips = NULL; iter->num_clips = 0; iter->full_update = true; diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index f291fb4b359f..f4715a67e340 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -314,10 +314,8 @@ void drm_debugfs_dev_register(struct drm_device *dev) drm_framebuffer_debugfs_init(dev); drm_client_debugfs_init(dev); } - if (drm_drv_uses_atomic_modeset(dev)) { + if (drm_drv_uses_atomic_modeset(dev)) drm_atomic_debugfs_init(dev); - drm_bridge_debugfs_init(dev); - } } int drm_debugfs_register(struct drm_minor *minor, int minor_id, @@ -589,4 +587,65 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc) crtc->debugfs_entry = NULL; } +static int bridges_show(struct seq_file *m, void *data) +{ + struct drm_encoder *encoder = m->private; + struct drm_printer p = drm_seq_file_printer(m); + struct drm_bridge *bridge; + unsigned int idx = 0; + + drm_for_each_bridge_in_chain(encoder, bridge) { + drm_printf(&p, "bridge[%d]: %ps\n", idx++, bridge->funcs); + drm_printf(&p, "\ttype: [%d] %s\n", + bridge->type, + drm_get_connector_type_name(bridge->type)); +#ifdef CONFIG_OF + if (bridge->of_node) + drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node); +#endif + drm_printf(&p, "\tops: [0x%x]", bridge->ops); + if (bridge->ops & DRM_BRIDGE_OP_DETECT) + drm_puts(&p, " detect"); + if (bridge->ops & DRM_BRIDGE_OP_EDID) + drm_puts(&p, " edid"); + if (bridge->ops & DRM_BRIDGE_OP_HPD) + drm_puts(&p, " hpd"); + if (bridge->ops & DRM_BRIDGE_OP_MODES) + drm_puts(&p, " modes"); + drm_puts(&p, "\n"); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(bridges); + +void drm_debugfs_encoder_add(struct drm_encoder *encoder) +{ + struct drm_minor *minor = encoder->dev->primary; + struct dentry *root; + char *name; + + name = kasprintf(GFP_KERNEL, "encoder-%d", encoder->index); + if (!name) + return; + + root = debugfs_create_dir(name, minor->debugfs_root); + kfree(name); + + encoder->debugfs_entry = root; + + /* 
bridges list */ + debugfs_create_file("bridges", 0444, root, encoder, + &bridges_fops); + + if (encoder->funcs && encoder->funcs->debugfs_init) + encoder->funcs->debugfs_init(encoder, root); +} + +void drm_debugfs_encoder_remove(struct drm_encoder *encoder) +{ + debugfs_remove_recursive(encoder->debugfs_entry); + encoder->debugfs_entry = NULL; +} + #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c deleted file mode 100644 index eb6b741a6f99..000000000000 --- a/drivers/gpu/drm/drm_dma.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * \file drm_dma.c - * DMA IOCTL and function support - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/export.h> -#include <linux/pci.h> - -#include <drm/drm_drv.h> -#include <drm/drm_print.h> - -#include "drm_legacy.h" - -/** - * drm_legacy_dma_setup() - Initialize the DMA data. - * - * @dev: DRM device. - * Return: zero on success or a negative value on failure. - * - * Allocate and initialize a drm_device_dma structure. - */ -int drm_legacy_dma_setup(struct drm_device *dev) -{ - int i; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - !drm_core_check_feature(dev, DRIVER_LEGACY)) - return 0; - - dev->buf_use = 0; - atomic_set(&dev->buf_alloc, 0); - - dev->dma = kzalloc(sizeof(*dev->dma), GFP_KERNEL); - if (!dev->dma) - return -ENOMEM; - - for (i = 0; i <= DRM_MAX_ORDER; i++) - memset(&dev->dma->bufs[i], 0, sizeof(dev->dma->bufs[0])); - - return 0; -} - -/** - * drm_legacy_dma_takedown() - Cleanup the DMA resources. - * - * @dev: DRM device. - * - * Free all pages associated with DMA buffers, the buffers and pages lists, and - * finally the drm_device::dma structure itself. 
- */ -void drm_legacy_dma_takedown(struct drm_device *dev) -{ - struct drm_device_dma *dma = dev->dma; - drm_dma_handle_t *dmah; - int i, j; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - !drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - if (!dma) - return; - - /* Clear dma buffers */ - for (i = 0; i <= DRM_MAX_ORDER; i++) { - if (dma->bufs[i].seg_count) { - DRM_DEBUG("order %d: buf_count = %d," - " seg_count = %d\n", - i, - dma->bufs[i].buf_count, - dma->bufs[i].seg_count); - for (j = 0; j < dma->bufs[i].seg_count; j++) { - if (dma->bufs[i].seglist[j]) { - dmah = dma->bufs[i].seglist[j]; - dma_free_coherent(dev->dev, - dmah->size, - dmah->vaddr, - dmah->busaddr); - kfree(dmah); - } - } - kfree(dma->bufs[i].seglist); - } - if (dma->bufs[i].buf_count) { - for (j = 0; j < dma->bufs[i].buf_count; j++) { - kfree(dma->bufs[i].buflist[j].dev_private); - } - kfree(dma->bufs[i].buflist); - } - } - - kfree(dma->buflist); - kfree(dma->pagelist); - kfree(dev->dma); - dev->dma = NULL; -} - -/** - * drm_legacy_free_buffer() - Free a buffer. - * - * @dev: DRM device. - * @buf: buffer to free. - * - * Resets the fields of \p buf. - */ -void drm_legacy_free_buffer(struct drm_device *dev, struct drm_buf * buf) -{ - if (!buf) - return; - - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - buf->used = 0; -} - -/** - * drm_legacy_reclaim_buffers() - Reclaim the buffers. - * - * @dev: DRM device. - * @file_priv: DRM file private. - * - * Frees each buffer associated with \p file_priv not already on the hardware. - */ -void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - int i; - - if (!dma) - return; - for (i = 0; i < dma->buf_count; i++) { - if (dma->buflist[i]->file_priv == file_priv) { - switch (dma->buflist[i]->list) { - case DRM_LIST_NONE: - drm_legacy_free_buffer(dev, dma->buflist[i]); - break; - case DRM_LIST_WAIT: - dma->buflist[i]->list = DRM_LIST_RECLAIM; - break; - default: - /* Buffer already on hardware. 
*/ - break; - } - } - } -} diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 535f16e7882e..243cacb3575c 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -48,7 +48,6 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" -#include "drm_legacy.h" MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl"); MODULE_DESCRIPTION("DRM shared core routines"); @@ -585,8 +584,6 @@ static void drm_fs_inode_free(struct inode *inode) static void drm_dev_init_release(struct drm_device *dev, void *res) { - drm_legacy_ctxbitmap_cleanup(dev); - drm_legacy_remove_map_hash(dev); drm_fs_inode_free(dev->anon_inode); put_device(dev->dev); @@ -597,7 +594,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); mutex_destroy(&dev->struct_mutex); - drm_legacy_destroy_members(dev); } static int drm_dev_init(struct drm_device *dev, @@ -632,7 +628,6 @@ static int drm_dev_init(struct drm_device *dev, return -EINVAL; } - drm_legacy_init_members(dev); INIT_LIST_HEAD(&dev->filelist); INIT_LIST_HEAD(&dev->filelist_internal); INIT_LIST_HEAD(&dev->clientlist); @@ -673,12 +668,6 @@ static int drm_dev_init(struct drm_device *dev, goto err; } - ret = drm_legacy_create_map_hash(dev); - if (ret) - goto err; - - drm_legacy_ctxbitmap_init(dev); - if (drm_core_check_feature(dev, DRIVER_GEM)) { ret = drm_gem_init(dev); if (ret) { @@ -949,8 +938,11 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) goto err_minors; } - if (drm_core_check_feature(dev, DRIVER_MODESET)) - drm_modeset_register_all(dev); + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + ret = drm_modeset_register_all(dev); + if (ret) + goto err_unload; + } DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", driver->name, driver->major, driver->minor, @@ -960,6 +952,9 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) goto out_unlock; +err_unload: + if (dev->driver->unload) + dev->driver->unload(dev); err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_ACCEL); @@ -990,9 +985,6 @@ EXPORT_SYMBOL(drm_dev_register); */ void drm_dev_unregister(struct drm_device *dev) { - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - drm_lastclose(dev); - dev->registered = false; drm_client_dev_unregister(dev); @@ -1003,9 +995,6 @@ void drm_dev_unregister(struct drm_device *dev) if (dev->driver->unload) dev->driver->unload(dev); - drm_legacy_pci_agp_destroy(dev); - drm_legacy_rmmaps(dev); - remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_ACCEL); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3b4065099872..69c68804023f 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -41,10 +41,12 @@ #include <drm/drm_displayid.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/drm_encoder.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" +#include "drm_internal.h" static int oui(u8 first, u8 second, u8 third) { @@ -5511,6 +5513,27 @@ static void clear_eld(struct drm_connector *connector) } /* + * Get 3-byte SAD buffer from struct cea_sad. + */ +void drm_edid_cta_sad_get(const struct cea_sad *cta_sad, u8 *sad) +{ + sad[0] = cta_sad->format << 3 | cta_sad->channels; + sad[1] = cta_sad->freq; + sad[2] = cta_sad->byte2; +} + +/* + * Set struct cea_sad from 3-byte SAD buffer. 
+ */ +void drm_edid_cta_sad_set(struct cea_sad *cta_sad, const u8 *sad) +{ + cta_sad->format = (sad[0] & 0x78) >> 3; + cta_sad->channels = sad[0] & 0x07; + cta_sad->freq = sad[1] & 0x7f; + cta_sad->byte2 = sad[2]; +} + +/* * drm_edid_to_eld - build ELD from EDID * @connector: connector corresponding to the HDMI/DP sink * @drm_edid: EDID to parse @@ -5594,7 +5617,7 @@ static void drm_edid_to_eld(struct drm_connector *connector, } static int _drm_edid_to_sad(const struct drm_edid *drm_edid, - struct cea_sad **sads) + struct cea_sad **psads) { const struct cea_db *db; struct cea_db_iter iter; @@ -5603,20 +5626,16 @@ static int _drm_edid_to_sad(const struct drm_edid *drm_edid, cea_db_iter_edid_begin(drm_edid, &iter); cea_db_iter_for_each(db, &iter) { if (cea_db_tag(db) == CTA_DB_AUDIO) { - int j; + struct cea_sad *sads; + int i; count = cea_db_payload_len(db) / 3; /* SAD is 3B */ - *sads = kcalloc(count, sizeof(**sads), GFP_KERNEL); - if (!*sads) + sads = kcalloc(count, sizeof(*sads), GFP_KERNEL); + *psads = sads; + if (!sads) return -ENOMEM; - for (j = 0; j < count; j++) { - const u8 *sad = &db->data[j * 3]; - - (*sads)[j].format = (sad[0] & 0x78) >> 3; - (*sads)[j].channels = sad[0] & 0x7; - (*sads)[j].freq = sad[1] & 0x7F; - (*sads)[j].byte2 = sad[2]; - } + for (i = 0; i < count; i++) + drm_edid_cta_sad_set(&sads[i], &db->data[i * 3]); break; } } diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c index 5d9ef267ebb3..60fcb80bce61 100644 --- a/drivers/gpu/drm/drm_edid_load.c +++ b/drivers/gpu/drm/drm_edid_load.c @@ -23,22 +23,6 @@ module_param_string(edid_firmware, edid_firmware, sizeof(edid_firmware), 0644); MODULE_PARM_DESC(edid_firmware, "Do not probe monitor, use specified EDID blob " "from built-in data or /lib/firmware instead. 
"); -/* Use only for backward compatibility with drm_kms_helper.edid_firmware */ -int __drm_set_edid_firmware_path(const char *path) -{ - scnprintf(edid_firmware, sizeof(edid_firmware), "%s", path); - - return 0; -} -EXPORT_SYMBOL(__drm_set_edid_firmware_path); - -/* Use only for backward compatibility with drm_kms_helper.edid_firmware */ -int __drm_get_edid_firmware_path(char *buf, size_t bufsize) -{ - return scnprintf(buf, bufsize, "%s", edid_firmware); -} -EXPORT_SYMBOL(__drm_get_edid_firmware_path); - #define GENERIC_EDIDS 6 static const char * const generic_edid_name[GENERIC_EDIDS] = { "edid/800x600.bin", diff --git a/drivers/gpu/drm/drm_eld.c b/drivers/gpu/drm/drm_eld.c new file mode 100644 index 000000000000..5177991aa272 --- /dev/null +++ b/drivers/gpu/drm/drm_eld.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_edid.h> +#include <drm/drm_eld.h> + +#include "drm_internal.h" + +/** + * drm_eld_sad_get - get SAD from ELD to struct cea_sad + * @eld: ELD buffer + * @sad_index: SAD index + * @cta_sad: destination struct cea_sad + * + * @return: 0 on success, or negative on errors + */ +int drm_eld_sad_get(const u8 *eld, int sad_index, struct cea_sad *cta_sad) +{ + const u8 *sad; + + if (sad_index >= drm_eld_sad_count(eld)) + return -EINVAL; + + sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index); + + drm_edid_cta_sad_set(cta_sad, sad); + + return 0; +} +EXPORT_SYMBOL(drm_eld_sad_get); + +/** + * drm_eld_sad_set - set SAD to ELD from struct cea_sad + * @eld: ELD buffer + * @sad_index: SAD index + * @cta_sad: source struct cea_sad + * + * @return: 0 on success, or negative on errors + */ +int drm_eld_sad_set(u8 *eld, int sad_index, const struct cea_sad *cta_sad) +{ + u8 *sad; + + if (sad_index >= drm_eld_sad_count(eld)) + return -EINVAL; + + sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index); + + drm_edid_cta_sad_get(cta_sad, sad); + + return 0; +} +EXPORT_SYMBOL(drm_eld_sad_set); diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c index 1143bc7f3252..8f2bc6a28482 100644 --- a/drivers/gpu/drm/drm_encoder.c +++ b/drivers/gpu/drm/drm_encoder.c @@ -30,6 +30,7 @@ #include <drm/drm_print.h> #include "drm_crtc_internal.h" +#include "drm_internal.h" /** * DOC: overview @@ -74,6 +75,8 @@ int drm_encoder_register_all(struct drm_device *dev) int ret = 0; drm_for_each_encoder(encoder, dev) { + drm_debugfs_encoder_add(encoder); + if (encoder->funcs && encoder->funcs->late_register) ret = encoder->funcs->late_register(encoder); if (ret) @@ -90,6 +93,7 @@ void drm_encoder_unregister_all(struct drm_device *dev) drm_for_each_encoder(encoder, dev) { if (encoder->funcs && encoder->funcs->early_unregister) encoder->funcs->early_unregister(encoder); + drm_debugfs_encoder_remove(encoder); } } diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index 5d2809de4517..48ee851b61d9 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec) * drm_exec_init - initialize a drm_exec object * @exec: the drm_exec object to initialize * @flags: controls locking behavior, see DRM_EXEC_* defines + * @nr: the initial # of objects * * Initialize the object and make sure that we can track locked objects. + * + * If nr is non-zero then it is used as the initial objects table size. + * In either case, the table will grow (be re-allocated) on demand. 
*/ -void drm_exec_init(struct drm_exec *exec, uint32_t flags) +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr) { + if (!nr) + nr = PAGE_SIZE / sizeof(void *); + exec->flags = flags; - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); + exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL); /* If allocation here fails, just delay that till the first use */ - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0; + exec->max_objects = exec->objects ? nr : 0; exec->num_objects = 0; exec->contended = DRM_EXEC_DUMMY; exec->prelocked = NULL; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 446458aca8e9..8c87287c3e16 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -47,7 +47,6 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" -#include "drm_legacy.h" /* from BKL pushdown */ DEFINE_MUTEX(drm_global_mutex); @@ -55,14 +54,6 @@ DEFINE_MUTEX(drm_global_mutex); bool drm_dev_needs_global_mutex(struct drm_device *dev) { /* - * Legacy drivers rely on all kinds of BKL locking semantics, don't - * bother. They also still need BKL locking for their ioctls, so better - * safe than sorry. - */ - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - return true; - - /* * The deprecated ->load callback must be called after the driver is * already registered. This means such drivers rely on the BKL to make * sure an open can't proceed until the driver is actually fully set up. @@ -107,9 +98,7 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev) * drm_send_event() as the main starting points. * * The memory mapping implementation will vary depending on how the driver - * manages memory. Legacy drivers will use the deprecated drm_legacy_mmap() - * function, modern drivers should use one of the provided memory-manager - * specific implementations. For GEM-based drivers this is drm_gem_mmap(). + * manages memory. For GEM-based drivers this is drm_gem_mmap(). * * No other file operations are supported by the DRM userspace API. 
Overall the * following is an example &file_operations structure:: @@ -254,18 +243,6 @@ void drm_file_free(struct drm_file *file) (long)old_encode_dev(file->minor->kdev->devt), atomic_read(&dev->open_count)); -#ifdef CONFIG_DRM_LEGACY - if (drm_core_check_feature(dev, DRIVER_LEGACY) && - dev->driver->preclose) - dev->driver->preclose(dev, file); -#endif - - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - drm_legacy_lock_release(dev, file->filp); - - if (drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - drm_legacy_reclaim_buffers(dev, file); - drm_events_release(file); if (drm_core_check_feature(dev, DRIVER_MODESET)) { @@ -279,8 +256,6 @@ void drm_file_free(struct drm_file *file) if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); - drm_legacy_ctxbitmap_flush(dev, file); - if (drm_is_primary_client(file)) drm_master_release(file); @@ -367,29 +342,6 @@ int drm_open_helper(struct file *filp, struct drm_minor *minor) list_add(&priv->lhead, &dev->filelist); mutex_unlock(&dev->filelist_mutex); -#ifdef CONFIG_DRM_LEGACY -#ifdef __alpha__ - /* - * Default the hose - */ - if (!dev->hose) { - struct pci_dev *pci_dev; - - pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, NULL); - if (pci_dev) { - dev->hose = pci_dev->sysdata; - pci_dev_put(pci_dev); - } - if (!dev->hose) { - struct pci_bus *b = list_entry(pci_root_buses.next, - struct pci_bus, node); - if (b) - dev->hose = b->sysdata; - } - } -#endif -#endif - return 0; } @@ -411,7 +363,6 @@ int drm_open(struct inode *inode, struct file *filp) struct drm_device *dev; struct drm_minor *minor; int retcode; - int need_setup = 0; minor = drm_minor_acquire(iminor(inode)); if (IS_ERR(minor)) @@ -421,8 +372,7 @@ int drm_open(struct inode *inode, struct file *filp) if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); - if (!atomic_fetch_inc(&dev->open_count)) - need_setup = 1; + atomic_fetch_inc(&dev->open_count); /* share address_space across all char-devs of a single device */ filp->f_mapping = dev->anon_inode->i_mapping; @@ -430,13 +380,6 @@ int drm_open(struct inode *inode, struct file *filp) retcode = drm_open_helper(filp, minor); if (retcode) goto err_undo; - if (need_setup) { - retcode = drm_legacy_setup(dev); - if (retcode) { - drm_close_helper(filp); - goto err_undo; - } - } if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); @@ -460,9 +403,6 @@ void drm_lastclose(struct drm_device * dev) dev->driver->lastclose(dev); drm_dbg_core(dev, "driver lastclose completed\n"); - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - drm_legacy_dev_reinit(dev); - drm_client_dev_restore(dev); } @@ -913,7 +853,7 @@ static void print_size(struct drm_printer *p, const char *stat, unsigned u; for (u = 0; u < ARRAY_SIZE(units) - 1; u++) { - if (sz < SZ_1K) + if (sz == 0 || !IS_ALIGNED(sz, SZ_1K)) break; sz = div_u64(sz, SZ_1K); } @@ -958,7 +898,7 @@ void drm_show_memory_stats(struct drm_printer *p, struct drm_file *file) { struct drm_gem_object *obj; struct drm_memory_stats status = {}; - enum drm_gem_object_status supported_status; + enum drm_gem_object_status supported_status = 0; int id; spin_lock(&file->table_lock); diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c index 060b753881a2..8c6090a90d56 100644 --- a/drivers/gpu/drm/drm_flip_work.c +++ b/drivers/gpu/drm/drm_flip_work.c @@ -27,14 +27,12 @@ #include <drm/drm_print.h> #include <drm/drm_util.h> -/** - * drm_flip_work_allocate_task - allocate a flip-work task - * @data: data associated to the task - * @flags: allocator flags - * 
- * Allocate a drm_flip_task object and attach private data to it. - */ -struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) +struct drm_flip_task { + struct list_head node; + void *data; +}; + +static struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) { struct drm_flip_task *task; @@ -44,18 +42,8 @@ struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) return task; } -EXPORT_SYMBOL(drm_flip_work_allocate_task); -/** - * drm_flip_work_queue_task - queue a specific task - * @work: the flip-work - * @task: the task to handle - * - * Queues task, that will later be run (passed back to drm_flip_func_t - * func) on a work queue after drm_flip_work_commit() is called. - */ -void drm_flip_work_queue_task(struct drm_flip_work *work, - struct drm_flip_task *task) +static void drm_flip_work_queue_task(struct drm_flip_work *work, struct drm_flip_task *task) { unsigned long flags; @@ -63,7 +51,6 @@ void drm_flip_work_queue_task(struct drm_flip_work *work, list_add_tail(&task->node, &work->queued); spin_unlock_irqrestore(&work->lock, flags); } -EXPORT_SYMBOL(drm_flip_work_queue_task); /** * drm_flip_work_queue - queue work diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index f93a4efcee90..b1be458ed4dd 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -20,6 +20,97 @@ #include <drm/drm_print.h> #include <drm/drm_rect.h> +/** + * drm_format_conv_state_init - Initialize format-conversion state + * @state: The state to initialize + * + * Clears all fields in struct drm_format_conv_state. The state will + * be empty with no preallocated resources. + */ +void drm_format_conv_state_init(struct drm_format_conv_state *state) +{ + state->tmp.mem = NULL; + state->tmp.size = 0; + state->tmp.preallocated = false; +} +EXPORT_SYMBOL(drm_format_conv_state_init); + +/** + * drm_format_conv_state_copy - Copy format-conversion state + * @state: Destination state + * @old_state: Source state + * + * Copies format-conversion state from @old_state to @state; except for + * temporary storage. + */ +void drm_format_conv_state_copy(struct drm_format_conv_state *state, + const struct drm_format_conv_state *old_state) +{ + /* + * So far, there's only temporary storage here, which we don't + * duplicate. Just clear the fields. + */ + state->tmp.mem = NULL; + state->tmp.size = 0; + state->tmp.preallocated = false; +} +EXPORT_SYMBOL(drm_format_conv_state_copy); + +/** + * drm_format_conv_state_reserve - Allocates storage for format conversion + * @state: The format-conversion state + * @new_size: The minimum allocation size + * @flags: Flags for kmalloc() + * + * Allocates at least @new_size bytes and returns a pointer to the memory + * range. After calling this function, previously returned memory blocks + * are invalid. It's best to collect all memory requirements of a format + * conversion and call this function once to allocate the range. + * + * Returns: + * A pointer to the allocated memory range, or NULL otherwise. 
+ */ +void *drm_format_conv_state_reserve(struct drm_format_conv_state *state, + size_t new_size, gfp_t flags) +{ + void *mem; + + if (new_size <= state->tmp.size) + goto out; + else if (state->tmp.preallocated) + return NULL; + + mem = krealloc(state->tmp.mem, new_size, flags); + if (!mem) + return NULL; + + state->tmp.mem = mem; + state->tmp.size = new_size; + +out: + return state->tmp.mem; +} +EXPORT_SYMBOL(drm_format_conv_state_reserve); + +/** + * drm_format_conv_state_release - Releases the format-conversion storage + * @state: The format-conversion state + * + * Releases the memory range referenced by the format-conversion state. + * After this call, all pointers to the memory are invalid. Prefer + * drm_format_conv_state_init() for cleaning up and unloading a driver. + */ +void drm_format_conv_state_release(struct drm_format_conv_state *state) +{ + if (state->tmp.preallocated) + return; + + kfree(state->tmp.mem); + state->tmp.mem = NULL; + state->tmp.size = 0; +} +EXPORT_SYMBOL(drm_format_conv_state_release); + static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp) { return clip->y1 * pitch + clip->x1 * cpp; @@ -45,6 +136,7 @@ EXPORT_SYMBOL(drm_fb_clip_offset); static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); @@ -60,7 +152,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p * one line at a time. */ if (!vaddr_cached_hint) { - stmp = kmalloc(sbuf_len, GFP_KERNEL); + stmp = drm_format_conv_state_reserve(state, sbuf_len, GFP_KERNEL); if (!stmp) return -ENOMEM; } @@ -79,8 +171,6 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p dst += dst_pitch; } - kfree(stmp); - return 0; } @@ -88,6 +178,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); @@ -101,9 +192,9 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign void *dbuf; if (vaddr_cached_hint) { - dbuf = kmalloc(dbuf_len, GFP_KERNEL); + dbuf = drm_format_conv_state_reserve(state, dbuf_len, GFP_KERNEL); } else { - dbuf = kmalloc(stmp_off + sbuf_len, GFP_KERNEL); + dbuf = drm_format_conv_state_reserve(state, stmp_off + sbuf_len, GFP_KERNEL); stmp = dbuf + stmp_off; } if (!dbuf) @@ -124,8 +215,6 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign dst += dst_pitch; } - kfree(dbuf); - return 0; } @@ -134,6 +223,7 @@ static int drm_fb_xfrm(struct iosys_map *dst, const unsigned int *dst_pitch, const u8 *dst_pixsize, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { @@ -146,10 +236,12 @@ static int drm_fb_xfrm(struct iosys_map *dst,
/* TODO: handle src in I/O memory here */ if (dst[0].is_iomem) return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line); + src[0].vaddr, fb, clip, vaddr_cached_hint, state, + xfrm_line); else return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line); + src[0].vaddr, fb, clip, vaddr_cached_hint, state, + xfrm_line); } /** @@ -235,6 +327,7 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @cached: Source buffer is mapped cached (eg. not write-combined) + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and swaps per-pixel * bytes during the process. Destination and framebuffer formats must match. The @@ -249,7 +342,8 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels */ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool cached) + const struct drm_rect *clip, bool cached, + struct drm_format_conv_state *state) { const struct drm_format_info *format = fb->format; u8 cpp = DIV_ROUND_UP(drm_format_info_bpp(format, 0), 8); @@ -268,7 +362,7 @@ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, return; } - drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, swab_line); + drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, state, swab_line); } EXPORT_SYMBOL(drm_fb_swab); @@ -295,6 +389,7 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. 
The @@ -309,13 +404,13 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne */ void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb332_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332); @@ -364,6 +459,7 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * @swab: Swap bytes * * This function copies parts of a framebuffer to display memory and converts the @@ -379,7 +475,8 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, */ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool swab) + const struct drm_rect *clip, struct drm_format_conv_state *state, + bool swab) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, @@ -392,7 +489,7 @@ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pi else xfrm_line = drm_fb_xrgb8888_to_rgb565_line; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, xfrm_line); + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); @@ -421,6 +518,7 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and @@ -436,13 +534,13 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555); @@ -473,6 +571,7 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. 
The parameters @dst, @dst_pitch and @@ -488,13 +587,13 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555); @@ -525,6 +624,7 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and @@ -540,13 +640,13 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgba5551_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551); @@ -575,6 +675,7 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The @@ -590,13 +691,13 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne */ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 3, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); @@ -623,6 +724,7 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. 
The parameters @dst, @dst_pitch and @src refer @@ -638,13 +740,13 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb8888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); @@ -669,13 +771,14 @@ static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsig static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_abgr8888_line); } @@ -699,13 +802,14 @@ static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsig static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xbgr8888_line); } @@ -735,6 +839,7 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The @@ -750,13 +855,14 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un */ void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010); @@ -788,6 +894,7 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. 
The parameters @dst, @dst_pitch and @@ -803,13 +910,14 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un */ void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010); @@ -839,6 +947,7 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The @@ -858,13 +967,13 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned */ void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_gray8_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); @@ -878,6 +987,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); * @src: The framebuffer memory to copy from * @fb: The framebuffer to copy from * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory. 
If the * formats of the display and the framebuffer mismatch, the blit function @@ -896,7 +1006,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); */ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { uint32_t fb_format = fb->format->format; @@ -904,44 +1014,44 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d drm_fb_memcpy(dst, dst_pitch, src, fb, clip); return 0; } else if (fb_format == (dst_format | DRM_FORMAT_BIG_ENDIAN)) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == (dst_format & ~DRM_FORMAT_BIG_ENDIAN)) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == DRM_FORMAT_XRGB8888) { if (dst_format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false); + drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false); return 0; } else if (dst_format == DRM_FORMAT_XRGB1555) { - drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB1555) { - drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGBA5551) { - drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGB888) { - drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB8888) { - drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XBGR8888) { - drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ABGR8888) { - drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XRGB2101010) { - drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB2101010) { - drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_BGRX8888) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } } @@ -978,6 +1088,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. 
The @@ -1002,7 +1113,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int */ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 @@ -1042,7 +1153,7 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc * Allocate a buffer to be used for both copying from the cma * memory and to store the intermediate grayscale line pixels. */ - src32 = kmalloc(len_src32 + linepixels, GFP_KERNEL); + src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL); if (!src32) return; @@ -1056,8 +1167,6 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc vaddr += fb->pitches[0]; mono += dst_pitch_0; } - - kfree(src32); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono); diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index d3ba0698b84b..888aadb6a4ac 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -394,6 +394,31 @@ static void drm_mode_rmfb_work_fn(struct work_struct *w) } } +static int drm_mode_closefb(struct drm_framebuffer *fb, + struct drm_file *file_priv) +{ + struct drm_framebuffer *fbl; + bool found = false; + + mutex_lock(&file_priv->fbs_lock); + list_for_each_entry(fbl, &file_priv->fbs, filp_head) + if (fb == fbl) + found = true; + + if (!found) { + mutex_unlock(&file_priv->fbs_lock); + return -ENOENT; + } + + list_del_init(&fb->filp_head); + mutex_unlock(&file_priv->fbs_lock); + + /* Drop the reference that was stored in the fbs list */ + drm_framebuffer_put(fb); + + return 0; +} + /** * drm_mode_rmfb - remove an FB from the configuration * @dev: drm device @@ -410,9 +435,8 @@ static void drm_mode_rmfb_work_fn(struct work_struct *w) int drm_mode_rmfb(struct drm_device *dev, u32 fb_id, struct drm_file *file_priv) { - struct drm_framebuffer *fb = NULL; - struct drm_framebuffer *fbl = NULL; - int found = 0; + struct drm_framebuffer *fb; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -421,24 +445,13 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id, if (!fb) return -ENOENT; - mutex_lock(&file_priv->fbs_lock); - list_for_each_entry(fbl, &file_priv->fbs, filp_head) - if (fb == fbl) - found = 1; - if (!found) { - mutex_unlock(&file_priv->fbs_lock); - goto fail_unref; + ret = drm_mode_closefb(fb, file_priv); + if (ret != 0) { + drm_framebuffer_put(fb); + return ret; } - list_del_init(&fb->filp_head); - mutex_unlock(&file_priv->fbs_lock); - - /* drop the reference we picked up in framebuffer lookup */ - drm_framebuffer_put(fb); - /* - * we now own the reference that was stored in the fbs list - * * drm_framebuffer_remove may fail with -EINTR on pending signals, * so run this in a separate stack as there's no way to correctly * handle this after the fb is already removed from the lookup table. 
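The drm_mode_closefb() helper factored out above gives the new CLOSEFB ioctl (wired up in drm_mode_closefb_ioctl() in the next hunk) its semantics: the framebuffer is only unlinked from the file's list and the list's reference dropped, so a plane that still scans it out keeps its own reference and the image stays up, whereas RMFB actively disables any CRTC or plane using the fb. A hypothetical userspace sketch, assuming the DRM_IOCTL_MODE_CLOSEFB uAPI added alongside this code:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/drm.h>

	/* Drop this file's handle on fb_id without the implicit disable
	 * that DRM_IOCTL_MODE_RMFB performs. */
	static int close_fb(int fd, uint32_t fb_id)
	{
		struct drm_mode_closefb closefb = { .fb_id = fb_id };

		return ioctl(fd, DRM_IOCTL_MODE_CLOSEFB, &closefb);
	}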
@@ -448,6 +461,7 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id, INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn); INIT_LIST_HEAD(&arg.fbs); + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); list_add_tail(&fb->filp_head, &arg.fbs); schedule_work(&arg.work); @@ -457,10 +471,6 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id, drm_framebuffer_put(fb); return 0; - -fail_unref: - drm_framebuffer_put(fb); - return -ENOENT; } int drm_mode_rmfb_ioctl(struct drm_device *dev, @@ -471,6 +481,28 @@ int drm_mode_rmfb_ioctl(struct drm_device *dev, return drm_mode_rmfb(dev, *fb_id, file_priv); } +int drm_mode_closefb_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + struct drm_mode_closefb *r = data; + struct drm_framebuffer *fb; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EOPNOTSUPP; + + if (r->pad) + return -EINVAL; + + fb = drm_framebuffer_lookup(dev, file_priv, r->fb_id); + if (!fb) + return -ENOENT; + + ret = drm_mode_closefb(fb, file_priv); + drm_framebuffer_put(fb); + return ret; +} + /** * drm_mode_getfb - get FB info * @dev: drm device for the ioctl @@ -552,7 +584,7 @@ int drm_mode_getfb2_ioctl(struct drm_device *dev, struct drm_mode_fb_cmd2 *r = data; struct drm_framebuffer *fb; unsigned int i; - int ret; + int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -796,6 +828,8 @@ void drm_framebuffer_free(struct kref *kref) container_of(kref, struct drm_framebuffer, base.refcount); struct drm_device *dev = fb->dev; + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); + /* * The lookup idr holds a weak reference, which has not necessarily been * removed at this point. Check for that. @@ -1088,7 +1122,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb) dev = fb->dev; - WARN_ON(!list_empty(&fb->filp_head)); + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); /* * drm ABI mandates that we remove any deleted framebuffers from active diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index 5d4b9cd077f7..e440f458b663 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -218,7 +218,14 @@ void __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, struct drm_shadow_plane_state *new_shadow_plane_state) { + struct drm_plane_state *plane_state = plane->state; + struct drm_shadow_plane_state *shadow_plane_state = + to_drm_shadow_plane_state(plane_state); + __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); + + drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state, + &new_shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); @@ -266,6 +273,7 @@ EXPORT_SYMBOL(drm_gem_duplicate_shadow_plane_state); */ void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state) { + drm_format_conv_state_release(&shadow_plane_state->fmtcnv_state); __drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base); } EXPORT_SYMBOL(__drm_gem_destroy_shadow_plane_state); @@ -302,6 +310,7 @@ void __drm_gem_reset_shadow_plane(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state) { __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); + drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index b80d4e1cc9b7..f9eb56f24bef 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ 
b/drivers/gpu/drm/drm_gpuvm.c @@ -61,6 +61,42 @@ * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva * entries from within dma-fence signalling critical sections it is enough to * pre-allocate the &drm_gpuva structures. + * + * &drm_gem_objects which are private to a single VM can share a common + * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec). + * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in + * the following called 'resv object', which serves as the container of the + * GPUVM's shared &dma_resv. This resv object can be a driver specific + * &drm_gem_object, such as the &drm_gem_object containing the root page table, + * but it can also be a 'dummy' object, which can be allocated with + * drm_gpuvm_resv_object_alloc(). + * + * In order to connect a struct drm_gpuva to its backing &drm_gem_object, each + * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each + * &drm_gpuvm_bo contains a list of &drm_gpuva structures. + * + * A &drm_gpuvm_bo is an abstraction that represents a combination of a + * &drm_gpuvm and a &drm_gem_object. Every such combination should be unique. + * This is ensured by the API through drm_gpuvm_bo_obtain() and + * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding + * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this + * particular combination. If none exists, a new instance is created and linked + * to the &drm_gem_object. + * + * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used + * as entries for the &drm_gpuvm's lists of external and evicted objects. Those + * lists are maintained in order to accelerate locking of dma-resv locks and + * validation of evicted objects bound in a &drm_gpuvm. For instance, all + * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling + * drm_gpuvm_exec_lock(). Once locked, drivers can call drm_gpuvm_validate() in + * order to validate all evicted &drm_gem_objects. It is also possible to lock + * additional &drm_gem_objects by providing the corresponding parameters to + * drm_gpuvm_exec_lock() as well as to open-code the &drm_exec loop while making + * use of helper functions such as drm_gpuvm_prepare_range() or + * drm_gpuvm_prepare_objects(). + * + * Every bound &drm_gem_object is treated as an external object when its + * &dma_resv structure is different from the &drm_gpuvm's common &dma_resv structure. */ /** @@ -386,21 +422,42 @@ /** * DOC: Locking * - * Generally, the GPU VA manager does not take care of locking itself, it is - * the drivers responsibility to take care about locking. Drivers might want to - * protect the following operations: inserting, removing and iterating - * &drm_gpuva objects as well as generating all kinds of operations, such as - * split / merge or prefetch. - * - * The GPU VA manager also does not take care of the locking of the backing - * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to - * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively - * a driver specific external lock. For the latter see also - * drm_gem_gpuva_set_lock(). - * - * However, the GPU VA manager contains lockdep checks to ensure callers of its - * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is - * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink().
+ * In terms of managing &drm_gpuva entries, DRM GPUVM does not take care of + * locking itself; it is the driver's responsibility to take care of locking. + * Drivers might want to protect the following operations: inserting, removing + * and iterating &drm_gpuva objects as well as generating all kinds of + * operations, such as split / merge or prefetch. + * + * DRM GPUVM also does not take care of the locking of the backing + * &drm_gem_object buffers' GPU VA lists and &drm_gpuvm_bo abstractions by + * itself; drivers are responsible for enforcing mutual exclusion using either + * the GEM's dma_resv lock or alternatively a driver specific external lock. + * For the latter see also drm_gem_gpuva_set_lock(). + * + * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold + * the corresponding lock whenever the &drm_gem_object's GPU VA list is accessed + * by functions such as drm_gpuva_link() or drm_gpuva_unlink(), but also + * drm_gpuvm_bo_obtain() and drm_gpuvm_bo_put(). + * + * The latter is required since on creation and destruction of a &drm_gpuvm_bo + * the &drm_gpuvm_bo is attached to / removed from the &drm_gem_object's gpuva + * list. Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and + * &drm_gem_object must be able to observe previous creations and destructions + * of &drm_gpuvm_bos in order to keep instances unique. + * + * The &drm_gpuvm's lists for keeping track of external and evicted objects are + * protected against concurrent insertion / removal and iteration internally. + * + * However, drivers still need to protect concurrent calls to functions + * iterating those lists, namely drm_gpuvm_prepare_objects() and + * drm_gpuvm_validate(). + * + * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate + * that the corresponding &dma_resv locks are held in order to protect the + * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and + * the corresponding lockdep checks are enabled. This is an optimization for + * drivers which are capable of taking the corresponding &dma_resv locks and + * hence do not require internal locking.
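+ *
+ * A minimal sketch of the resv-protected mode from a driver's submit path
+ * (hypothetical driver code; the fence and flag choices are assumptions):
+ *
+ *	struct drm_gpuvm_exec vm_exec = {
+ *		.vm = gpuvm,
+ *		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+ *		.num_fences = 1,
+ *	};
+ *
+ *	ret = drm_gpuvm_exec_lock(&vm_exec);
+ *	if (ret)
+ *		return ret;
+ *
+ *	ret = drm_gpuvm_validate(gpuvm, &vm_exec.exec);
+ *	if (ret)
+ *		goto unlock;
+ *
+ *	// submit the job, then add its fence to all locked resvs
+ *	drm_gpuvm_resv_add_fence(gpuvm, &vm_exec.exec, fence,
+ *				 DMA_RESV_USAGE_BOOKKEEP,
+ *				 DMA_RESV_USAGE_WRITE);
+ * unlock:
+ *	drm_gpuvm_exec_unlock(&vm_exec);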
*/ /** @@ -430,6 +487,7 @@ * { * struct drm_gpuva_ops *ops; * struct drm_gpuva_op *op; + * struct drm_gpuvm_bo *vm_bo; * * driver_lock_va_space(); * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, @@ -437,6 +495,10 @@ * if (IS_ERR(ops)) * return PTR_ERR(ops); * + * vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj); + * if (IS_ERR(vm_bo)) + * return PTR_ERR(vm_bo); + * * drm_gpuva_for_each_op(op, ops) { * struct drm_gpuva *va; * @@ -449,7 +511,7 @@ * * driver_vm_map(); * drm_gpuva_map(gpuvm, va, &op->map); - * drm_gpuva_link(va); + * drm_gpuva_link(va, vm_bo); * * break; * case DRM_GPUVA_OP_REMAP: { @@ -476,11 +538,11 @@ * driver_vm_remap(); * drm_gpuva_remap(prev, next, &op->remap); * - * drm_gpuva_unlink(va); * if (prev) - * drm_gpuva_link(prev); + * drm_gpuva_link(prev, va->vm_bo); * if (next) - * drm_gpuva_link(next); + * drm_gpuva_link(next, va->vm_bo); + * drm_gpuva_unlink(va); * * break; * } @@ -496,6 +558,7 @@ * break; * } * } + * drm_gpuvm_bo_put(vm_bo); * driver_unlock_va_space(); * * return 0; @@ -505,6 +568,7 @@ * * struct driver_context { * struct drm_gpuvm *gpuvm; + * struct drm_gpuvm_bo *vm_bo; * struct drm_gpuva *new_va; * struct drm_gpuva *prev_va; * struct drm_gpuva *next_va; @@ -525,6 +589,7 @@ * struct drm_gem_object *obj, u64 offset) * { * struct driver_context ctx; * struct drm_gpuva_ops *ops; * struct drm_gpuva_op *op; * int ret = 0; @@ -534,16 +599,23 @@ * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); - * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { + * ctx.vm_bo = drm_gpuvm_bo_create(gpuvm, obj); + * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va || !ctx.vm_bo) { * ret = -ENOMEM; * goto out; * } * + * // Typically protected with a driver specific GEM gpuva lock + * // used in the fence signaling path for drm_gpuva_link() and + * // drm_gpuva_unlink(), hence pre-allocate. + * ctx.vm_bo = drm_gpuvm_bo_obtain_prealloc(ctx.vm_bo); + * * driver_lock_va_space(); * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); * driver_unlock_va_space(); * * out: + * drm_gpuvm_bo_put(ctx.vm_bo); * kfree(ctx.new_va); * kfree(ctx.prev_va); * kfree(ctx.next_va); @@ -556,7 +628,7 @@ * * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); * - * drm_gpuva_link(ctx->new_va); + * drm_gpuva_link(ctx->new_va, ctx->vm_bo); * * // prevent the new GPUVA from being freed in * // driver_mapping_create() @@ -568,22 +640,23 @@ * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) * { * struct driver_context *ctx = __ctx; + * struct drm_gpuva *va = op->remap.unmap->va; * * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); * - * drm_gpuva_unlink(op->remap.unmap->va); - * kfree(op->remap.unmap->va); - * * if (op->remap.prev) { - * drm_gpuva_link(ctx->prev_va); + * drm_gpuva_link(ctx->prev_va, va->vm_bo); * ctx->prev_va = NULL; * } * * if (op->remap.next) { - * drm_gpuva_link(ctx->next_va); + * drm_gpuva_link(ctx->next_va, va->vm_bo); * ctx->next_va = NULL; * } * + * drm_gpuva_unlink(va); + * kfree(va); + * * return 0; * } * @@ -597,6 +670,201 @@ * } */ +/** + * get_next_vm_bo_from_list() - get the next vm_bo element + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * @__local_list: a pointer to the local list used to store already iterated items + * @__prev_vm_bo: the previous element we got from get_next_vm_bo_from_list() + * + * This helper is here to provide lockless list iteration.
Lockless as in, the + * iterator releases the lock immediately after picking the first element from + * the list, so list insertion and deletion can happen concurrently. + * + * Elements popped from the original list are kept in a local list, so removal + * and is_empty checks can still happen while we're iterating the list. + */ +#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo) \ + ({ \ + struct drm_gpuvm_bo *__vm_bo = NULL; \ + \ + drm_gpuvm_bo_put(__prev_vm_bo); \ + \ + spin_lock(&(__gpuvm)->__list_name.lock); \ + if (!(__gpuvm)->__list_name.local_list) \ + (__gpuvm)->__list_name.local_list = __local_list; \ + else \ + drm_WARN_ON((__gpuvm)->drm, \ + (__gpuvm)->__list_name.local_list != __local_list); \ + \ + while (!list_empty(&(__gpuvm)->__list_name.list)) { \ + __vm_bo = list_first_entry(&(__gpuvm)->__list_name.list, \ + struct drm_gpuvm_bo, \ + list.entry.__list_name); \ + if (kref_get_unless_zero(&__vm_bo->kref)) { \ + list_move_tail(&(__vm_bo)->list.entry.__list_name, \ + __local_list); \ + break; \ + } else { \ + list_del_init(&(__vm_bo)->list.entry.__list_name); \ + __vm_bo = NULL; \ + } \ + } \ + spin_unlock(&(__gpuvm)->__list_name.lock); \ + \ + __vm_bo; \ + }) + +/** + * for_each_vm_bo_in_list() - internal vm_bo list iterator + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * @__local_list: a pointer to the local list used to store already iterated items + * @__vm_bo: the struct drm_gpuvm_bo to assign in each iteration step + * + * This helper is here to provide lockless list iteration. Lockless as in, the + * iterator releases the lock immediately after picking the first element from the + * list, hence list insertion and deletion can happen concurrently. + * + * It is not allowed to re-assign the vm_bo pointer from inside this loop. + * + * Typical use: + * + * struct drm_gpuvm_bo *vm_bo; + * LIST_HEAD(my_local_list); + * + * ret = 0; + * for_each_vm_bo_in_list(gpuvm, <list_name>, &my_local_list, vm_bo) { + * ret = do_something_with_vm_bo(..., vm_bo); + * if (ret) + * break; + * } + * // Drop ref in case we break out of the loop. + * drm_gpuvm_bo_put(vm_bo); + * restore_vm_bo_list(gpuvm, <list_name>); + * + * Only used for internal list iterations, not meant to be exposed to the outside + * world. + */ +#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo) \ + for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ + __local_list, NULL); \ + __vm_bo; \ + __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ + __local_list, __vm_bo)) + +static void +__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *list, struct list_head **local_list) +{ + /* Merge back the two lists, moving local list elements to the + * head to preserve previous ordering, in case it matters. + */ + spin_lock(lock); + if (*local_list) { + list_splice(*local_list, list); + *local_list = NULL; + } + spin_unlock(lock); +} + +/** + * restore_vm_bo_list() - move vm_bo elements back to their original list + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * + * When we're done iterating a vm_bo list, we should call restore_vm_bo_list() + * to restore the original state and let new iterations take place.
+ */ #define restore_vm_bo_list(__gpuvm, __list_name) \ + __restore_vm_bo_list((__gpuvm), &(__gpuvm)->__list_name.lock, \ + &(__gpuvm)->__list_name.list, \ + &(__gpuvm)->__list_name.local_list) + +static void +cond_spin_lock(spinlock_t *lock, bool cond) +{ + if (cond) + spin_lock(lock); +} + +static void +cond_spin_unlock(spinlock_t *lock, bool cond) +{ + if (cond) + spin_unlock(lock); +} + +static void +__drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *entry, struct list_head *list) +{ + cond_spin_lock(lock, !!lock); + if (list_empty(entry)) + list_add_tail(entry, list); + cond_spin_unlock(lock, !!lock); +} + +/** + * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to insert into + * @__lock: whether to lock with the internal spinlock + * + * Inserts the given @__vm_bo into the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_add(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_add((__vm_bo)->vm, \ + __lock ? &(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + &(__vm_bo)->vm->__list_name.list) + +static void +__drm_gpuvm_bo_list_del(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *entry, bool init) +{ + cond_spin_lock(lock, !!lock); + if (init) { + if (!list_empty(entry)) + list_del_init(entry); + } else { + list_del(entry); + } + cond_spin_unlock(lock, !!lock); +} + +/** + * drm_gpuvm_bo_list_del_init() - remove a vm_bo from the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to remove from + * @__lock: whether to lock with the internal spinlock + * + * Removes the given @__vm_bo from the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_del_init(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ + __lock ? &(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + true) + +/** + * drm_gpuvm_bo_list_del() - remove a vm_bo from the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to remove from + * @__lock: whether to lock with the internal spinlock + * + * Removes the given @__vm_bo from the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_del(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ + __lock ?
&(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + false) + #define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) #define GPUVA_START(node) ((node)->va.addr) @@ -618,8 +886,14 @@ drm_gpuvm_check_overflow(u64 addr, u64 range) { u64 end; - return WARN(check_add_overflow(addr, range, &end), - "GPUVA address limited to %zu bytes.\n", sizeof(end)); + return check_add_overflow(addr, range, &end); +} + +static bool +drm_gpuvm_warn_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range) +{ + return drm_WARN(gpuvm->drm, drm_gpuvm_check_overflow(addr, range), + "GPUVA address limited to %zu bytes.\n", sizeof(addr)); } static bool @@ -643,7 +917,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuvm *gpuvm, u64 addr, u64 range) return krange && addr < kend && kstart < end; } -static bool +/** + * drm_gpuvm_range_valid() - checks whether the given range is valid for the + * given &drm_gpuvm + * @gpuvm: the GPUVM to check the range for + * @addr: the base address + * @range: the range starting from the base address + * + * Checks whether the range is within the GPUVM's managed boundaries. + * + * Returns: true for a valid range, false otherwise + */ +bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range) { @@ -651,11 +936,52 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, drm_gpuvm_in_mm_range(gpuvm, addr, range) && !drm_gpuvm_in_kernel_node(gpuvm, addr, range); } +EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); + +static void +drm_gpuvm_gem_object_free(struct drm_gem_object *obj) +{ + drm_gem_object_release(obj); + kfree(obj); +} + +static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = { + .free = drm_gpuvm_gem_object_free, +}; + +/** + * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object + * @drm: the driver's &drm_device + * + * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in + * order to serve as the root GEM object providing the &dma_resv shared across + * &drm_gem_objects local to a single GPUVM. + * + * Returns: the &drm_gem_object on success, NULL on failure + */ +struct drm_gem_object * +drm_gpuvm_resv_object_alloc(struct drm_device *drm) +{ + struct drm_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + obj->funcs = &drm_gpuvm_object_funcs; + drm_gem_private_object_init(drm, obj, 0); + + return obj; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); /** * drm_gpuvm_init() - initialize a &drm_gpuvm * @gpuvm: pointer to the &drm_gpuvm to initialize * @name: the name of the GPU VA space + * @flags: the &drm_gpuvm_flags for this GPUVM + * @drm: the &drm_device this VM resides in + * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv * @start_offset: the start offset of the GPU VA space * @range: the size of the GPU VA space * @reserve_offset: the start of the kernel reserved GPU VA area @@ -668,8 +994,10 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, * &name is expected to be managed by the surrounding driver structures.
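+ *
+ * A minimal initialization sketch (hypothetical driver code; the VA layout
+ * numbers and my_gpuvm_ops are assumptions, but &drm_gpuvm_ops::vm_free must
+ * be provided since the VM is freed through drm_gpuvm_put()):
+ *
+ *	struct drm_gem_object *r_obj = drm_gpuvm_resv_object_alloc(drm);
+ *
+ *	if (!r_obj)
+ *		return -ENOMEM;
+ *
+ *	drm_gpuvm_init(gpuvm, "example-vm", 0, drm, r_obj,
+ *		       0, 1ull << 48,		// managed VA space
+ *		       0, SZ_64K,		// kernel reserved node
+ *		       &my_gpuvm_ops);
+ *	drm_gem_object_put(r_obj);	// drm_gpuvm_init() took its own reference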
*/ void -drm_gpuvm_init(struct drm_gpuvm *gpuvm, - const char *name, +drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + enum drm_gpuvm_flags flags, + struct drm_device *drm, + struct drm_gem_object *r_obj, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops) @@ -677,45 +1005,713 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, gpuvm->rb.tree = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpuvm->rb.list); - drm_gpuvm_check_overflow(start_offset, range); - gpuvm->mm_start = start_offset; - gpuvm->mm_range = range; + INIT_LIST_HEAD(&gpuvm->extobj.list); + spin_lock_init(&gpuvm->extobj.lock); + + INIT_LIST_HEAD(&gpuvm->evict.list); + spin_lock_init(&gpuvm->evict.lock); + + kref_init(&gpuvm->kref); gpuvm->name = name ? name : "unknown"; + gpuvm->flags = flags; gpuvm->ops = ops; + gpuvm->drm = drm; + gpuvm->r_obj = r_obj; - memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); + drm_gem_object_get(r_obj); + + drm_gpuvm_warn_check_overflow(gpuvm, start_offset, range); + gpuvm->mm_start = start_offset; + gpuvm->mm_range = range; + memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); if (reserve_range) { gpuvm->kernel_alloc_node.va.addr = reserve_offset; gpuvm->kernel_alloc_node.va.range = reserve_range; - if (likely(!drm_gpuvm_check_overflow(reserve_offset, - reserve_range))) + if (likely(!drm_gpuvm_warn_check_overflow(gpuvm, reserve_offset, + reserve_range))) __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); } } EXPORT_SYMBOL_GPL(drm_gpuvm_init); +static void +drm_gpuvm_fini(struct drm_gpuvm *gpuvm) +{ + gpuvm->name = NULL; + + if (gpuvm->kernel_alloc_node.va.range) + __drm_gpuva_remove(&gpuvm->kernel_alloc_node); + + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); + + drm_WARN(gpuvm->drm, !list_empty(&gpuvm->extobj.list), + "Extobj list should be empty.\n"); + drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list), + "Evict list should be empty.\n"); + + drm_gem_object_put(gpuvm->r_obj); +} + +static void +drm_gpuvm_free(struct kref *kref) +{ + struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref); + + drm_gpuvm_fini(gpuvm); + + if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) + return; + + gpuvm->ops->vm_free(gpuvm); +} + /** - * drm_gpuvm_destroy() - cleanup a &drm_gpuvm - * @gpuvm: pointer to the &drm_gpuvm to clean up + * drm_gpuvm_put() - drop a struct drm_gpuvm reference + * @gpuvm: the &drm_gpuvm to release the reference of + * + * This releases a reference to @gpuvm. * - * Note that it is a bug to call this function on a manager that still - * holds GPU VA mappings. + * This function may be called from atomic context. */ void -drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) +drm_gpuvm_put(struct drm_gpuvm *gpuvm) { - gpuvm->name = NULL; + if (gpuvm) + kref_put(&gpuvm->kref, drm_gpuvm_free); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_put); - if (gpuvm->kernel_alloc_node.va.range) - __drm_gpuva_remove(&gpuvm->kernel_alloc_node); +static int +exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, + unsigned int num_fences) +{ + return num_fences ? 
drm_exec_prepare_obj(exec, obj, num_fences) : + drm_exec_lock_obj(exec, obj); +} - +/** + * drm_gpuvm_prepare_vm() - prepare the GPUVM's common dma-resv + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec context + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for the GPUVM's dummy &drm_gem_object; if + * @num_fences is zero drm_exec_lock_obj() is called instead. + * + * Using this function directly, it is the driver's responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + return exec_prepare_obj(exec, gpuvm->r_obj, num_fences); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm); + +static int +__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gpuvm_bo *vm_bo; + LIST_HEAD(extobjs); + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { + ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + } + /* Drop ref in case we break out of the loop. */ + drm_gpuvm_bo_put(vm_bo); + restore_vm_bo_list(gpuvm, extobj); + + return ret; } -EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +static int +drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gpuvm_bo *vm_bo; + int ret = 0; + + drm_gpuvm_resv_assert_held(gpuvm); + list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { + ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + + if (vm_bo->evicted) + drm_gpuvm_bo_list_add(vm_bo, evict, false); + } + + return ret; +} + +/** + * drm_gpuvm_prepare_objects() - prepare all associated BOs + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec locking context + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given + * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj() + * is called instead. + * + * Using this function directly, it is the driver's responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. + * + * Note: This function is safe against concurrent insertion and removal of + * external objects; however, it is not safe against concurrent usage itself. + * + * Drivers need to make sure to protect this case with either an outer VM lock + * or by calling drm_gpuvm_prepare_vm() before this function within the + * drm_exec_until_all_locked() loop, such that the GPUVM's dma-resv lock ensures + * mutual exclusion. + * + * Returns: 0 on success, negative error code on failure.
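+ *
+ * A sketch of the open-coded loop the note above refers to (hypothetical
+ * driver code):
+ *
+ *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ *	drm_exec_until_all_locked(&exec) {
+ *		// locking the VM's common resv first serializes callers
+ *		ret = drm_gpuvm_prepare_vm(gpuvm, &exec, num_fences);
+ *		drm_exec_retry_on_contention(&exec);
+ *		if (ret)
+ *			goto err;
+ *
+ *		ret = drm_gpuvm_prepare_objects(gpuvm, &exec, num_fences);
+ *		drm_exec_retry_on_contention(&exec);
+ *		if (ret)
+ *			goto err;
+ *	}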
+ */ +int +drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + if (drm_gpuvm_resv_protected(gpuvm)) + return drm_gpuvm_prepare_objects_locked(gpuvm, exec, + num_fences); + else + return __drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects); + +/** + * drm_gpuvm_prepare_range() - prepare all BOs mapped within a given range + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec locking context + * @addr: the start address within the VA space + * @range: the range to iterate within the VA space + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr + * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called + * instead. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, + u64 addr, u64 range, unsigned int num_fences) +{ + struct drm_gpuva *va; + u64 end = addr + range; + int ret; + + drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { + struct drm_gem_object *obj = va->gem.obj; + + ret = exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); + +/** + * drm_gpuvm_exec_lock() - lock all dma-resv of all associated BOs + * @vm_exec: the &drm_gpuvm_exec wrapper + * + * Acquires all dma-resv locks of all &drm_gem_objects the given + * &drm_gpuvm contains mappings of. + * + * Additionally, when calling this function with struct drm_gpuvm_exec::extra + * being set, the driver receives the given @fn callback to lock additional + * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers + * would call drm_exec_prepare_obj() from within this callback. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) +{ + struct drm_gpuvm *gpuvm = vm_exec->vm; + struct drm_exec *exec = &vm_exec->exec; + unsigned int num_fences = vm_exec->num_fences; + int ret; + + drm_exec_init(exec, vm_exec->flags, 0); + + drm_exec_until_all_locked(exec) { + ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + + ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + + if (vm_exec->extra.fn) { + ret = vm_exec->extra.fn(vm_exec); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + } + } + + return 0; + +err: + drm_exec_fini(exec); + return ret; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock); + +static int +fn_lock_array(struct drm_gpuvm_exec *vm_exec) +{ + struct { + struct drm_gem_object **objs; + unsigned int num_objs; + } *args = vm_exec->extra.priv; + + return drm_exec_prepare_array(&vm_exec->exec, args->objs, + args->num_objs, vm_exec->num_fences); +} + +/** + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all associated BOs + * @vm_exec: the &drm_gpuvm_exec wrapper + * @objs: additional &drm_gem_objects to lock + * @num_objs: the number of additional &drm_gem_objects to lock + * + * Acquires all dma-resv locks of all &drm_gem_objects the given &drm_gpuvm + * contains mappings of, plus the ones given through @objs. + * + * Returns: 0 on success, negative error code on failure.
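+ *
+ * A usage sketch (hypothetical job structure):
+ *
+ *	struct drm_gpuvm_exec vm_exec = {
+ *		.vm = gpuvm,
+ *		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+ *		.num_fences = 1,
+ *	};
+ *	struct drm_gem_object *objs[] = { job->in_bo, job->out_bo };
+ *
+ *	ret = drm_gpuvm_exec_lock_array(&vm_exec, objs, ARRAY_SIZE(objs));
+ *	if (ret)
+ *		return ret;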
+ */ +int +drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, + struct drm_gem_object **objs, + unsigned int num_objs) +{ + struct { + struct drm_gem_object **objs; + unsigned int num_objs; + } args; + + args.objs = objs; + args.num_objs = num_objs; + + vm_exec->extra.fn = fn_lock_array; + vm_exec->extra.priv = &args; + + return drm_gpuvm_exec_lock(vm_exec); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_array); + +/** + * drm_gpuvm_exec_lock_range() - prepare all BOs mapped within a given range + * @vm_exec: the &drm_gpuvm_exec wrapper + * @addr: the start address within the VA space + * @range: the range to iterate within the VA space + * + * Acquires all dma-resv locks of all &drm_gem_objects mapped between @addr and + * @addr + @range. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, + u64 addr, u64 range) +{ + struct drm_gpuvm *gpuvm = vm_exec->vm; + struct drm_exec *exec = &vm_exec->exec; + int ret; + + drm_exec_init(exec, vm_exec->flags, 0); + + drm_exec_until_all_locked(exec) { + ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, + vm_exec->num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + } + + return ret; + +err: + drm_exec_fini(exec); + return ret; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_range); + +static int +__drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo; + LIST_HEAD(evict); + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, evict, &evict, vm_bo) { + ret = ops->vm_bo_validate(vm_bo, exec); + if (ret) + break; + } + /* Drop ref in case we break out of the loop. */ + drm_gpuvm_bo_put(vm_bo); + restore_vm_bo_list(gpuvm, evict); + + return ret; +} + +static int +drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo, *next; + int ret = 0; + + drm_gpuvm_resv_assert_held(gpuvm); + + list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list, + list.entry.evict) { + ret = ops->vm_bo_validate(vm_bo, exec); + if (ret) + break; + + dma_resv_assert_held(vm_bo->obj->resv); + if (!vm_bo->evicted) + drm_gpuvm_bo_list_del_init(vm_bo, evict, false); + } + + return ret; +} + +/** + * drm_gpuvm_validate() - validate all BOs marked as evicted + * @gpuvm: the &drm_gpuvm to validate evicted BOs + * @exec: the &drm_exec instance used for locking the GPUVM + * + * Calls the &drm_gpuvm_ops::vm_bo_validate callback for all evicted buffer + * objects being mapped in the given &drm_gpuvm. + * + * Returns: 0 on success, negative error code on failure. 
+ */ +int +drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + + if (unlikely(!ops || !ops->vm_bo_validate)) + return -EOPNOTSUPP; + + if (drm_gpuvm_resv_protected(gpuvm)) + return drm_gpuvm_validate_locked(gpuvm, exec); + else + return __drm_gpuvm_validate(gpuvm, exec); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_validate); + +/** + * drm_gpuvm_resv_add_fence() - add fence to private and all extobj + * dma-resv + * @gpuvm: the &drm_gpuvm to add a fence to + * @exec: the &drm_exec locking context + * @fence: fence to add + * @private_usage: private dma-resv usage + * @extobj_usage: extobj dma-resv usage + */ +void +drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage) +{ + struct drm_gem_object *obj; + unsigned long index; + + drm_exec_for_each_locked_object(exec, index, obj) { + dma_resv_assert_held(obj->resv); + dma_resv_add_fence(obj->resv, fence, + drm_gpuvm_is_extobj(gpuvm, obj) ? + extobj_usage : private_usage); + } +} +EXPORT_SYMBOL_GPL(drm_gpuvm_resv_add_fence); + +/** + * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * If provided by the driver, this function uses the &drm_gpuvm_ops + * vm_bo_alloc() callback to allocate. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo; + + if (ops && ops->vm_bo_alloc) + vm_bo = ops->vm_bo_alloc(); + else + vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL); + + if (unlikely(!vm_bo)) + return NULL; + + vm_bo->vm = drm_gpuvm_get(gpuvm); + vm_bo->obj = obj; + drm_gem_object_get(obj); + + kref_init(&vm_bo->kref); + INIT_LIST_HEAD(&vm_bo->list.gpuva); + INIT_LIST_HEAD(&vm_bo->list.entry.gem); + + INIT_LIST_HEAD(&vm_bo->list.entry.extobj); + INIT_LIST_HEAD(&vm_bo->list.entry.evict); + + return vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); + +static void +drm_gpuvm_bo_destroy(struct kref *kref) +{ + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, + kref); + struct drm_gpuvm *gpuvm = vm_bo->vm; + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gem_object *obj = vm_bo->obj; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + + drm_gpuvm_bo_list_del(vm_bo, extobj, lock); + drm_gpuvm_bo_list_del(vm_bo, evict, lock); + + drm_gem_gpuva_assert_lock_held(obj); + list_del(&vm_bo->list.entry.gem); + + if (ops && ops->vm_bo_free) + ops->vm_bo_free(vm_bo); + else + kfree(vm_bo); + + drm_gpuvm_put(gpuvm); + drm_gem_object_put(obj); +} + +/** + * drm_gpuvm_bo_put() - drop a struct drm_gpuvm_bo reference + * @vm_bo: the &drm_gpuvm_bo to release the reference of + * + * This releases a reference to @vm_bo. + * + * If the reference count drops to zero, the &drm_gpuvm_bo is destroyed, which + * includes removing it from the GEM's gpuva list. Hence, if a call to this + * function can potentially let the reference count drop to zero, the caller must + * hold the dma-resv or driver specific GEM gpuva lock. + * + * This function may only be called from non-atomic context. + * + * Returns: true if vm_bo was destroyed, false otherwise.
+ */ +bool +drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) +{ + might_sleep(); + + if (vm_bo) + return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); + + return false; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); + +static struct drm_gpuvm_bo * +__drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo; + + drm_gem_gpuva_assert_lock_held(obj); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) + if (vm_bo->vm == gpuvm) + return vm_bo; + + return NULL; +} + +/** + * drm_gpuvm_bo_find() - find the &drm_gpuvm_bo for the given + * &drm_gpuvm and &drm_gem_object + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the &drm_gpuvm_bo accordingly. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo = __drm_gpuvm_bo_find(gpuvm, obj); + + return vm_bo ? drm_gpuvm_bo_get(vm_bo) : NULL; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); + +/** + * drm_gpuvm_bo_obtain() - obtains an instance of the &drm_gpuvm_bo for the + * given &drm_gpuvm and &drm_gem_object + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the &drm_gpuvm_bo accordingly. If not found, allocates a new + * &drm_gpuvm_bo. + * + * A new &drm_gpuvm_bo is added to the GEM's gpuva list. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, an ERR_PTR on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo; + + vm_bo = drm_gpuvm_bo_find(gpuvm, obj); + if (vm_bo) + return vm_bo; + + vm_bo = drm_gpuvm_bo_create(gpuvm, obj); + if (!vm_bo) + return ERR_PTR(-ENOMEM); + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); + + return vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); + +/** + * drm_gpuvm_bo_obtain_prealloc() - obtains an instance of the &drm_gpuvm_bo + * for the given &drm_gpuvm and &drm_gem_object + * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the found &drm_gpuvm_bo accordingly, while the @__vm_bo reference + * count is decreased. If not found, @__vm_bo is returned without further + * increase of the reference count. + * + * A new &drm_gpuvm_bo is added to the GEM's gpuva list.
+ * + * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing + * &drm_gpuvm_bo was found + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) +{ + struct drm_gpuvm *gpuvm = __vm_bo->vm; + struct drm_gem_object *obj = __vm_bo->obj; + struct drm_gpuvm_bo *vm_bo; + + vm_bo = drm_gpuvm_bo_find(gpuvm, obj); + if (vm_bo) { + drm_gpuvm_bo_put(__vm_bo); + return vm_bo; + } + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); + + return __vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); + +/** + * drm_gpuvm_bo_extobj_add() - adds the &drm_gpuvm_bo to its &drm_gpuvm's + * extobj list + * @vm_bo: The &drm_gpuvm_bo to add to its &drm_gpuvm's extobj list. + * + * Adds the given @vm_bo to its &drm_gpuvm's extobj list if it is not on the + * list already and the corresponding &drm_gem_object actually is an external + * object. + */ +void +drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo) +{ + struct drm_gpuvm *gpuvm = vm_bo->vm; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + + if (drm_gpuvm_is_extobj(gpuvm, vm_bo->obj)) + drm_gpuvm_bo_list_add(vm_bo, extobj, lock); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); + +/** + * drm_gpuvm_bo_evict() - add / remove a &drm_gpuvm_bo to / from the &drm_gpuvm's + * evicted list + * @vm_bo: the &drm_gpuvm_bo to add or remove + * @evict: indicates whether the object is evicted + * + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvm's evicted list. + */ +void +drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) +{ + struct drm_gpuvm *gpuvm = vm_bo->vm; + struct drm_gem_object *obj = vm_bo->obj; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + dma_resv_assert_held(obj->resv); + vm_bo->evicted = evict; + + /* Can't add external objects to the evicted list directly if not using + * internal spinlocks, since in this case the evicted list is protected + * with the VM's common dma-resv lock. + */ + if (drm_gpuvm_is_extobj(gpuvm, obj) && !lock) + return; + + if (evict) + drm_gpuvm_bo_list_add(vm_bo, evict, lock); + else + drm_gpuvm_bo_list_del_init(vm_bo, evict, lock); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_evict); static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, @@ -764,11 +1760,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm, { u64 addr = va->va.addr; u64 range = va->va.range; + int ret; if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) return -EINVAL; - return __drm_gpuva_insert(gpuvm, va); + ret = __drm_gpuva_insert(gpuvm, va); + if (likely(!ret)) + /* Take a reference of the GPUVM for the successfully inserted + * drm_gpuva. We can't take the reference in + * __drm_gpuva_insert() itself, since we don't want to increase + * the reference count for the GPUVM's kernel_alloc_node. + */ + drm_gpuvm_get(gpuvm); + + return ret; } EXPORT_SYMBOL_GPL(drm_gpuva_insert); @@ -795,35 +1801,46 @@ drm_gpuva_remove(struct drm_gpuva *va) struct drm_gpuvm *gpuvm = va->vm; if (unlikely(va == &gpuvm->kernel_alloc_node)) { - WARN(1, "Can't destroy kernel reserved node.\n"); + drm_WARN(gpuvm->drm, 1, + "Can't destroy kernel reserved node.\n"); return; } __drm_gpuva_remove(va); + drm_gpuvm_put(va->vm); } EXPORT_SYMBOL_GPL(drm_gpuva_remove); /** * drm_gpuva_link() - link a &drm_gpuva * @va: the &drm_gpuva to link + * @vm_bo: the &drm_gpuvm_bo to add the &drm_gpuva to * - * This adds the given &va to the GPU VA list of the &drm_gem_object it is - * associated with.
+ * This adds the given &va to the GPU VA list of the &drm_gpuvm_bo and the + * &drm_gpuvm_bo to the &drm_gem_object it is associated with. + * + * For every &drm_gpuva entry added to the &drm_gpuvm_bo an additional + * reference of the latter is taken. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * concurrent access using either the GEMs dma_resv lock or a driver specific + * lock set through drm_gem_gpuva_set_lock(). */ void -drm_gpuva_link(struct drm_gpuva *va) +drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) { struct drm_gem_object *obj = va->gem.obj; + struct drm_gpuvm *gpuvm = va->vm; if (unlikely(!obj)) return; - drm_gem_gpuva_assert_lock_held(obj); + drm_WARN_ON(gpuvm->drm, obj != vm_bo->obj); - list_add_tail(&va->gem.entry, &obj->gpuva.list); + va->vm_bo = drm_gpuvm_bo_get(vm_bo); + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); } EXPORT_SYMBOL_GPL(drm_gpuva_link); @@ -834,20 +1851,31 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); * This removes the given &va from the GPU VA list of the &drm_gem_object it is * associated with. * + * This removes the given &va from the GPU VA list of the &drm_gpuvm_bo and + * the &drm_gpuvm_bo from the &drm_gem_object it is associated with in case + * this call unlinks the last &drm_gpuva from the &drm_gpuvm_bo. + * + * For every &drm_gpuva entry removed from the &drm_gpuvm_bo a reference of + * the latter is dropped. + * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * concurrent access using either the GEMs dma_resv lock or a driver specific + * lock set through drm_gem_gpuva_set_lock(). */ void drm_gpuva_unlink(struct drm_gpuva *va) { struct drm_gem_object *obj = va->gem.obj; + struct drm_gpuvm_bo *vm_bo = va->vm_bo; if (unlikely(!obj)) return; drm_gem_gpuva_assert_lock_held(obj); - list_del_init(&va->gem.entry); + + va->vm_bo = NULL; + drm_gpuvm_bo_put(vm_bo); } EXPORT_SYMBOL_GPL(drm_gpuva_unlink); @@ -992,10 +2020,10 @@ drm_gpuva_remap(struct drm_gpuva *prev, struct drm_gpuva *next, struct drm_gpuva_op_remap *op) { - struct drm_gpuva *curr = op->unmap->va; - struct drm_gpuvm *gpuvm = curr->vm; + struct drm_gpuva *va = op->unmap->va; + struct drm_gpuvm *gpuvm = va->vm; - drm_gpuva_remove(curr); + drm_gpuva_remove(va); if (op->prev) { drm_gpuva_init_from_op(prev, op->prev); @@ -1637,9 +2665,8 @@ err_free_ops: EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); /** - * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM - * @gpuvm: the &drm_gpuvm representing the GPU VA space - * @obj: the &drm_gem_object to unmap + * drm_gpuvm_bo_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM + * @vm_bo: the &drm_gpuvm_bo abstraction * * This function creates a list of operations to perform unmapping for every * GPUVA attached to a GEM. 
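 *
 * A usage sketch tearing down all mappings of a GEM object (hypothetical
 * driver code; the GEM gpuva lock is assumed to be held, and freeing the
 * driver's gpuva structures is left out):
 *
 *	struct drm_gpuva_ops *ops;
 *	struct drm_gpuva_op *op;
 *
 *	ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 *	if (IS_ERR(ops))
 *		return PTR_ERR(ops);
 *
 *	drm_gpuva_for_each_op(op, ops) {
 *		drm_gpuva_unmap(&op->unmap);
 *		drm_gpuva_unlink(op->unmap.va);
 *	}
 *	drm_gpuva_ops_free(vm_bo->vm, ops);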
@@ -1656,15 +2683,14 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure */ struct drm_gpuva_ops * -drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, - struct drm_gem_object *obj) +drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) { struct drm_gpuva_ops *ops; struct drm_gpuva_op *op; struct drm_gpuva *va; int ret; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(vm_bo->obj); ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) @@ -1672,8 +2698,8 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, INIT_LIST_HEAD(&ops->list); - drm_gem_for_each_gpuva(va, obj) { - op = gpuva_op_alloc(gpuvm); + drm_gpuvm_bo_for_each_va(va, vm_bo) { + op = gpuva_op_alloc(vm_bo->vm); if (!op) { ret = -ENOMEM; goto err_free_ops; @@ -1687,10 +2713,10 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, return ops; err_free_ops: - drm_gpuva_ops_free(gpuvm, ops); + drm_gpuva_ops_free(vm_bo->vm, ops); return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_unmap_ops_create); /** * drm_gpuva_ops_free() - free the given &drm_gpuva_ops diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c deleted file mode 100644 index 60afa1865559..000000000000 --- a/drivers/gpu/drm/drm_hashtab.c +++ /dev/null @@ -1,203 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * - **************************************************************************/ -/* - * Simple open hash tab implementation. 
- * - * Authors: - * Thomas Hellström <thomas-at-tungstengraphics-dot-com> - */ - -#include <linux/hash.h> -#include <linux/mm.h> -#include <linux/rculist.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include <drm/drm_print.h> - -#include "drm_legacy.h" - -int drm_ht_create(struct drm_open_hash *ht, unsigned int order) -{ - unsigned int size = 1 << order; - - ht->order = order; - ht->table = NULL; - if (size <= PAGE_SIZE / sizeof(*ht->table)) - ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL); - else - ht->table = vzalloc(array_size(size, sizeof(*ht->table))); - if (!ht->table) { - DRM_ERROR("Out of memory for hash table\n"); - return -ENOMEM; - } - return 0; -} - -void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - int count = 0; - - hashed_key = hash_long(key, ht->order); - DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) - DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); -} - -static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht, - unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht, - unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - struct hlist_node *parent; - unsigned int hashed_key; - unsigned long key = item->key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - parent = NULL; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return -EINVAL; - if (entry->key > key) - break; - parent = &entry->head; - } - if (parent) { - hlist_add_behind_rcu(&item->head, parent); - } else { - hlist_add_head_rcu(&item->head, h_list); - } - return 0; -} - -/* - * Just insert an item and return any "bits" bit key that hasn't been - * used before. 
- */ -int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add) -{ - int ret; - unsigned long mask = (1UL << bits) - 1; - unsigned long first, unshifted_key; - - unshifted_key = hash_long(seed, bits); - first = unshifted_key; - do { - item->key = (unshifted_key << shift) + add; - ret = drm_ht_insert_item(ht, item); - if (ret) - unshifted_key = (unshifted_key + 1) & mask; - } while(ret && (unshifted_key != first)); - - if (ret) { - DRM_ERROR("Available key bit space exhausted\n"); - return -EINVAL; - } - return 0; -} - -int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, - struct drm_hash_item **item) -{ - struct hlist_node *list; - - list = drm_ht_find_key_rcu(ht, key); - if (!list) - return -EINVAL; - - *item = hlist_entry(list, struct drm_hash_item, head); - return 0; -} - -int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key) -{ - struct hlist_node *list; - - list = drm_ht_find_key(ht, key); - if (list) { - hlist_del_init_rcu(list); - return 0; - } - return -EINVAL; -} - -int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item) -{ - hlist_del_init_rcu(&item->head); - return 0; -} - -void drm_ht_remove(struct drm_open_hash *ht) -{ - if (ht->table) { - kvfree(ht->table); - ht->table = NULL; - } -} diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 8462b657c375..8e4faf0a28e6 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -22,6 +22,7 @@ */ #include <linux/kthread.h> +#include <linux/types.h> #include <drm/drm_ioctl.h> #include <drm/drm_vblank.h> @@ -31,6 +32,7 @@ #define DRM_IF_VERSION(maj, min) (maj << 16 | min) +struct cea_sad; struct dentry; struct dma_buf; struct iosys_map; @@ -115,17 +117,10 @@ void drm_handle_vblank_works(struct drm_vblank_crtc *vblank); /* IOCTLS */ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); /* drm_irq.c */ /* IOCTLS */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_irq_control(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#endif - int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -192,6 +187,8 @@ void drm_debugfs_connector_remove(struct drm_connector *connector); void drm_debugfs_crtc_add(struct drm_crtc *crtc); void drm_debugfs_crtc_remove(struct drm_crtc *crtc); void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc); +void drm_debugfs_encoder_add(struct drm_encoder *encoder); +void drm_debugfs_encoder_remove(struct drm_encoder *encoder); #else static inline void drm_debugfs_dev_fini(struct drm_device *dev) { @@ -229,6 +226,14 @@ static inline void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc) { } +static inline void drm_debugfs_encoder_add(struct drm_encoder *encoder) +{ +} + +static inline void drm_debugfs_encoder_remove(struct drm_encoder *encoder) +{ +} + #endif drm_ioctl_t drm_version; @@ -267,3 +272,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent, const struct drm_framebuffer *fb); void drm_framebuffer_debugfs_init(struct drm_device *dev); + +/* drm_edid.c */ +void drm_edid_cta_sad_get(const struct cea_sad *cta_sad, u8 *sad); +void drm_edid_cta_sad_set(struct cea_sad *cta_sad, const u8 *sad); diff --git a/drivers/gpu/drm/drm_ioc32.c 
b/drivers/gpu/drm/drm_ioc32.c index 025dc558c94e..129e2b91dbfe 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -31,12 +31,12 @@ #include <linux/ratelimit.h> #include <linux/export.h> +#include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" -#include "drm_legacy.h" #define DRM_IOCTL_VERSION32 DRM_IOWR(0x00, drm_version32_t) #define DRM_IOCTL_GET_UNIQUE32 DRM_IOWR(0x01, drm_unique32_t) @@ -163,92 +163,6 @@ static int compat_drm_setunique(struct file *file, unsigned int cmd, return -EINVAL; } -#if IS_ENABLED(CONFIG_DRM_LEGACY) -typedef struct drm_map32 { - u32 offset; /* Requested physical address (0 for SAREA) */ - u32 size; /* Requested physical size (bytes) */ - enum drm_map_type type; /* Type of memory to map */ - enum drm_map_flags flags; /* Flags */ - u32 handle; /* User-space: "Handle" to pass to mmap() */ - int mtrr; /* MTRR slot used */ -} drm_map32_t; - -static int compat_drm_getmap(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_map32_t __user *argp = (void __user *)arg; - drm_map32_t m32; - struct drm_map map; - int err; - - if (copy_from_user(&m32, argp, sizeof(m32))) - return -EFAULT; - - map.offset = m32.offset; - err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, 0); - if (err) - return err; - - m32.offset = map.offset; - m32.size = map.size; - m32.type = map.type; - m32.flags = map.flags; - m32.handle = ptr_to_compat((void __user *)map.handle); - m32.mtrr = map.mtrr; - if (copy_to_user(argp, &m32, sizeof(m32))) - return -EFAULT; - return 0; - -} - -static int compat_drm_addmap(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_map32_t __user *argp = (void __user *)arg; - drm_map32_t m32; - struct drm_map map; - int err; - - if (copy_from_user(&m32, argp, sizeof(m32))) - return -EFAULT; - - map.offset = m32.offset; - map.size = m32.size; - map.type = m32.type; - map.flags = m32.flags; - - err = drm_ioctl_kernel(file, drm_legacy_addmap_ioctl, &map, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); - if (err) - return err; - - m32.offset = map.offset; - m32.mtrr = map.mtrr; - m32.handle = ptr_to_compat((void __user *)map.handle); - if (map.handle != compat_ptr(m32.handle)) - pr_err_ratelimited("compat_drm_addmap truncated handle %p for type %d offset %x\n", - map.handle, m32.type, m32.offset); - - if (copy_to_user(argp, &m32, sizeof(m32))) - return -EFAULT; - - return 0; -} - -static int compat_drm_rmmap(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_map32_t __user *argp = (void __user *)arg; - struct drm_map map; - u32 handle; - - if (get_user(handle, &argp->handle)) - return -EFAULT; - map.handle = compat_ptr(handle); - return drm_ioctl_kernel(file, drm_legacy_rmmap_ioctl, &map, DRM_AUTH); -} -#endif - typedef struct drm_client32 { int idx; /* Which client desired? */ int auth; /* Is client authenticated? 
*/ @@ -308,501 +222,6 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd, return 0; } -#if IS_ENABLED(CONFIG_DRM_LEGACY) -typedef struct drm_buf_desc32 { - int count; /* Number of buffers of this size */ - int size; /* Size in bytes */ - int low_mark; /* Low water mark */ - int high_mark; /* High water mark */ - int flags; - u32 agp_start; /* Start address in the AGP aperture */ -} drm_buf_desc32_t; - -static int compat_drm_addbufs(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_buf_desc32_t __user *argp = (void __user *)arg; - drm_buf_desc32_t desc32; - struct drm_buf_desc desc; - int err; - - if (copy_from_user(&desc32, argp, sizeof(drm_buf_desc32_t))) - return -EFAULT; - - desc = (struct drm_buf_desc){ - desc32.count, desc32.size, desc32.low_mark, desc32.high_mark, - desc32.flags, desc32.agp_start - }; - - err = drm_ioctl_kernel(file, drm_legacy_addbufs, &desc, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); - if (err) - return err; - - desc32 = (drm_buf_desc32_t){ - desc.count, desc.size, desc.low_mark, desc.high_mark, - desc.flags, desc.agp_start - }; - if (copy_to_user(argp, &desc32, sizeof(drm_buf_desc32_t))) - return -EFAULT; - - return 0; -} - -static int compat_drm_markbufs(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_buf_desc32_t b32; - drm_buf_desc32_t __user *argp = (void __user *)arg; - struct drm_buf_desc buf; - - if (copy_from_user(&b32, argp, sizeof(b32))) - return -EFAULT; - - buf.size = b32.size; - buf.low_mark = b32.low_mark; - buf.high_mark = b32.high_mark; - - return drm_ioctl_kernel(file, drm_legacy_markbufs, &buf, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} - -typedef struct drm_buf_info32 { - int count; /**< Entries in list */ - u32 list; -} drm_buf_info32_t; - -static int copy_one_buf32(void *data, int count, struct drm_buf_entry *from) -{ - drm_buf_info32_t *request = data; - drm_buf_desc32_t __user *to = compat_ptr(request->list); - drm_buf_desc32_t v = {.count = from->buf_count, - .size = from->buf_size, - .low_mark = from->low_mark, - .high_mark = from->high_mark}; - - if (copy_to_user(to + count, &v, offsetof(drm_buf_desc32_t, flags))) - return -EFAULT; - return 0; -} - -static int drm_legacy_infobufs32(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - drm_buf_info32_t *request = data; - - return __drm_legacy_infobufs(dev, data, &request->count, copy_one_buf32); -} - -static int compat_drm_infobufs(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_buf_info32_t req32; - drm_buf_info32_t __user *argp = (void __user *)arg; - int err; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - if (req32.count < 0) - req32.count = 0; - - err = drm_ioctl_kernel(file, drm_legacy_infobufs32, &req32, DRM_AUTH); - if (err) - return err; - - if (put_user(req32.count, &argp->count)) - return -EFAULT; - - return 0; -} - -typedef struct drm_buf_pub32 { - int idx; /**< Index into the master buffer list */ - int total; /**< Buffer size */ - int used; /**< Amount of buffer in use (for DMA) */ - u32 address; /**< Address of buffer */ -} drm_buf_pub32_t; - -typedef struct drm_buf_map32 { - int count; /**< Length of the buffer list */ - u32 virtual; /**< Mmap'd area in user-virtual */ - u32 list; /**< Buffer information */ -} drm_buf_map32_t; - -static int map_one_buf32(void *data, int idx, unsigned long virtual, - struct drm_buf *buf) -{ - drm_buf_map32_t *request = data; - drm_buf_pub32_t __user *to = compat_ptr(request->list) + idx; - drm_buf_pub32_t v; - - v.idx = buf->idx; - 
v.total = buf->total; - v.used = 0; - v.address = virtual + buf->offset; - if (copy_to_user(to, &v, sizeof(v))) - return -EFAULT; - return 0; -} - -static int drm_legacy_mapbufs32(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - drm_buf_map32_t *request = data; - void __user *v; - int err = __drm_legacy_mapbufs(dev, data, &request->count, - &v, map_one_buf32, - file_priv); - request->virtual = ptr_to_compat(v); - return err; -} - -static int compat_drm_mapbufs(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_buf_map32_t __user *argp = (void __user *)arg; - drm_buf_map32_t req32; - int err; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - if (req32.count < 0) - return -EINVAL; - - err = drm_ioctl_kernel(file, drm_legacy_mapbufs32, &req32, DRM_AUTH); - if (err) - return err; - - if (put_user(req32.count, &argp->count) - || put_user(req32.virtual, &argp->virtual)) - return -EFAULT; - - return 0; -} - -typedef struct drm_buf_free32 { - int count; - u32 list; -} drm_buf_free32_t; - -static int compat_drm_freebufs(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_buf_free32_t req32; - struct drm_buf_free request; - drm_buf_free32_t __user *argp = (void __user *)arg; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - request.count = req32.count; - request.list = compat_ptr(req32.list); - return drm_ioctl_kernel(file, drm_legacy_freebufs, &request, DRM_AUTH); -} - -typedef struct drm_ctx_priv_map32 { - unsigned int ctx_id; /**< Context requesting private mapping */ - u32 handle; /**< Handle of map */ -} drm_ctx_priv_map32_t; - -static int compat_drm_setsareactx(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_ctx_priv_map32_t req32; - struct drm_ctx_priv_map request; - drm_ctx_priv_map32_t __user *argp = (void __user *)arg; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - request.ctx_id = req32.ctx_id; - request.handle = compat_ptr(req32.handle); - return drm_ioctl_kernel(file, drm_legacy_setsareactx, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} - -static int compat_drm_getsareactx(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct drm_ctx_priv_map req; - drm_ctx_priv_map32_t req32; - drm_ctx_priv_map32_t __user *argp = (void __user *)arg; - int err; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - req.ctx_id = req32.ctx_id; - err = drm_ioctl_kernel(file, drm_legacy_getsareactx, &req, DRM_AUTH); - if (err) - return err; - - req32.handle = ptr_to_compat((void __user *)req.handle); - if (copy_to_user(argp, &req32, sizeof(req32))) - return -EFAULT; - - return 0; -} - -typedef struct drm_ctx_res32 { - int count; - u32 contexts; -} drm_ctx_res32_t; - -static int compat_drm_resctx(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_ctx_res32_t __user *argp = (void __user *)arg; - drm_ctx_res32_t res32; - struct drm_ctx_res res; - int err; - - if (copy_from_user(&res32, argp, sizeof(res32))) - return -EFAULT; - - res.count = res32.count; - res.contexts = compat_ptr(res32.contexts); - err = drm_ioctl_kernel(file, drm_legacy_resctx, &res, DRM_AUTH); - if (err) - return err; - - res32.count = res.count; - if (copy_to_user(argp, &res32, sizeof(res32))) - return -EFAULT; - - return 0; -} - -typedef struct drm_dma32 { - int context; /**< Context handle */ - int send_count; /**< Number of buffers to send */ - u32 send_indices; /**< List of handles to buffers */ - u32 send_sizes; /**< Lengths of data 
to send */ - enum drm_dma_flags flags; /**< Flags */ - int request_count; /**< Number of buffers requested */ - int request_size; /**< Desired size for buffers */ - u32 request_indices; /**< Buffer information */ - u32 request_sizes; - int granted_count; /**< Number of buffers granted */ -} drm_dma32_t; - -static int compat_drm_dma(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_dma32_t d32; - drm_dma32_t __user *argp = (void __user *)arg; - struct drm_dma d; - int err; - - if (copy_from_user(&d32, argp, sizeof(d32))) - return -EFAULT; - - d.context = d32.context; - d.send_count = d32.send_count; - d.send_indices = compat_ptr(d32.send_indices); - d.send_sizes = compat_ptr(d32.send_sizes); - d.flags = d32.flags; - d.request_count = d32.request_count; - d.request_indices = compat_ptr(d32.request_indices); - d.request_sizes = compat_ptr(d32.request_sizes); - err = drm_ioctl_kernel(file, drm_legacy_dma_ioctl, &d, DRM_AUTH); - if (err) - return err; - - if (put_user(d.request_size, &argp->request_size) - || put_user(d.granted_count, &argp->granted_count)) - return -EFAULT; - - return 0; -} -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -#if IS_ENABLED(CONFIG_AGP) -typedef struct drm_agp_mode32 { - u32 mode; /**< AGP mode */ -} drm_agp_mode32_t; - -static int compat_drm_agp_enable(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_mode32_t __user *argp = (void __user *)arg; - struct drm_agp_mode mode; - - if (get_user(mode.mode, &argp->mode)) - return -EFAULT; - - return drm_ioctl_kernel(file, drm_legacy_agp_enable_ioctl, &mode, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} - -typedef struct drm_agp_info32 { - int agp_version_major; - int agp_version_minor; - u32 mode; - u32 aperture_base; /* physical address */ - u32 aperture_size; /* bytes */ - u32 memory_allowed; /* bytes */ - u32 memory_used; - - /* PCI information */ - unsigned short id_vendor; - unsigned short id_device; -} drm_agp_info32_t; - -static int compat_drm_agp_info(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_info32_t __user *argp = (void __user *)arg; - drm_agp_info32_t i32; - struct drm_agp_info info; - int err; - - err = drm_ioctl_kernel(file, drm_legacy_agp_info_ioctl, &info, DRM_AUTH); - if (err) - return err; - - i32.agp_version_major = info.agp_version_major; - i32.agp_version_minor = info.agp_version_minor; - i32.mode = info.mode; - i32.aperture_base = info.aperture_base; - i32.aperture_size = info.aperture_size; - i32.memory_allowed = info.memory_allowed; - i32.memory_used = info.memory_used; - i32.id_vendor = info.id_vendor; - i32.id_device = info.id_device; - if (copy_to_user(argp, &i32, sizeof(i32))) - return -EFAULT; - - return 0; -} - -typedef struct drm_agp_buffer32 { - u32 size; /**< In bytes -- will round to page boundary */ - u32 handle; /**< Used for binding / unbinding */ - u32 type; /**< Type of memory to allocate */ - u32 physical; /**< Physical used by i810 */ -} drm_agp_buffer32_t; - -static int compat_drm_agp_alloc(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_buffer32_t __user *argp = (void __user *)arg; - drm_agp_buffer32_t req32; - struct drm_agp_buffer request; - int err; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - request.size = req32.size; - request.type = req32.type; - err = drm_ioctl_kernel(file, drm_legacy_agp_alloc_ioctl, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); - if (err) - return err; - - req32.handle = request.handle; - req32.physical = request.physical; - if 
(copy_to_user(argp, &req32, sizeof(req32))) { - drm_ioctl_kernel(file, drm_legacy_agp_free_ioctl, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); - return -EFAULT; - } - - return 0; -} - -static int compat_drm_agp_free(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_buffer32_t __user *argp = (void __user *)arg; - struct drm_agp_buffer request; - - if (get_user(request.handle, &argp->handle)) - return -EFAULT; - - return drm_ioctl_kernel(file, drm_legacy_agp_free_ioctl, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} - -typedef struct drm_agp_binding32 { - u32 handle; /**< From drm_agp_buffer */ - u32 offset; /**< In bytes -- will round to page boundary */ -} drm_agp_binding32_t; - -static int compat_drm_agp_bind(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_binding32_t __user *argp = (void __user *)arg; - drm_agp_binding32_t req32; - struct drm_agp_binding request; - - if (copy_from_user(&req32, argp, sizeof(req32))) - return -EFAULT; - - request.handle = req32.handle; - request.offset = req32.offset; - return drm_ioctl_kernel(file, drm_legacy_agp_bind_ioctl, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} - -static int compat_drm_agp_unbind(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_agp_binding32_t __user *argp = (void __user *)arg; - struct drm_agp_binding request; - - if (get_user(request.handle, &argp->handle)) - return -EFAULT; - - return drm_ioctl_kernel(file, drm_legacy_agp_unbind_ioctl, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} -#endif /* CONFIG_AGP */ - -typedef struct drm_scatter_gather32 { - u32 size; /**< In bytes -- will round to page boundary */ - u32 handle; /**< Used for mapping / unmapping */ -} drm_scatter_gather32_t; - -static int compat_drm_sg_alloc(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_scatter_gather32_t __user *argp = (void __user *)arg; - struct drm_scatter_gather request; - int err; - - if (get_user(request.size, &argp->size)) - return -EFAULT; - - err = drm_ioctl_kernel(file, drm_legacy_sg_alloc, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); - if (err) - return err; - - /* XXX not sure about the handle conversion here... 
*/ - if (put_user(request.handle >> PAGE_SHIFT, &argp->handle)) - return -EFAULT; - - return 0; -} - -static int compat_drm_sg_free(struct file *file, unsigned int cmd, - unsigned long arg) -{ - drm_scatter_gather32_t __user *argp = (void __user *)arg; - struct drm_scatter_gather request; - unsigned long x; - - if (get_user(x, &argp->handle)) - return -EFAULT; - request.handle = x << PAGE_SHIFT; - return drm_ioctl_kernel(file, drm_legacy_sg_free, &request, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); -} -#endif #if defined(CONFIG_X86) typedef struct drm_update_draw32 { drm_drawable_t handle; @@ -854,7 +273,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.type = req32.request.type; req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; - err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); + err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, 0); req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; @@ -914,37 +333,9 @@ static struct { #define DRM_IOCTL32_DEF(n, f) [DRM_IOCTL_NR(n##32)] = {.fn = f, .name = #n} DRM_IOCTL32_DEF(DRM_IOCTL_VERSION, compat_drm_version), DRM_IOCTL32_DEF(DRM_IOCTL_GET_UNIQUE, compat_drm_getunique), -#if IS_ENABLED(CONFIG_DRM_LEGACY) - DRM_IOCTL32_DEF(DRM_IOCTL_GET_MAP, compat_drm_getmap), -#endif DRM_IOCTL32_DEF(DRM_IOCTL_GET_CLIENT, compat_drm_getclient), DRM_IOCTL32_DEF(DRM_IOCTL_GET_STATS, compat_drm_getstats), DRM_IOCTL32_DEF(DRM_IOCTL_SET_UNIQUE, compat_drm_setunique), -#if IS_ENABLED(CONFIG_DRM_LEGACY) - DRM_IOCTL32_DEF(DRM_IOCTL_ADD_MAP, compat_drm_addmap), - DRM_IOCTL32_DEF(DRM_IOCTL_ADD_BUFS, compat_drm_addbufs), - DRM_IOCTL32_DEF(DRM_IOCTL_MARK_BUFS, compat_drm_markbufs), - DRM_IOCTL32_DEF(DRM_IOCTL_INFO_BUFS, compat_drm_infobufs), - DRM_IOCTL32_DEF(DRM_IOCTL_MAP_BUFS, compat_drm_mapbufs), - DRM_IOCTL32_DEF(DRM_IOCTL_FREE_BUFS, compat_drm_freebufs), - DRM_IOCTL32_DEF(DRM_IOCTL_RM_MAP, compat_drm_rmmap), - DRM_IOCTL32_DEF(DRM_IOCTL_SET_SAREA_CTX, compat_drm_setsareactx), - DRM_IOCTL32_DEF(DRM_IOCTL_GET_SAREA_CTX, compat_drm_getsareactx), - DRM_IOCTL32_DEF(DRM_IOCTL_RES_CTX, compat_drm_resctx), - DRM_IOCTL32_DEF(DRM_IOCTL_DMA, compat_drm_dma), -#if IS_ENABLED(CONFIG_AGP) - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_ENABLE, compat_drm_agp_enable), - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_INFO, compat_drm_agp_info), - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_ALLOC, compat_drm_agp_alloc), - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_FREE, compat_drm_agp_free), - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_BIND, compat_drm_agp_bind), - DRM_IOCTL32_DEF(DRM_IOCTL_AGP_UNBIND, compat_drm_agp_unbind), -#endif -#endif -#if IS_ENABLED(CONFIG_DRM_LEGACY) - DRM_IOCTL32_DEF(DRM_IOCTL_SG_ALLOC, compat_drm_sg_alloc), - DRM_IOCTL32_DEF(DRM_IOCTL_SG_FREE, compat_drm_sg_free), -#endif #if defined(CONFIG_X86) DRM_IOCTL32_DEF(DRM_IOCTL_UPDATE_DRAW, compat_drm_update_draw), #endif diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 77590b0f38fa..e368fc084c77 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -42,7 +42,6 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" -#include "drm_legacy.h" /** * DOC: getunique and setversion story @@ -301,6 +300,10 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ case DRM_CAP_CRTC_IN_VBLANK_EVENT: req->value = 1; break; + case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP: + req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) && + dev->mode_config.async_page_flip; + break; default: return -EINVAL; } 
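For context, the DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP capability added in the hunk above and the DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT client cap added in the hunk that follows are both reachable from userspace through the existing libdrm wrappers; the following is a minimal sketch, assuming libdrm headers new enough to define both constants and /dev/dri/card0 as the device node:

/* Illustrative sketch: query the new capability and opt into the new
 * client cap from userspace. Assumes a libdrm that already defines
 * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP and DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <xf86drm.h>

int main(void)
{
	uint64_t async_flip = 0;
	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return 1;

	/* drmGetCap() wraps DRM_IOCTL_GET_CAP; per the hunk above the value
	 * is 1 only for atomic drivers with async_page_flip enabled. */
	if (drmGetCap(fd, DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP, &async_flip) == 0)
		printf("atomic async page flip: %s\n",
		       async_flip ? "supported" : "not supported");

	/* The cursor-hotspot client cap is only accepted for atomic clients
	 * on drivers with DRIVER_CURSOR_HOTSPOT, so enable the atomic client
	 * cap first. */
	if (drmSetClientCap(fd, DRM_CLIENT_CAP_ATOMIC, 1) == 0 &&
	    drmSetClientCap(fd, DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT, 1) == 0)
		printf("virtualized cursor plane hotspot enabled\n");

	close(fd);
	return 0;
}

The kernel side rejects the hotspot cap unless the client has already switched to the atomic uAPI, which is why the sketch sets DRM_CLIENT_CAP_ATOMIC before DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT.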
@@ -361,6 +364,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->writeback_connectors = req->value; break; + case DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT: + if (!drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT)) + return -EOPNOTSUPP; + if (!file_priv->atomic) + return -EINVAL; + if (req->value > 1) + return -EINVAL; + file_priv->supports_virtualized_cursor_plane = req->value; + break; default: return -EINVAL; } @@ -559,21 +571,11 @@ static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) .name = #ioctl \ } -#if IS_ENABLED(CONFIG_DRM_LEGACY) -#define DRM_LEGACY_IOCTL_DEF(ioctl, _func, _flags) DRM_IOCTL_DEF(ioctl, _func, _flags) -#else -#define DRM_LEGACY_IOCTL_DEF(ioctl, _func, _flags) DRM_IOCTL_DEF(ioctl, drm_invalid_op, _flags) -#endif - /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_legacy_irq_by_busid, - DRM_MASTER|DRM_ROOT_ONLY), - - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, 0), @@ -586,63 +588,15 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_MASTER), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_legacy_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_legacy_rmmap_ioctl, DRM_AUTH), - - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, 0), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_CTX, drm_legacy_getctx, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SWITCH_CTX, drm_legacy_switchctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_legacy_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_legacy_resctx, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_LOCK, drm_legacy_lock, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_legacy_unlock, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_BUFS, drm_legacy_addbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MARK_BUFS, drm_legacy_markbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_INFO_BUFS, drm_legacy_infobufs, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MAP_BUFS, drm_legacy_mapbufs, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_FREE_BUFS, drm_legacy_freebufs, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_DMA, drm_legacy_dma_ioctl, DRM_AUTH), - 
DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_CONTROL, drm_legacy_irq_control, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - -#if IS_ENABLED(CONFIG_AGP) - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ACQUIRE, drm_legacy_agp_acquire_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_RELEASE, drm_legacy_agp_release_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ENABLE, drm_legacy_agp_enable_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_INFO, drm_legacy_agp_info_ioctl, DRM_AUTH), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ALLOC, drm_legacy_agp_alloc_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_FREE, drm_legacy_agp_free_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_BIND, drm_legacy_agp_bind_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_UNBIND, drm_legacy_agp_unbind_ioctl, - DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), -#endif - - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_legacy_sg_alloc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SG_FREE, drm_legacy_sg_free, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - - DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, DRM_UNLOCKED), - - DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_legacy_modeset_ctl_ioctl, 0), + DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), @@ -675,6 +629,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CLOSEFB, drm_mode_closefb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0), @@ -774,7 +729,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, { struct drm_file *file_priv = file->private_data; struct drm_device *dev = file_priv->minor->dev; - int retcode; + int ret; /* Update drm_file owner if fd was passed along. */ drm_file_update_pid(file_priv); @@ -782,20 +737,11 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, if (drm_dev_is_unplugged(dev)) return -ENODEV; - retcode = drm_ioctl_permit(flags, file_priv); - if (unlikely(retcode)) - return retcode; - - /* Enforce sane locking for modern driver ioctls. */ - if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) || - (flags & DRM_UNLOCKED)) - retcode = func(dev, kdata, file_priv); - else { - mutex_lock(&drm_global_mutex); - retcode = func(dev, kdata, file_priv); - mutex_unlock(&drm_global_mutex); - } - return retcode; + ret = drm_ioctl_permit(flags, file_priv); + if (unlikely(ret)) + return ret; + + return func(dev, kdata, file_priv); } EXPORT_SYMBOL(drm_ioctl_kernel); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c deleted file mode 100644 index d327638e15ee..000000000000 --- a/drivers/gpu/drm/drm_irq.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * drm_irq.c IRQ and vblank support - * - * \author Rickard E. 
(Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include <linux/export.h> -#include <linux/interrupt.h> /* For task queue support */ -#include <linux/pci.h> -#include <linux/vgaarb.h> - -#include <drm/drm.h> -#include <drm/drm_device.h> -#include <drm/drm_drv.h> -#include <drm/drm_legacy.h> -#include <drm/drm_print.h> -#include <drm/drm_vblank.h> - -#include "drm_internal.h" - -static int drm_legacy_irq_install(struct drm_device *dev, int irq) -{ - int ret; - unsigned long sh_flags = 0; - - if (irq == 0) - return -EINVAL; - - if (dev->irq_enabled) - return -EBUSY; - dev->irq_enabled = true; - - DRM_DEBUG("irq=%d\n", irq); - - /* Before installing handler */ - if (dev->driver->irq_preinstall) - dev->driver->irq_preinstall(dev); - - /* PCI devices require shared interrupts. 
*/ - if (dev_is_pci(dev->dev)) - sh_flags = IRQF_SHARED; - - ret = request_irq(irq, dev->driver->irq_handler, - sh_flags, dev->driver->name, dev); - - if (ret < 0) { - dev->irq_enabled = false; - return ret; - } - - /* After installing handler */ - if (dev->driver->irq_postinstall) - ret = dev->driver->irq_postinstall(dev); - - if (ret < 0) { - dev->irq_enabled = false; - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_unregister(to_pci_dev(dev->dev)); - free_irq(irq, dev); - } else { - dev->irq = irq; - } - - return ret; -} - -int drm_legacy_irq_uninstall(struct drm_device *dev) -{ - unsigned long irqflags; - bool irq_enabled; - int i; - - irq_enabled = dev->irq_enabled; - dev->irq_enabled = false; - - /* - * Wake up any waiters so they don't hang. This is just to paper over - * issues for UMS drivers which aren't in full control of their - * vblank/irq handling. KMS drivers must ensure that vblanks are all - * disabled when uninstalling the irq handler. - */ - if (drm_dev_has_vblank(dev)) { - spin_lock_irqsave(&dev->vbl_lock, irqflags); - for (i = 0; i < dev->num_crtcs; i++) { - struct drm_vblank_crtc *vblank = &dev->vblank[i]; - - if (!vblank->enabled) - continue; - - WARN_ON(drm_core_check_feature(dev, DRIVER_MODESET)); - - drm_vblank_disable_and_save(dev, i); - wake_up(&vblank->queue); - } - spin_unlock_irqrestore(&dev->vbl_lock, irqflags); - } - - if (!irq_enabled) - return -EINVAL; - - DRM_DEBUG("irq=%d\n", dev->irq); - - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_unregister(to_pci_dev(dev->dev)); - - if (dev->driver->irq_uninstall) - dev->driver->irq_uninstall(dev); - - free_irq(dev->irq, dev); - - return 0; -} -EXPORT_SYMBOL(drm_legacy_irq_uninstall); - -int drm_legacy_irq_control(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_control *ctl = data; - int ret = 0, irq; - struct pci_dev *pdev; - - /* if we haven't irq we fallback for compatibility reasons - - * this used to be a separate function in drm_dma.h - */ - - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - return 0; - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return 0; - /* UMS was only ever supported on pci devices. 
*/ - if (WARN_ON(!dev_is_pci(dev->dev))) - return -EINVAL; - - switch (ctl->func) { - case DRM_INST_HANDLER: - pdev = to_pci_dev(dev->dev); - irq = pdev->irq; - - if (dev->if_version < DRM_IF_VERSION(1, 2) && - ctl->irq != irq) - return -EINVAL; - mutex_lock(&dev->struct_mutex); - ret = drm_legacy_irq_install(dev, irq); - mutex_unlock(&dev->struct_mutex); - - return ret; - case DRM_UNINST_HANDLER: - mutex_lock(&dev->struct_mutex); - ret = drm_legacy_irq_uninstall(dev); - mutex_unlock(&dev->struct_mutex); - - return ret; - default: - return -EINVAL; - } -} diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c index 0bf0fc1abf54..0c7550c0462b 100644 --- a/drivers/gpu/drm/drm_kms_helper_common.c +++ b/drivers/gpu/drm/drm_kms_helper_common.c @@ -27,38 +27,6 @@ #include <linux/module.h> -#include <drm/drm_edid.h> -#include <drm/drm_print.h> - -#include "drm_crtc_helper_internal.h" - MODULE_AUTHOR("David Airlie, Jesse Barnes"); MODULE_DESCRIPTION("DRM KMS helper"); MODULE_LICENSE("GPL and additional rights"); - -#if IS_ENABLED(CONFIG_DRM_LOAD_EDID_FIRMWARE) - -/* Backward compatibility for drm_kms_helper.edid_firmware */ -static int edid_firmware_set(const char *val, const struct kernel_param *kp) -{ - DRM_NOTE("drm_kms_helper.edid_firmware is deprecated, please use drm.edid_firmware instead.\n"); - - return __drm_set_edid_firmware_path(val); -} - -static int edid_firmware_get(char *buffer, const struct kernel_param *kp) -{ - return __drm_get_edid_firmware_path(buffer, PAGE_SIZE); -} - -static const struct kernel_param_ops edid_firmware_ops = { - .set = edid_firmware_set, - .get = edid_firmware_get, -}; - -module_param_cb(edid_firmware, &edid_firmware_ops, NULL, 0644); -__MODULE_PARM_TYPE(edid_firmware, "charp"); -MODULE_PARM_DESC(edid_firmware, - "DEPRECATED. Use drm.edid_firmware module parameter instead."); - -#endif diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h deleted file mode 100644 index 70c9dba114a6..000000000000 --- a/drivers/gpu/drm/drm_legacy.h +++ /dev/null @@ -1,290 +0,0 @@ -#ifndef __DRM_LEGACY_H__ -#define __DRM_LEGACY_H__ - -/* - * Copyright (c) 2014 David Herrmann <dh.herrmann@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * This file contains legacy interfaces that modern drm drivers - * should no longer be using. They cannot be removed as legacy - * drivers use them, and removing them are API breaks. 
- */ -#include <linux/list.h> - -#include <drm/drm.h> -#include <drm/drm_device.h> -#include <drm/drm_legacy.h> - -struct agp_memory; -struct drm_buf_desc; -struct drm_device; -struct drm_file; -struct drm_hash_item; -struct drm_open_hash; - -/* - * Hash-table Support - */ - -#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) - -/* drm_hashtab.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_ht_create(struct drm_open_hash *ht, unsigned int order); -int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); -int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); - -void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); -void drm_ht_remove(struct drm_open_hash *ht); -#endif - -/* - * RCU-safe interface - * - * The user of this API needs to make sure that two or more instances of the - * hash table manipulation functions are never run simultaneously. - * The lookup function drm_ht_find_item_rcu may, however, run simultaneously - * with any of the manipulation functions as long as it's called from within - * an RCU read-locked section. - */ -#define drm_ht_insert_item_rcu drm_ht_insert_item -#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please -#define drm_ht_remove_key_rcu drm_ht_remove_key -#define drm_ht_remove_item_rcu drm_ht_remove_item -#define drm_ht_find_item_rcu drm_ht_find_item - -/* - * Generic DRM Contexts - */ - -#define DRM_KERNEL_CONTEXT 0 -#define DRM_RESERVED_CONTEXTS 1 - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_ctxbitmap_init(struct drm_device *dev); -void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev); -void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file); -#else -static inline void drm_legacy_ctxbitmap_init(struct drm_device *dev) {} -static inline void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev) {} -static inline void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) {} -#endif - -void drm_legacy_ctxbitmap_free(struct drm_device *dev, int ctx_handle); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_resctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_addctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_getctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_switchctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_newctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_rmctx(struct drm_device *d, void *v, struct drm_file *f); - -int drm_legacy_setsareactx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_getsareactx(struct drm_device *d, void *v, struct drm_file *f); -#endif - -/* - * Generic Buffer Management - */ - -#define DRM_MAP_HASH_OFFSET 0x10000000 - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -static inline int drm_legacy_create_map_hash(struct drm_device *dev) -{ - return drm_ht_create(&dev->map_hash, 12); -} - -static inline void drm_legacy_remove_map_hash(struct drm_device *dev) -{ - drm_ht_remove(&dev->map_hash); -} -#else -static inline int drm_legacy_create_map_hash(struct drm_device *dev) -{ - return 0; -} - -static inline void drm_legacy_remove_map_hash(struct 
drm_device *dev) {} -#endif - - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_addmap_ioctl(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_rmmap_ioctl(struct drm_device *d, void *v, struct drm_file *f); - -int drm_legacy_addbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_infobufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_markbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_freebufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_mapbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_dma_ioctl(struct drm_device *d, void *v, struct drm_file *f); -#endif - -int __drm_legacy_infobufs(struct drm_device *, void *, int *, - int (*)(void *, int, struct drm_buf_entry *)); -int __drm_legacy_mapbufs(struct drm_device *, void *, int *, - void __user **, - int (*)(void *, int, unsigned long, struct drm_buf *), - struct drm_file *); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_master_rmmaps(struct drm_device *dev, - struct drm_master *master); -void drm_legacy_rmmaps(struct drm_device *dev); -#else -static inline void drm_legacy_master_rmmaps(struct drm_device *dev, - struct drm_master *master) {} -static inline void drm_legacy_rmmaps(struct drm_device *dev) {} -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_vma_flush(struct drm_device *d); -#else -static inline void drm_legacy_vma_flush(struct drm_device *d) -{ - /* do nothing */ -} -#endif - -/* - * AGP Support - */ - -struct drm_agp_mem { - unsigned long handle; - struct agp_memory *memory; - unsigned long bound; - int pages; - struct list_head head; -}; - -/* drm_agpsupport.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_AGP) -void drm_legacy_agp_clear(struct drm_device *dev); - -int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#else -static inline void drm_legacy_agp_clear(struct drm_device *dev) {} -#endif - -/* drm_lock.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_lock(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_unlock(struct drm_device *d, void *v, struct drm_file *f); -void drm_legacy_lock_release(struct drm_device *dev, struct file *filp); -#else -static inline void drm_legacy_lock_release(struct drm_device *dev, struct file *filp) {} -#endif - -/* DMA support */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_dma_setup(struct drm_device *dev); -void drm_legacy_dma_takedown(struct drm_device *dev); -#else -static inline int drm_legacy_dma_setup(struct drm_device *dev) -{ - return 0; -} -#endif - -void drm_legacy_free_buffer(struct drm_device *dev, - struct drm_buf * buf); -#if IS_ENABLED(CONFIG_DRM_LEGACY) 
-void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct drm_file *filp); -#else -static inline void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct drm_file *filp) {} -#endif - -/* Scatter Gather Support */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_sg_cleanup(struct drm_device *dev); -int drm_legacy_sg_alloc(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_sg_free(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_init_members(struct drm_device *dev); -void drm_legacy_destroy_members(struct drm_device *dev); -void drm_legacy_dev_reinit(struct drm_device *dev); -int drm_legacy_setup(struct drm_device * dev); -#else -static inline void drm_legacy_init_members(struct drm_device *dev) {} -static inline void drm_legacy_destroy_members(struct drm_device *dev) {} -static inline void drm_legacy_dev_reinit(struct drm_device *dev) {} -static inline int drm_legacy_setup(struct drm_device * dev) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master); -#else -static inline void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master) {} -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_master_legacy_init(struct drm_master *master); -#else -static inline void drm_master_legacy_init(struct drm_master *master) {} -#endif - -/* drm_pci.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_PCI) -int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, struct drm_file *file_priv); -void drm_legacy_pci_agp_destroy(struct drm_device *dev); -#else -static inline int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return -EINVAL; -} - -static inline void drm_legacy_pci_agp_destroy(struct drm_device *dev) {} -#endif - -#endif /* __DRM_LEGACY_H__ */ diff --git a/drivers/gpu/drm/drm_legacy_misc.c b/drivers/gpu/drm/drm_legacy_misc.c deleted file mode 100644 index d4c5434062d7..000000000000 --- a/drivers/gpu/drm/drm_legacy_misc.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * \file drm_legacy_misc.c - * Misc legacy support functions. - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <drm/drm_device.h> -#include <drm/drm_drv.h> -#include <drm/drm_print.h> - -#include "drm_internal.h" -#include "drm_legacy.h" - -void drm_legacy_init_members(struct drm_device *dev) -{ - INIT_LIST_HEAD(&dev->ctxlist); - INIT_LIST_HEAD(&dev->vmalist); - INIT_LIST_HEAD(&dev->maplist); - spin_lock_init(&dev->buf_lock); - mutex_init(&dev->ctxlist_mutex); -} - -void drm_legacy_destroy_members(struct drm_device *dev) -{ - mutex_destroy(&dev->ctxlist_mutex); -} - -int drm_legacy_setup(struct drm_device * dev) -{ - int ret; - - if (dev->driver->firstopen && - drm_core_check_feature(dev, DRIVER_LEGACY)) { - ret = dev->driver->firstopen(dev); - if (ret != 0) - return ret; - } - - ret = drm_legacy_dma_setup(dev); - if (ret < 0) - return ret; - - - DRM_DEBUG("\n"); - return 0; -} - -void drm_legacy_dev_reinit(struct drm_device *dev) -{ - if (dev->irq_enabled) - drm_legacy_irq_uninstall(dev); - - mutex_lock(&dev->struct_mutex); - - drm_legacy_agp_clear(dev); - - drm_legacy_sg_cleanup(dev); - drm_legacy_vma_flush(dev); - drm_legacy_dma_takedown(dev); - - mutex_unlock(&dev->struct_mutex); - - dev->sigdata.lock = NULL; - - dev->context_flag = 0; - dev->last_context = 0; - dev->if_version = 0; - - DRM_DEBUG("lastclose completed\n"); -} - -void drm_master_legacy_init(struct drm_master *master) -{ - spin_lock_init(&master->lock.spinlock); - init_waitqueue_head(&master->lock.lock_queue); -} diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c deleted file mode 100644 index 1efbd5389d89..000000000000 --- a/drivers/gpu/drm/drm_lock.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * \file drm_lock.c - * IOCTLs for locking - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include <linux/export.h> -#include <linux/sched/signal.h> - -#include <drm/drm.h> -#include <drm/drm_drv.h> -#include <drm/drm_file.h> -#include <drm/drm_print.h> - -#include "drm_internal.h" -#include "drm_legacy.h" - -static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); - -/* - * Take the heavyweight lock. - * - * \param lock lock pointer. - * \param context locking context. - * \return one if the lock is held, or zero otherwise. - * - * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. - */ -static -int drm_lock_take(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - do { - old = *lock; - if (old & _DRM_LOCK_HELD) - new = old | _DRM_LOCK_CONT; - else { - new = context | _DRM_LOCK_HELD | - ((lock_data->user_waiters + lock_data->kernel_waiters > 1) ? - _DRM_LOCK_CONT : 0); - } - prev = cmpxchg(lock, old, new); - } while (prev != old); - spin_unlock_bh(&lock_data->spinlock); - - if (_DRM_LOCKING_CONTEXT(old) == context) { - if (old & _DRM_LOCK_HELD) { - if (context != DRM_KERNEL_CONTEXT) { - DRM_ERROR("%d holds heavyweight lock\n", - context); - } - return 0; - } - } - - if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) { - /* Have lock */ - return 1; - } - return 0; -} - -/* - * This takes a lock forcibly and hands it to context. Should ONLY be used - * inside *_unlock to give lock to kernel before calling *_dma_schedule. - * - * \param dev DRM device. - * \param lock lock pointer. - * \param context locking context. - * \return always one. - * - * Resets the lock file pointer. - * Marks the lock as held by the given context, via the \p cmpxchg instruction. - */ -static int drm_lock_transfer(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - lock_data->file_priv = NULL; - do { - old = *lock; - new = context | _DRM_LOCK_HELD; - prev = cmpxchg(lock, old, new); - } while (prev != old); - return 1; -} - -static int drm_legacy_lock_free(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - if (lock_data->kernel_waiters != 0) { - drm_lock_transfer(lock_data, 0); - lock_data->idle_has_lock = 1; - spin_unlock_bh(&lock_data->spinlock); - return 1; - } - spin_unlock_bh(&lock_data->spinlock); - - do { - old = *lock; - new = _DRM_LOCKING_CONTEXT(old); - prev = cmpxchg(lock, old, new); - } while (prev != old); - - if (_DRM_LOCK_IS_HELD(old) && _DRM_LOCKING_CONTEXT(old) != context) { - DRM_ERROR("%d freed heavyweight lock held by %d\n", - context, _DRM_LOCKING_CONTEXT(old)); - return 1; - } - wake_up_interruptible(&lock_data->lock_queue); - return 0; -} - -/* - * Lock ioctl. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_lock structure. - * \return zero on success or negative number on failure. - * - * Add the current task to the lock wait queue, and attempt to take to lock. 
- */ -int drm_legacy_lock(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - DECLARE_WAITQUEUE(entry, current); - struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; - int ret = 0; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - ++file_priv->lock_count; - - if (lock->context == DRM_KERNEL_CONTEXT) { - DRM_ERROR("Process %d using kernel context %d\n", - task_pid_nr(current), lock->context); - return -EINVAL; - } - - DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n", - lock->context, task_pid_nr(current), - master->lock.hw_lock ? master->lock.hw_lock->lock : -1, - lock->flags); - - add_wait_queue(&master->lock.lock_queue, &entry); - spin_lock_bh(&master->lock.spinlock); - master->lock.user_waiters++; - spin_unlock_bh(&master->lock.spinlock); - - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - if (!master->lock.hw_lock) { - /* Device has been unregistered */ - send_sig(SIGTERM, current, 0); - ret = -EINTR; - break; - } - if (drm_lock_take(&master->lock, lock->context)) { - master->lock.file_priv = file_priv; - master->lock.lock_time = jiffies; - break; /* Got lock */ - } - - /* Contention */ - mutex_unlock(&drm_global_mutex); - schedule(); - mutex_lock(&drm_global_mutex); - if (signal_pending(current)) { - ret = -EINTR; - break; - } - } - spin_lock_bh(&master->lock.spinlock); - master->lock.user_waiters--; - spin_unlock_bh(&master->lock.spinlock); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&master->lock.lock_queue, &entry); - - DRM_DEBUG("%d %s\n", lock->context, - ret ? "interrupted" : "has lock"); - if (ret) return ret; - - /* don't set the block all signals on the master process for now - * really probably not the correct answer but lets us debug xkb - * xserver for now */ - if (!drm_is_current_master(file_priv)) { - dev->sigdata.context = lock->context; - dev->sigdata.lock = master->lock.hw_lock; - } - - if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) - { - if (dev->driver->dma_quiescent(dev)) { - DRM_DEBUG("%d waiting for DMA quiescent\n", - lock->context); - return -EBUSY; - } - } - - return 0; -} - -/* - * Unlock ioctl. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_lock structure. - * \return zero on success or negative number on failure. - * - * Transfer and free the lock. - */ -int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (lock->context == DRM_KERNEL_CONTEXT) { - DRM_ERROR("Process %d using kernel context %d\n", - task_pid_nr(current), lock->context); - return -EINVAL; - } - - if (drm_legacy_lock_free(&master->lock, lock->context)) { - /* FIXME: Should really bail out here. */ - } - - return 0; -} - -/* - * This function returns immediately and takes the hw lock - * with the kernel context if it is free, otherwise it gets the highest priority when and if - * it is eventually released. - * - * This guarantees that the kernel will _eventually_ have the lock _unless_ it is held - * by a blocked process. (In the latter case an explicit wait for the hardware lock would cause - * a deadlock, which is why the "idlelock" was invented). - * - * This should be sufficient to wait for GPU idle without - * having to worry about starvation. 
- */ -void drm_legacy_idlelock_take(struct drm_lock_data *lock_data) -{ - int ret; - - spin_lock_bh(&lock_data->spinlock); - lock_data->kernel_waiters++; - if (!lock_data->idle_has_lock) { - - spin_unlock_bh(&lock_data->spinlock); - ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT); - spin_lock_bh(&lock_data->spinlock); - - if (ret == 1) - lock_data->idle_has_lock = 1; - } - spin_unlock_bh(&lock_data->spinlock); -} -EXPORT_SYMBOL(drm_legacy_idlelock_take); - -void drm_legacy_idlelock_release(struct drm_lock_data *lock_data) -{ - unsigned int old, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - if (--lock_data->kernel_waiters == 0) { - if (lock_data->idle_has_lock) { - do { - old = *lock; - prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT); - } while (prev != old); - wake_up_interruptible(&lock_data->lock_queue); - lock_data->idle_has_lock = 0; - } - } - spin_unlock_bh(&lock_data->spinlock); -} -EXPORT_SYMBOL(drm_legacy_idlelock_release); - -static int drm_legacy_i_have_hw_lock(struct drm_device *dev, - struct drm_file *file_priv) -{ - struct drm_master *master = file_priv->master; - - return (file_priv->lock_count && master->lock.hw_lock && - _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && - master->lock.file_priv == file_priv); -} - -void drm_legacy_lock_release(struct drm_device *dev, struct file *filp) -{ - struct drm_file *file_priv = filp->private_data; - - /* if the master has gone away we can't do anything with the lock */ - if (!dev->master) - return; - - if (drm_legacy_i_have_hw_lock(dev, file_priv)) { - DRM_DEBUG("File %p released, freeing lock for context %d\n", - filp, _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock)); - drm_legacy_lock_free(&file_priv->master->lock, - _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock)); - } -} - -void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - /* - * Since the master is disappearing, so is the - * possibility to lock. - */ - mutex_lock(&dev->struct_mutex); - if (master->lock.hw_lock) { - if (dev->sigdata.lock == master->lock.hw_lock) - dev->sigdata.lock = NULL; - master->lock.hw_lock = NULL; - master->lock.file_priv = NULL; - wake_up_interruptible_all(&master->lock.lock_queue); - } - mutex_unlock(&dev->struct_mutex); -} diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c deleted file mode 100644 index d2e1dccd8113..000000000000 --- a/drivers/gpu/drm/drm_memory.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * \file drm_memory.c - * Memory management wrappers for DRM - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Thu Feb 4 14:00:34 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/export.h> -#include <linux/highmem.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> - -#include <drm/drm_cache.h> -#include <drm/drm_device.h> - -#include "drm_legacy.h" - -#if IS_ENABLED(CONFIG_AGP) - -#ifdef HAVE_PAGE_AGP -# include <asm/agp.h> -#else -# ifdef __powerpc__ -# define PAGE_AGP pgprot_noncached_wc(PAGE_KERNEL) -# else -# define PAGE_AGP PAGE_KERNEL -# endif -#endif - -static void *agp_remap(unsigned long offset, unsigned long size, - struct drm_device *dev) -{ - unsigned long i, num_pages = - PAGE_ALIGN(size) / PAGE_SIZE; - struct drm_agp_mem *agpmem; - struct page **page_map; - struct page **phys_page_map; - void *addr; - - size = PAGE_ALIGN(size); - -#ifdef __alpha__ - offset -= dev->hose->mem_space->start; -#endif - - list_for_each_entry(agpmem, &dev->agp->memory, head) - if (agpmem->bound <= offset - && (agpmem->bound + (agpmem->pages << PAGE_SHIFT)) >= - (offset + size)) - break; - if (&agpmem->head == &dev->agp->memory) - return NULL; - - /* - * OK, we're mapping AGP space on a chipset/platform on which memory accesses by - * the CPU do not get remapped by the GART. We fix this by using the kernel's - * page-table instead (that's probably faster anyhow...). - */ - /* note: use vmalloc() because num_pages could be large... 
*/ - page_map = vmalloc(array_size(num_pages, sizeof(struct page *))); - if (!page_map) - return NULL; - - phys_page_map = (agpmem->memory->pages + (offset - agpmem->bound) / PAGE_SIZE); - for (i = 0; i < num_pages; ++i) - page_map[i] = phys_page_map[i]; - addr = vmap(page_map, num_pages, VM_IOREMAP, PAGE_AGP); - vfree(page_map); - - return addr; -} - -#else /* CONFIG_AGP */ -static inline void *agp_remap(unsigned long offset, unsigned long size, - struct drm_device *dev) -{ - return NULL; -} - -#endif /* CONFIG_AGP */ - -void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev) -{ - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - map->handle = agp_remap(map->offset, map->size, dev); - else - map->handle = ioremap(map->offset, map->size); -} -EXPORT_SYMBOL(drm_legacy_ioremap); - -void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev) -{ - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - map->handle = agp_remap(map->offset, map->size, dev); - else - map->handle = ioremap_wc(map->offset, map->size); -} -EXPORT_SYMBOL(drm_legacy_ioremap_wc); - -void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev) -{ - if (!map->handle || !map->size) - return; - - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - vunmap(map->handle); - else - iounmap(map->handle); -} -EXPORT_SYMBOL(drm_legacy_ioremapfree); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index e90f0bf895b3..daac649aabdb 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -197,12 +197,14 @@ EXPORT_SYMBOL(mipi_dbi_command_stackbuf); * @fb: The source framebuffer * @clip: Clipping rectangle of the area to be copied * @swap: When true, swap MSB/LSB of 16-bit values + * @fmtcnv_state: Format-conversion state * * Returns: * Zero on success, negative error code on failure. 
*/ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap) + struct drm_rect *clip, bool swap, + struct drm_format_conv_state *fmtcnv_state) { struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); struct iosys_map dst_map = IOSYS_MAP_INIT_VADDR(dst); @@ -215,12 +217,13 @@ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer * switch (fb->format->format) { case DRM_FORMAT_RGB565: if (swap) - drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach); + drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach, + fmtcnv_state); else drm_fb_memcpy(&dst_map, NULL, src, fb, clip); break; case DRM_FORMAT_XRGB8888: - drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, swap); + drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap); break; default: drm_err_once(fb->dev, "Format is not supported: %p4cc\n", @@ -252,7 +255,7 @@ static void mipi_dbi_set_window_address(struct mipi_dbi_dev *dbidev, } static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; @@ -270,7 +273,7 @@ static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, if (!dbi->dc || !full || swap || fb->format->format == DRM_FORMAT_XRGB8888) { tr = dbidev->tx_buf; - ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap); + ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state); if (ret) goto err_msg; } else { @@ -332,7 +335,8 @@ void mipi_dbi_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -368,7 +372,8 @@ void mipi_dbi_enable_flush(struct mipi_dbi_dev *dbidev, if (!drm_dev_enter(&dbidev->drm, &idx)) return; - mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); backlight_enable(dbidev->backlight); drm_dev_exit(idx); diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 14201f73aab1..843a6dbda93a 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -347,7 +347,8 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); - mipi_dsi_detach(dsi); + if (dsi->attached) + mipi_dsi_detach(dsi); mipi_dsi_device_unregister(dsi); return 0; @@ -370,11 +371,18 @@ EXPORT_SYMBOL(mipi_dsi_host_unregister); int mipi_dsi_attach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + int ret; if (!ops || !ops->attach) return -ENOSYS; - return ops->attach(dsi->host, dsi); + ret = ops->attach(dsi->host, dsi); + if (ret) + return ret; + + dsi->attached = true; + + return 0; } EXPORT_SYMBOL(mipi_dsi_attach); @@ -386,9 +394,14 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + if (WARN_ON(!dsi->attached)) + return -EINVAL; + if (!ops || !ops->detach) return -ENOSYS; + dsi->attached = false; + return ops->detach(dsi->host, dsi); } EXPORT_SYMBOL(mipi_dsi_detach); diff --git a/drivers/gpu/drm/drm_mode_object.c 
b/drivers/gpu/drm/drm_mode_object.c index ac0d2ce3f870..0e8355063eee 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -538,7 +538,7 @@ retry: obj_to_connector(obj), prop_value); } else { - ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value); + ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, false); if (ret) goto out; ret = drm_atomic_commit(state); diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 39d35fc3a43b..c585f1e8803e 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -29,18 +29,12 @@ #include <linux/pci.h> #include <linux/slab.h> +#include <drm/drm_auth.h> #include <drm/drm.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include "drm_internal.h" -#include "drm_legacy.h" - -#ifdef CONFIG_DRM_LEGACY -/* List of devices hanging off drivers with stealth attach. */ -static LIST_HEAD(legacy_dev_list); -static DEFINE_MUTEX(legacy_dev_list_lock); -#endif static int drm_get_pci_domain(struct drm_device *dev) { @@ -71,199 +65,3 @@ int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master) master->unique_len = strlen(master->unique); return 0; } - -#ifdef CONFIG_DRM_LEGACY - -static int drm_legacy_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if ((p->busnum >> 8) != drm_get_pci_domain(dev) || - (p->busnum & 0xff) != pdev->bus->number || - p->devnum != PCI_SLOT(pdev->devfn) || p->funcnum != PCI_FUNC(pdev->devfn)) - return -EINVAL; - - p->irq = pdev->irq; - - DRM_DEBUG("%d:%d:%d => IRQ %d\n", p->busnum, p->devnum, p->funcnum, - p->irq); - return 0; -} - -/** - * drm_legacy_irq_by_busid - Get interrupt from bus ID - * @dev: DRM device - * @data: IOCTL parameter pointing to a drm_irq_busid structure - * @file_priv: DRM file private. - * - * Finds the PCI device with the specified bus id and gets its IRQ number. - * This IOCTL is deprecated, and will now return EINVAL for any busid not equal - * to that of the device that this DRM instance attached to. - * - * Return: 0 on success or a negative error code on failure. - */ -int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_irq_busid *p = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - /* UMS was only ever support on PCI devices. 
*/ - if (WARN_ON(!dev_is_pci(dev->dev))) - return -EINVAL; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - return -EOPNOTSUPP; - - return drm_legacy_pci_irq_by_busid(dev, p); -} - -void drm_legacy_pci_agp_destroy(struct drm_device *dev) -{ - if (dev->agp) { - arch_phys_wc_del(dev->agp->agp_mtrr); - drm_legacy_agp_clear(dev); - kfree(dev->agp); - dev->agp = NULL; - } -} - -static void drm_legacy_pci_agp_init(struct drm_device *dev) -{ - if (drm_core_check_feature(dev, DRIVER_USE_AGP)) { - if (pci_find_capability(to_pci_dev(dev->dev), PCI_CAP_ID_AGP)) - dev->agp = drm_legacy_agp_init(dev); - if (dev->agp) { - dev->agp->agp_mtrr = arch_phys_wc_add( - dev->agp->agp_info.aper_base, - dev->agp->agp_info.aper_size * - 1024 * 1024); - } - } -} - -static int drm_legacy_get_pci_dev(struct pci_dev *pdev, - const struct pci_device_id *ent, - const struct drm_driver *driver) -{ - struct drm_device *dev; - int ret; - - DRM_DEBUG("\n"); - - dev = drm_dev_alloc(driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - ret = pci_enable_device(pdev); - if (ret) - goto err_free; - -#ifdef __alpha__ - dev->hose = pdev->sysdata; -#endif - - drm_legacy_pci_agp_init(dev); - - ret = drm_dev_register(dev, ent->driver_data); - if (ret) - goto err_agp; - - if (drm_core_check_feature(dev, DRIVER_LEGACY)) { - mutex_lock(&legacy_dev_list_lock); - list_add_tail(&dev->legacy_dev_list, &legacy_dev_list); - mutex_unlock(&legacy_dev_list_lock); - } - - return 0; - -err_agp: - drm_legacy_pci_agp_destroy(dev); - pci_disable_device(pdev); -err_free: - drm_dev_put(dev); - return ret; -} - -/** - * drm_legacy_pci_init - shadow-attach a legacy DRM PCI driver - * @driver: DRM device driver - * @pdriver: PCI device driver - * - * This is only used by legacy dri1 drivers and deprecated. - * - * Return: 0 on success or a negative error code on failure. - */ -int drm_legacy_pci_init(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ - struct pci_dev *pdev = NULL; - const struct pci_device_id *pid; - int i; - - DRM_DEBUG("\n"); - - if (WARN_ON(!(driver->driver_features & DRIVER_LEGACY))) - return -EINVAL; - - /* If not using KMS, fall back to stealth mode manual scanning. */ - for (i = 0; pdriver->id_table[i].vendor != 0; i++) { - pid = &pdriver->id_table[i]; - - /* Loop around setting up a DRM device for each PCI device - * matching our ID and device class. If we had the internal - * function that pci_get_subsys and pci_get_class used, we'd - * be able to just pass pid in instead of doing a two-stage - * thing. - */ - pdev = NULL; - while ((pdev = - pci_get_subsys(pid->vendor, pid->device, pid->subvendor, - pid->subdevice, pdev)) != NULL) { - if ((pdev->class & pid->class_mask) != pid->class) - continue; - - /* stealth mode requires a manual probe */ - pci_dev_get(pdev); - drm_legacy_get_pci_dev(pdev, pid, driver); - } - } - return 0; -} -EXPORT_SYMBOL(drm_legacy_pci_init); - -/** - * drm_legacy_pci_exit - unregister shadow-attach legacy DRM driver - * @driver: DRM device driver - * @pdriver: PCI device driver - * - * Unregister a DRM driver shadow-attached through drm_legacy_pci_init(). This - * is deprecated and only used by dri1 drivers. 
- */ -void drm_legacy_pci_exit(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ - struct drm_device *dev, *tmp; - - DRM_DEBUG("\n"); - - if (!(driver->driver_features & DRIVER_LEGACY)) { - WARN_ON(1); - } else { - mutex_lock(&legacy_dev_list_lock); - list_for_each_entry_safe(dev, tmp, &legacy_dev_list, - legacy_dev_list) { - if (dev->driver == driver) { - list_del(&dev->legacy_dev_list); - drm_put_dev(dev); - } - } - mutex_unlock(&legacy_dev_list_lock); - } - DRM_INFO("Module unloaded\n"); -} -EXPORT_SYMBOL(drm_legacy_pci_exit); - -#endif diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 24e7998d1731..672c655c7a8e 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -230,6 +230,103 @@ static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane return 0; } +/** + * DOC: hotspot properties + * + * HOTSPOT_X: property to set mouse hotspot x offset. + * HOTSPOT_Y: property to set mouse hotspot y offset. + * + * When the plane is being used as a cursor image to display a mouse pointer, + * the "hotspot" is the offset within the cursor image where mouse events + * are expected to go. + * + * Positive values move the hotspot from the top-left corner of the cursor + * plane towards the right and bottom. + * + * Most display drivers do not need this information because the + * hotspot is not actually connected to anything visible on screen. + * However, this is necessary for display drivers like the para-virtualized + * drivers (eg qxl, vbox, virtio, vmwgfx), that are attached to a user console + * with a mouse pointer. Since these consoles are often being remoted over a + * network, they would otherwise have to wait to display the pointer movement to + * the user until a full network round-trip has occurred. New mouse events have + * to be sent from the user's console, over the network to the virtual input + * devices, forwarded to the desktop for processing, and then the cursor plane's + * position can be updated and sent back to the user's console over the network. + * Instead, with the hotspot information, the console can anticipate the new + * location, and draw the mouse cursor there before the confirmation comes in. + * To do that correctly, the user's console must be able predict how the + * desktop will process mouse events, which normally requires the desktop's + * mouse topology information, ie where each CRTC sits in the mouse coordinate + * space. This is typically sent to the para-virtualized drivers using some + * driver-specific method, and the driver then forwards it to the console by + * way of the virtual display device or hypervisor. + * + * The assumption is generally made that there is only one cursor plane being + * used this way at a time, and that the desktop is feeding all mouse devices + * into the same global pointer. Para-virtualized drivers that require this + * should only be exposing a single cursor plane, or find some other way + * to coordinate with a userspace desktop that supports multiple pointers. + * If the hotspot properties are set, the cursor plane is therefore assumed to be + * used only for displaying a mouse cursor image, and the position of the combined + * cursor plane + offset can therefore be used for coordinating with input from a + * mouse device. + * + * The cursor will then be drawn either at the location of the plane in the CRTC + * console, or as a free-floating cursor plane on the user's console + * corresponding to their desktop mouse position. 
+ *
+ * DRM clients which would like to work correctly on drivers which expose
+ * hotspot properties should advertise DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT.
+ * Setting these properties on drivers which do not special-case
+ * cursor planes will return EOPNOTSUPP, which can be used by userspace to
+ * gauge requirements of the hardware/drivers they're running on. Advertising
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT implies that the userspace client will be
+ * correctly setting the hotspot properties.
+ */
+
+/**
+ * drm_plane_create_hotspot_properties - creates the mouse hotspot
+ * properties and attaches them to the given cursor plane
+ *
+ * @plane: drm cursor plane
+ *
+ * This function enables the mouse hotspot property on a given
+ * cursor plane. Look at the documentation for hotspot properties
+ * to get a better understanding of what they're used for.
+ *
+ * RETURNS:
+ * Zero for success or -errno
+ */
+static int drm_plane_create_hotspot_properties(struct drm_plane *plane)
+{
+	struct drm_property *prop_x;
+	struct drm_property *prop_y;
+
+	drm_WARN_ON(plane->dev,
+		    !drm_core_check_feature(plane->dev,
+					    DRIVER_CURSOR_HOTSPOT));
+
+	prop_x = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_X",
+						  INT_MIN, INT_MAX);
+	if (IS_ERR(prop_x))
+		return PTR_ERR(prop_x);
+
+	prop_y = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_Y",
+						  INT_MIN, INT_MAX);
+	if (IS_ERR(prop_y)) {
+		drm_property_destroy(plane->dev, prop_x);
+		return PTR_ERR(prop_y);
+	}
+
+	drm_object_attach_property(&plane->base, prop_x, 0);
+	drm_object_attach_property(&plane->base, prop_y, 0);
+	plane->hotspot_x_property = prop_x;
+	plane->hotspot_y_property = prop_y;
+
+	return 0;
+}
+
 __printf(9, 0)
 static int __drm_universal_plane_init(struct drm_device *dev,
 				      struct drm_plane *plane,
@@ -348,6 +445,10 @@ static int __drm_universal_plane_init(struct drm_device *dev,
 		drm_object_attach_property(&plane->base, config->prop_src_w, 0);
 		drm_object_attach_property(&plane->base, config->prop_src_h, 0);
 	}
+	if (drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) &&
+	    type == DRM_PLANE_TYPE_CURSOR) {
+		drm_plane_create_hotspot_properties(plane);
+	}
 
 	if (format_modifier_count)
 		create_in_format_blob(dev, plane);
@@ -678,6 +779,19 @@ int drm_mode_getplane_res(struct drm_device *dev, void *data,
 		    !file_priv->universal_planes)
 			continue;
 
+		/*
+		 * If we're running on a virtualized driver, then unless
+		 * userspace advertises support for the virtualized cursor
+		 * plane, disable cursor planes because they'll be broken
+		 * due to missing cursor hotspot info.
+		 */
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) &&
+		    file_priv->atomic &&
+		    !file_priv->supports_virtualized_cursor_plane)
+			continue;
+
 		if (drm_lease_held(file_priv, plane->base.id)) {
 			if (count < plane_resp->count_planes &&
 			    put_user(plane->base.id, plane_ptr + count))
@@ -1052,8 +1166,10 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 			return PTR_ERR(fb);
 		}
 
-		fb->hot_x = req->hot_x;
-		fb->hot_y = req->hot_y;
+		if (plane->hotspot_x_property && plane->state)
+			plane->state->hotspot_x = req->hot_x;
+		if (plane->hotspot_y_property && plane->state)
+			plane->state->hotspot_y = req->hot_y;
 	} else {
 		fb = NULL;
 	}
@@ -1387,6 +1503,7 @@ retry:
 out:
 	if (fb)
 		drm_framebuffer_put(fb);
+	fb = NULL;
 	if (plane->old_fb)
 		drm_framebuffer_put(plane->old_fb);
 	plane->old_fb = NULL;
@@ -1442,6 +1559,36 @@ out:
 * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and
 * drm_atomic_helper_damage_iter_next() helper iterator function to get damage
 * rectangles clipped to &drm_plane_state.src.
+ *
+ * Note that there are two types of damage handling: frame damage and buffer
+ * damage; the type of damage handling implemented depends on a driver's upload
+ * target. Drivers implementing a per-plane or per-CRTC upload target need to
+ * handle frame damage, while drivers implementing a per-buffer upload target
+ * need to handle buffer damage.
+ *
+ * The existing damage helpers only support the frame damage type; there is no
+ * buffer age support or similar damage accumulation algorithm implemented yet.
+ *
+ * Only drivers handling frame damage can use the mentioned damage helpers to
+ * iterate over the damaged regions. Drivers that handle buffer damage must set
+ * &drm_plane_state.ignore_damage_clips for drm_atomic_helper_damage_iter_init()
+ * to know that damage clips should be ignored and return &drm_plane_state.src
+ * as the damage rectangle, to force a full plane update.
+ *
+ * Drivers with a per-buffer upload target could compare the &drm_plane_state.fb
+ * of the old and new plane states to determine if the framebuffer attached to a
+ * plane has changed or not since the last plane update. If &drm_plane_state.fb
+ * has changed, then &drm_plane_state.ignore_damage_clips must be set to true.
+ *
+ * That is because drivers with a per-plane upload target expect the backing
+ * storage buffer to not change for a given plane. If the upload buffer changes
+ * between page flips, the new upload buffer has to be updated as a whole.
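A minimal driver-side sketch of that rule, assuming a per-buffer upload target (the foo_ names are hypothetical; only the ignore_damage_clips handling is shown):

#include <drm/drm_atomic.h>
#include <drm/drm_plane.h>

static int foo_plane_atomic_check(struct drm_plane *plane,
				  struct drm_atomic_state *state)
{
	struct drm_plane_state *old_state =
		drm_atomic_get_old_plane_state(state, plane);
	struct drm_plane_state *new_state =
		drm_atomic_get_new_plane_state(state, plane);

	/* The reported clips are frame damage; if the backing buffer
	 * changed since the last update they no longer describe what is
	 * stale in the new buffer, so ask the helpers to hand back the
	 * full &drm_plane_state.src rectangle instead. */
	new_state->ignore_damage_clips = old_state->fb != new_state->fb;

	return 0;
}

Frame-damage drivers would instead leave ignore_damage_clips unset and walk the clips with drm_atomic_helper_damage_iter_init()/drm_atomic_helper_damage_iter_next().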
This + * can be improved in the future if support for frame damage is added to the DRM + * damage helpers, similarly to how user-space already handle this case as it is + * explained in the following documents: + * + * https://registry.khronos.org/EGL/extensions/KHR/EGL_KHR_swap_buffers_with_damage.txt + * https://emersion.fr/blog/2019/intro-to-damage-tracking/ */ /** diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 5e95089676ff..7982be4b0306 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -279,35 +279,3 @@ void drm_plane_helper_destroy(struct drm_plane *plane) kfree(plane); } EXPORT_SYMBOL(drm_plane_helper_destroy); - -/** - * drm_plane_helper_atomic_check() - Helper to check plane atomic-state - * @plane: plane to check - * @state: atomic state object - * - * Provides a default plane-state check handler for planes whose atomic-state - * scale and positioning are not expected to change since the plane is always - * a fullscreen scanout buffer. - * - * This is often the case for the primary plane of simple framebuffers. See - * also drm_crtc_helper_atomic_check() for the respective CRTC-state check - * helper function. - * - * RETURNS: - * Zero on success, or an errno code otherwise. - */ -int drm_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) -{ - struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); - struct drm_crtc *new_crtc = new_plane_state->crtc; - struct drm_crtc_state *new_crtc_state = NULL; - - if (new_crtc) - new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); - - return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, - DRM_PLANE_NO_SCALING, - DRM_PLANE_NO_SCALING, - false, false); -} -EXPORT_SYMBOL(drm_plane_helper_atomic_check); diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index dfec479830e4..596272149a35 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -27,6 +27,7 @@ #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_print.h> #include <drm/drm_property.h> #include "drm_crtc_internal.h" @@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob, } EXPORT_SYMBOL(drm_property_replace_blob); +/** + * drm_property_replace_blob_from_id - replace a blob property taking a reference + * @dev: DRM device + * @blob: a pointer to the member blob to be replaced + * @blob_id: the id of the new blob to replace with + * @expected_size: expected size of the blob property + * @expected_elem_size: expected size of an element in the blob property + * @replaced: if the blob was in fact replaced + * + * Look up the new blob from id, take its reference, check expected sizes of + * the blob and its element and replace the old blob by the new one. Advertise + * if the replacement operation was successful. + * + * Return: true if the blob was in fact replaced. -EINVAL if the new blob was + * not found or sizes don't match. 
+ */ +int drm_property_replace_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced) +{ + struct drm_property_blob *new_blob = NULL; + + if (blob_id != 0) { + new_blob = drm_property_lookup_blob(dev, blob_id); + if (new_blob == NULL) { + drm_dbg_atomic(dev, + "cannot find blob ID %llu\n", blob_id); + return -EINVAL; + } + + if (expected_size > 0 && + new_blob->length != expected_size) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu different from expected %zu\n", + new_blob->base.id, new_blob->length, expected_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + if (expected_elem_size > 0 && + new_blob->length % expected_elem_size != 0) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu not divisible by element size %zu\n", + new_blob->base.id, new_blob->length, expected_elem_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + } + + *replaced |= drm_property_replace_blob(blob, new_blob); + drm_property_blob_put(new_blob); + + return 0; +} +EXPORT_SYMBOL(drm_property_replace_blob_from_id); + int drm_mode_getblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c deleted file mode 100644 index f4e6184d1877..000000000000 --- a/drivers/gpu/drm/drm_scatter.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * \file drm_scatter.c - * IOCTLs to manage scatter/gather memory - * - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Mon Dec 18 23:20:54 2000 by gareth@valinux.com - * - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include <drm/drm.h> -#include <drm/drm_drv.h> -#include <drm/drm_print.h> - -#include "drm_legacy.h" - -#define DEBUG_SCATTER 0 - -static void drm_sg_cleanup(struct drm_sg_mem * entry) -{ - struct page *page; - int i; - - for (i = 0; i < entry->pages; i++) { - page = entry->pagelist[i]; - if (page) - ClearPageReserved(page); - } - - vfree(entry->virtual); - - kfree(entry->busaddr); - kfree(entry->pagelist); - kfree(entry); -} - -void drm_legacy_sg_cleanup(struct drm_device *dev) -{ - if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg && - drm_core_check_feature(dev, DRIVER_LEGACY)) { - drm_sg_cleanup(dev->sg); - dev->sg = NULL; - } -} -#ifdef _LP64 -# define ScatterHandle(x) (unsigned int)((x >> 32) + (x & ((1L << 32) - 1))) -#else -# define ScatterHandle(x) (unsigned int)(x) -#endif - -int drm_legacy_sg_alloc(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_scatter_gather *request = data; - struct drm_sg_mem *entry; - unsigned long pages, i, j; - - DRM_DEBUG("\n"); - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - if (request->size > SIZE_MAX - PAGE_SIZE) - return -EINVAL; - - if (dev->sg) - return -EINVAL; - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; - DRM_DEBUG("size=%ld pages=%ld\n", request->size, pages); - - entry->pages = pages; - entry->pagelist = kcalloc(pages, sizeof(*entry->pagelist), GFP_KERNEL); - if (!entry->pagelist) { - kfree(entry); - return -ENOMEM; - } - - entry->busaddr = kcalloc(pages, sizeof(*entry->busaddr), GFP_KERNEL); - if (!entry->busaddr) { - kfree(entry->pagelist); - kfree(entry); - return -ENOMEM; - } - - entry->virtual = vmalloc_32(pages << PAGE_SHIFT); - if (!entry->virtual) { - kfree(entry->busaddr); - kfree(entry->pagelist); - kfree(entry); - return -ENOMEM; - } - - /* This also forces the mapping of COW pages, so our page list - * will be valid. Please don't remove it... - */ - memset(entry->virtual, 0, pages << PAGE_SHIFT); - - entry->handle = ScatterHandle((unsigned long)entry->virtual); - - DRM_DEBUG("handle = %08lx\n", entry->handle); - DRM_DEBUG("virtual = %p\n", entry->virtual); - - for (i = (unsigned long)entry->virtual, j = 0; j < pages; - i += PAGE_SIZE, j++) { - entry->pagelist[j] = vmalloc_to_page((void *)i); - if (!entry->pagelist[j]) - goto failed; - SetPageReserved(entry->pagelist[j]); - } - - request->handle = entry->handle; - - dev->sg = entry; - -#if DEBUG_SCATTER - /* Verify that each page points to its virtual address, and vice - * versa. 
- */ - { - int error = 0; - - for (i = 0; i < pages; i++) { - unsigned long *tmp; - - tmp = page_address(entry->pagelist[i]); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - *tmp = 0xcafebabe; - } - tmp = (unsigned long *)((u8 *) entry->virtual + - (PAGE_SIZE * i)); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - if (*tmp != 0xcafebabe && error == 0) { - error = 1; - DRM_ERROR("Scatter allocation error, " - "pagelist does not match " - "virtual mapping\n"); - } - } - tmp = page_address(entry->pagelist[i]); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - *tmp = 0; - } - } - if (error == 0) - DRM_ERROR("Scatter allocation matches pagelist\n"); - } -#endif - - return 0; - - failed: - drm_sg_cleanup(entry); - return -ENOMEM; -} - -int drm_legacy_sg_free(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_scatter_gather *request = data; - struct drm_sg_mem *entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - entry = dev->sg; - dev->sg = NULL; - - if (!entry || entry->handle != request->handle) - return -EINVAL; - - DRM_DEBUG("virtual = %p\n", entry->virtual); - - drm_sg_cleanup(entry); - - return 0; -} diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index b9cc62982196..84101baeecc6 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -126,6 +126,11 @@ * synchronize between the two. * This requirement is inherited from the Vulkan fence API. * + * If &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE is set, the ioctl will also set + * a fence deadline hint on the backing fences before waiting, to provide the + * fence signaler with an appropriate sense of urgency. The deadline is + * specified as an absolute &CLOCK_MONOTONIC value in units of ns. + * * Similarly, &DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT takes an array of syncobj * handles as well as an array of u64 points and does a host-side wait on all * of syncobj fences at the given points simultaneously. 
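To illustrate the new flag, a user-space wait with a deadline hint might look like the following sketch, assuming the deadline_nsec field and DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag added by this series (error handling omitted):

#include <stdint.h>
#include <string.h>
#include <time.h>
#include <xf86drm.h>

/* Sketch: wait on one syncobj and hint that its fence is needed within
 * budget_ns from now. Deadlines are absolute CLOCK_MONOTONIC values in ns. */
static int wait_with_deadline(int fd, uint32_t handle, int64_t timeout_nsec,
			      uint64_t budget_ns)
{
	struct drm_syncobj_wait args;
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);

	memset(&args, 0, sizeof(args));
	args.handles = (uintptr_t)&handle;	/* user pointer to handle array */
	args.count_handles = 1;
	args.timeout_nsec = timeout_nsec;
	args.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE;
	args.deadline_nsec = (uint64_t)now.tv_sec * 1000000000ull +
			     now.tv_nsec + budget_ns;

	return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}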
@@ -1027,7 +1032,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint32_t count, uint32_t flags, signed long timeout, - uint32_t *idx) + uint32_t *idx, + ktime_t *deadline) { struct syncobj_wait_entry *entries; struct dma_fence *fence; @@ -1108,6 +1114,15 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } + if (deadline) { + for (i = 0; i < count; ++i) { + fence = entries[i].fence; + if (!fence) + continue; + dma_fence_set_deadline(fence, *deadline); + } + } + do { set_current_state(TASK_INTERRUPTIBLE); @@ -1206,7 +1221,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev, struct drm_file *file_private, struct drm_syncobj_wait *wait, struct drm_syncobj_timeline_wait *timeline_wait, - struct drm_syncobj **syncobjs, bool timeline) + struct drm_syncobj **syncobjs, bool timeline, + ktime_t *deadline) { signed long timeout = 0; uint32_t first = ~0; @@ -1217,7 +1233,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev, NULL, wait->count_handles, wait->flags, - timeout, &first); + timeout, &first, + deadline); if (timeout < 0) return timeout; wait->first_signaled = first; @@ -1227,7 +1244,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev, u64_to_user_ptr(timeline_wait->points), timeline_wait->count_handles, timeline_wait->flags, - timeout, &first); + timeout, &first, + deadline); if (timeout < 0) return timeout; timeline_wait->first_signaled = first; @@ -1298,17 +1316,22 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_wait *args = data; struct drm_syncobj **syncobjs; + unsigned int possible_flags; + ktime_t t, *tp = NULL; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -EOPNOTSUPP; - if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) + possible_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE; + + if (args->flags & ~possible_flags) return -EINVAL; if (args->count_handles == 0) - return -EINVAL; + return 0; ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), @@ -1317,8 +1340,13 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; + if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE) { + t = ns_to_ktime(args->deadline_nsec); + tp = &t; + } + ret = drm_syncobj_array_wait(dev, file_private, - args, NULL, syncobjs, false); + args, NULL, syncobjs, false, tp); drm_syncobj_array_free(syncobjs, args->count_handles); @@ -1331,18 +1359,23 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_timeline_wait *args = data; struct drm_syncobj **syncobjs; + unsigned int possible_flags; + ktime_t t, *tp = NULL; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; - if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) + possible_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE; + + if (args->flags & ~possible_flags) return -EINVAL; if (args->count_handles == 0) - return -EINVAL; + return 0; ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), @@ -1351,8 +1384,13 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void 
*data, if (ret < 0) return ret; + if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE) { + t = ns_to_ktime(args->deadline_nsec); + tp = &t; + } + ret = drm_syncobj_array_wait(dev, file_private, - NULL, args, syncobjs, true); + NULL, args, syncobjs, true, tp); drm_syncobj_array_free(syncobjs, args->count_handles); diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 877e2067534f..702a12bc93bd 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -210,11 +210,6 @@ static u32 __get_vblank_counter(struct drm_device *dev, unsigned int pipe) if (crtc->funcs->get_vblank_counter) return crtc->funcs->get_vblank_counter(crtc); } -#ifdef CONFIG_DRM_LEGACY - else if (dev->driver->get_vblank_counter) { - return dev->driver->get_vblank_counter(dev, pipe); - } -#endif return drm_vblank_no_hw_counter(dev, pipe); } @@ -433,11 +428,6 @@ static void __disable_vblank(struct drm_device *dev, unsigned int pipe) if (crtc->funcs->disable_vblank) crtc->funcs->disable_vblank(crtc); } -#ifdef CONFIG_DRM_LEGACY - else { - dev->driver->disable_vblank(dev, pipe); - } -#endif } /* @@ -1151,11 +1141,6 @@ static int __enable_vblank(struct drm_device *dev, unsigned int pipe) if (crtc->funcs->enable_vblank) return crtc->funcs->enable_vblank(crtc); } -#ifdef CONFIG_DRM_LEGACY - else if (dev->driver->enable_vblank) { - return dev->driver->enable_vblank(dev, pipe); - } -#endif return -EINVAL; } @@ -1574,88 +1559,6 @@ void drm_crtc_vblank_restore(struct drm_crtc *crtc) } EXPORT_SYMBOL(drm_crtc_vblank_restore); -static void drm_legacy_vblank_pre_modeset(struct drm_device *dev, - unsigned int pipe) -{ - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; - - /* vblank is not initialized (IRQ not installed ?), or has been freed */ - if (!drm_dev_has_vblank(dev)) - return; - - if (drm_WARN_ON(dev, pipe >= dev->num_crtcs)) - return; - - /* - * To avoid all the problems that might happen if interrupts - * were enabled/disabled around or between these calls, we just - * have the kernel take a reference on the CRTC (just once though - * to avoid corrupting the count if multiple, mismatch calls occur), - * so that interrupts remain enabled in the interim. 
- */ - if (!vblank->inmodeset) { - vblank->inmodeset = 0x1; - if (drm_vblank_get(dev, pipe) == 0) - vblank->inmodeset |= 0x2; - } -} - -static void drm_legacy_vblank_post_modeset(struct drm_device *dev, - unsigned int pipe) -{ - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; - - /* vblank is not initialized (IRQ not installed ?), or has been freed */ - if (!drm_dev_has_vblank(dev)) - return; - - if (drm_WARN_ON(dev, pipe >= dev->num_crtcs)) - return; - - if (vblank->inmodeset) { - spin_lock_irq(&dev->vbl_lock); - drm_reset_vblank_timestamp(dev, pipe); - spin_unlock_irq(&dev->vbl_lock); - - if (vblank->inmodeset & 0x2) - drm_vblank_put(dev, pipe); - - vblank->inmodeset = 0; - } -} - -int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_modeset_ctl *modeset = data; - unsigned int pipe; - - /* If drm_vblank_init() hasn't been called yet, just no-op */ - if (!drm_dev_has_vblank(dev)) - return 0; - - /* KMS drivers handle this internally */ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return 0; - - pipe = modeset->crtc; - if (pipe >= dev->num_crtcs) - return -EINVAL; - - switch (modeset->cmd) { - case _DRM_PRE_MODESET: - drm_legacy_vblank_pre_modeset(dev, pipe); - break; - case _DRM_POST_MODESET: - drm_legacy_vblank_post_modeset(dev, pipe); - break; - default: - return -EINVAL; - } - - return 0; -} - static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, u64 req_seq, union drm_wait_vblank *vblwait, @@ -1780,10 +1683,6 @@ static void drm_wait_vblank_reply(struct drm_device *dev, unsigned int pipe, static bool drm_wait_vblank_supported(struct drm_device *dev) { -#if IS_ENABLED(CONFIG_DRM_LEGACY) - if (unlikely(drm_core_check_feature(dev, DRIVER_LEGACY))) - return dev->irq_enabled; -#endif return drm_dev_has_vblank(dev); } diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c deleted file mode 100644 index 87c9fe55dec7..000000000000 --- a/drivers/gpu/drm/drm_vm.c +++ /dev/null @@ -1,665 +0,0 @@ -/* - * \file drm_vm.c - * Memory mapping for DRM - * - * \author Rickard E. (Rik) Faith <faith@valinux.com> - * \author Gareth Hughes <gareth@valinux.com> - */ - -/* - * Created: Mon Jan 4 08:58:31 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include <linux/export.h> -#include <linux/pci.h> -#include <linux/seq_file.h> -#include <linux/vmalloc.h> -#include <linux/pgtable.h> - -#if defined(__ia64__) -#include <linux/efi.h> -#include <linux/slab.h> -#endif -#include <linux/mem_encrypt.h> - -#include <drm/drm_device.h> -#include <drm/drm_drv.h> -#include <drm/drm_file.h> -#include <drm/drm_framebuffer.h> -#include <drm/drm_print.h> - -#include "drm_internal.h" -#include "drm_legacy.h" - -struct drm_vma_entry { - struct list_head head; - struct vm_area_struct *vma; - pid_t pid; -}; - -static void drm_vm_open(struct vm_area_struct *vma); -static void drm_vm_close(struct vm_area_struct *vma); - -static pgprot_t drm_io_prot(struct drm_local_map *map, - struct vm_area_struct *vma) -{ - pgprot_t tmp = vm_get_page_prot(vma->vm_flags); - -#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ - defined(__mips__) || defined(__loongarch__) - if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING)) - tmp = pgprot_noncached(tmp); - else - tmp = pgprot_writecombine(tmp); -#elif defined(__ia64__) - if (efi_range_is_wc(vma->vm_start, vma->vm_end - - vma->vm_start)) - tmp = pgprot_writecombine(tmp); - else - tmp = pgprot_noncached(tmp); -#elif defined(__sparc__) || defined(__arm__) - tmp = pgprot_noncached(tmp); -#endif - return tmp; -} - -static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma) -{ - pgprot_t tmp = vm_get_page_prot(vma->vm_flags); - -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE) - tmp = pgprot_noncached_wc(tmp); -#endif - return tmp; -} - -/* - * \c fault method for AGP virtual memory. - * - * \param vma virtual memory area. - * \param address access address. - * \return pointer to the page structure. - * - * Find the right map and if it's AGP memory find the real physical page to - * map, get the page, increment the use count and return it. - */ -#if IS_ENABLED(CONFIG_AGP) -static vm_fault_t drm_vm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list; - struct drm_hash_item *hash; - - /* - * Find the right map - */ - if (!dev->agp) - goto vm_fault_error; - - if (!dev->agp || !dev->agp->cant_use_aperture) - goto vm_fault_error; - - if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash)) - goto vm_fault_error; - - r_list = drm_hash_entry(hash, struct drm_map_list, hash); - map = r_list->map; - - if (map && map->type == _DRM_AGP) { - /* - * Using vm_pgoff as a selector forces us to use this unusual - * addressing scheme. 
- */ - resource_size_t offset = vmf->address - vma->vm_start; - resource_size_t baddr = map->offset + offset; - struct drm_agp_mem *agpmem; - struct page *page; - -#ifdef __alpha__ - /* - * Adjust to a bus-relative address - */ - baddr -= dev->hose->mem_space->start; -#endif - - /* - * It's AGP memory - find the real physical page to map - */ - list_for_each_entry(agpmem, &dev->agp->memory, head) { - if (agpmem->bound <= baddr && - agpmem->bound + agpmem->pages * PAGE_SIZE > baddr) - break; - } - - if (&agpmem->head == &dev->agp->memory) - goto vm_fault_error; - - /* - * Get the page, inc the use count, and return it - */ - offset = (baddr - agpmem->bound) >> PAGE_SHIFT; - page = agpmem->memory->pages[offset]; - get_page(page); - vmf->page = page; - - DRM_DEBUG - ("baddr = 0x%llx page = 0x%p, offset = 0x%llx, count=%d\n", - (unsigned long long)baddr, - agpmem->memory->pages[offset], - (unsigned long long)offset, - page_count(page)); - return 0; - } -vm_fault_error: - return VM_FAULT_SIGBUS; /* Disallow mremap */ -} -#else -static vm_fault_t drm_vm_fault(struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} -#endif - -/* - * \c nopage method for shared virtual memory. - * - * \param vma virtual memory area. - * \param address access address. - * \return pointer to the page structure. - * - * Get the mapping, find the real physical page to map, get the page, and - * return it. - */ -static vm_fault_t drm_vm_shm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_local_map *map = vma->vm_private_data; - unsigned long offset; - unsigned long i; - struct page *page; - - if (!map) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - i = (unsigned long)map->handle + offset; - page = vmalloc_to_page((void *)i); - if (!page) - return VM_FAULT_SIGBUS; - get_page(page); - vmf->page = page; - - DRM_DEBUG("shm_fault 0x%lx\n", offset); - return 0; -} - -/* - * \c close method for shared virtual memory. - * - * \param vma virtual memory area. - * - * Deletes map information if we are the last - * person to close a mapping and it's not in the global maplist. - */ -static void drm_vm_shm_close(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_vma_entry *pt, *temp; - struct drm_local_map *map; - struct drm_map_list *r_list; - int found_maps = 0; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - map = vma->vm_private_data; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { - if (pt->vma->vm_private_data == map) - found_maps++; - if (pt->vma == vma) { - list_del(&pt->head); - kfree(pt); - } - } - - /* We were the only map that was found */ - if (found_maps == 1 && map->flags & _DRM_REMOVABLE) { - /* Check to see if we are in the maplist, if we are not, then - * we delete this mappings information. 
- */ - found_maps = 0; - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map == map) - found_maps++; - } - - if (!found_maps) { - switch (map->type) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - arch_phys_wc_del(map->mtrr); - iounmap(map->handle); - break; - case _DRM_SHM: - vfree(map->handle); - break; - case _DRM_AGP: - case _DRM_SCATTER_GATHER: - break; - case _DRM_CONSISTENT: - dma_free_coherent(dev->dev, - map->size, - map->handle, - map->offset); - break; - } - kfree(map); - } - } - mutex_unlock(&dev->struct_mutex); -} - -/* - * \c fault method for DMA virtual memory. - * - * \param address access address. - * \return pointer to the page structure. - * - * Determine the page number from the page offset and get it from drm_device_dma::pagelist. - */ -static vm_fault_t drm_vm_dma_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_device_dma *dma = dev->dma; - unsigned long offset; - unsigned long page_nr; - struct page *page; - - if (!dma) - return VM_FAULT_SIGBUS; /* Error */ - if (!dma->pagelist) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - /* vm_[pg]off[set] should be 0 */ - page_nr = offset >> PAGE_SHIFT; /* page_nr could just be vmf->pgoff */ - page = virt_to_page((void *)dma->pagelist[page_nr]); - - get_page(page); - vmf->page = page; - - DRM_DEBUG("dma_fault 0x%lx (page %lu)\n", offset, page_nr); - return 0; -} - -/* - * \c fault method for scatter-gather virtual memory. - * - * \param address access address. - * \return pointer to the page structure. - * - * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist. 
- */ -static vm_fault_t drm_vm_sg_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_local_map *map = vma->vm_private_data; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_sg_mem *entry = dev->sg; - unsigned long offset; - unsigned long map_offset; - unsigned long page_offset; - struct page *page; - - if (!entry) - return VM_FAULT_SIGBUS; /* Error */ - if (!entry->pagelist) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - map_offset = map->offset - (unsigned long)dev->sg->virtual; - page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT); - page = entry->pagelist[page_offset]; - get_page(page); - vmf->page = page; - - return 0; -} - -/** AGP virtual memory operations */ -static const struct vm_operations_struct drm_vm_ops = { - .fault = drm_vm_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -/** Shared virtual memory operations */ -static const struct vm_operations_struct drm_vm_shm_ops = { - .fault = drm_vm_shm_fault, - .open = drm_vm_open, - .close = drm_vm_shm_close, -}; - -/** DMA virtual memory operations */ -static const struct vm_operations_struct drm_vm_dma_ops = { - .fault = drm_vm_dma_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -/** Scatter-gather virtual memory operations */ -static const struct vm_operations_struct drm_vm_sg_ops = { - .fault = drm_vm_sg_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -static void drm_vm_open_locked(struct drm_device *dev, - struct vm_area_struct *vma) -{ - struct drm_vma_entry *vma_entry; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL); - if (vma_entry) { - vma_entry->vma = vma; - vma_entry->pid = current->pid; - list_add(&vma_entry->head, &dev->vmalist); - } -} - -static void drm_vm_open(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - - mutex_lock(&dev->struct_mutex); - drm_vm_open_locked(dev, vma); - mutex_unlock(&dev->struct_mutex); -} - -static void drm_vm_close_locked(struct drm_device *dev, - struct vm_area_struct *vma) -{ - struct drm_vma_entry *pt, *temp; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { - if (pt->vma == vma) { - list_del(&pt->head); - kfree(pt); - break; - } - } -} - -/* - * \c close method for all virtual memory types. - * - * \param vma virtual memory area. - * - * Search the \p vma private data entry in drm_device::vmalist, unlink it, and - * free it. - */ -static void drm_vm_close(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - - mutex_lock(&dev->struct_mutex); - drm_vm_close_locked(dev, vma); - mutex_unlock(&dev->struct_mutex); -} - -/* - * mmap DMA memory. - * - * \param file_priv DRM file private. - * \param vma virtual memory area. - * \return zero on success or a negative number on failure. - * - * Sets the virtual memory area operations structure to vm_dma_ops, the file - * pointer, and calls vm_open(). 
- */ -static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev; - struct drm_device_dma *dma; - unsigned long length = vma->vm_end - vma->vm_start; - - dev = priv->minor->dev; - dma = dev->dma; - DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff); - - /* Length must match exact page count */ - if (!dma || (length >> PAGE_SHIFT) != dma->page_count) { - return -EINVAL; - } - - if (!capable(CAP_SYS_ADMIN) && - (dma->flags & _DRM_DMA_USE_PCI_RO)) { - vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE); -#if defined(__i386__) || defined(__x86_64__) - pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW; -#else - /* Ye gads this is ugly. With more thought - we could move this up higher and use - `protection_map' instead. */ - vma->vm_page_prot = - __pgprot(pte_val - (pte_wrprotect - (__pte(pgprot_val(vma->vm_page_prot))))); -#endif - } - - vma->vm_ops = &drm_vm_dma_ops; - - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); - - drm_vm_open_locked(dev, vma); - return 0; -} - -static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) -{ -#ifdef __alpha__ - return dev->hose->dense_mem_base; -#else - return 0; -#endif -} - -/* - * mmap DMA memory. - * - * \param file_priv DRM file private. - * \param vma virtual memory area. - * \return zero on success or a negative number on failure. - * - * If the virtual memory area has no offset associated with it then it's a DMA - * area, so calls mmap_dma(). Otherwise searches the map in drm_device::maplist, - * checks that the restricted flag is not set, sets the virtual memory operations - * according to the mapping type and remaps the pages. Finally sets the file - * pointer and calls vm_open(). - */ -static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_local_map *map = NULL; - resource_size_t offset = 0; - struct drm_hash_item *hash; - - DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff); - - if (!priv->authenticated) - return -EACCES; - - /* We check for "dma". On Apple's UniNorth, it's valid to have - * the AGP mapped at physical address 0 - * --BenH. - */ - if (!vma->vm_pgoff -#if IS_ENABLED(CONFIG_AGP) - && (!dev->agp - || dev->agp->agp_info.device->vendor != PCI_VENDOR_ID_APPLE) -#endif - ) - return drm_mmap_dma(filp, vma); - - if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash)) { - DRM_ERROR("Could not find map\n"); - return -EINVAL; - } - - map = drm_hash_entry(hash, struct drm_map_list, hash)->map; - if (!map || ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) - return -EPERM; - - /* Check for valid size. */ - if (map->size < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN) && (map->flags & _DRM_READ_ONLY)) { - vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE); -#if defined(__i386__) || defined(__x86_64__) - pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW; -#else - /* Ye gads this is ugly. With more thought - we could move this up higher and use - `protection_map' instead. 
*/ - vma->vm_page_prot = - __pgprot(pte_val - (pte_wrprotect - (__pte(pgprot_val(vma->vm_page_prot))))); -#endif - } - - switch (map->type) { -#if !defined(__arm__) - case _DRM_AGP: - if (dev->agp && dev->agp->cant_use_aperture) { - /* - * On some platforms we can't talk to bus dma address from the CPU, so for - * memory of type DRM_AGP, we'll deal with sorting out the real physical - * pages and mappings in fault() - */ -#if defined(__powerpc__) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -#endif - vma->vm_ops = &drm_vm_ops; - break; - } - fallthrough; /* to _DRM_FRAME_BUFFER... */ -#endif - case _DRM_FRAME_BUFFER: - case _DRM_REGISTERS: - offset = drm_core_get_reg_ofs(dev); - vma->vm_page_prot = drm_io_prot(map, vma); - if (io_remap_pfn_range(vma, vma->vm_start, - (map->offset + offset) >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - DRM_DEBUG(" Type = %d; start = 0x%lx, end = 0x%lx," - " offset = 0x%llx\n", - map->type, - vma->vm_start, vma->vm_end, (unsigned long long)(map->offset + offset)); - - vma->vm_ops = &drm_vm_ops; - break; - case _DRM_CONSISTENT: - /* Consistent memory is really like shared memory. But - * it's allocated in a different way, so avoid fault */ - if (remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(map->handle)), - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - vma->vm_page_prot = drm_dma_prot(map->type, vma); - fallthrough; /* to _DRM_SHM */ - case _DRM_SHM: - vma->vm_ops = &drm_vm_shm_ops; - vma->vm_private_data = (void *)map; - break; - case _DRM_SCATTER_GATHER: - vma->vm_ops = &drm_vm_sg_ops; - vma->vm_private_data = (void *)map; - vma->vm_page_prot = drm_dma_prot(map->type, vma); - break; - default: - return -EINVAL; /* This should never happen. 
*/ - } - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); - - drm_vm_open_locked(dev, vma); - return 0; -} - -int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - int ret; - - if (drm_dev_is_unplugged(dev)) - return -ENODEV; - - mutex_lock(&dev->struct_mutex); - ret = drm_mmap_locked(filp, vma); - mutex_unlock(&dev->struct_mutex); - - return ret; -} -EXPORT_SYMBOL(drm_legacy_mmap); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_vma_flush(struct drm_device *dev) -{ - struct drm_vma_entry *vma, *vma_temp; - - /* Clear vma list (only needed for legacy drivers) */ - list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) { - list_del(&vma->head); - kfree(vma); - } -} -#endif diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index a8d3fa81e4ec..6228ce603248 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -640,16 +640,14 @@ static int etnaviv_pdev_probe(struct platform_device *pdev) return component_master_add_with_match(dev, &etnaviv_master_ops, match); } -static int etnaviv_pdev_remove(struct platform_device *pdev) +static void etnaviv_pdev_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &etnaviv_master_ops); - - return 0; } static struct platform_driver etnaviv_platform_driver = { .probe = etnaviv_pdev_probe, - .remove = etnaviv_pdev_remove, + .remove_new = etnaviv_pdev_remove, .driver = { .name = "etnaviv", }, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 2416c526f9b0..3d0f8d182506 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = drm_sched_job_init(&submit->sched_job, &ctx->sched_entity[args->pipe], - submit->ctx); + 1, submit->ctx); if (ret) goto err_submit_put; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 9276756e1397..9b8445d2a128 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1904,11 +1904,10 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) return 0; } -static int etnaviv_gpu_platform_remove(struct platform_device *pdev) +static void etnaviv_gpu_platform_remove(struct platform_device *pdev) { component_del(&pdev->dev, &gpu_ops); pm_runtime_disable(&pdev->dev); - return 0; } static int etnaviv_gpu_rpm_suspend(struct device *dev) @@ -1917,7 +1916,7 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev) u32 idle, mask; /* If there are any jobs in the HW queue, we're not idle */ - if (atomic_read(&gpu->sched.hw_rq_count)) + if (atomic_read(&gpu->sched.credit_count)) return -EBUSY; /* Check whether the hardware (except FE and MC) is idle */ @@ -1970,6 +1969,6 @@ struct platform_driver etnaviv_gpu_driver = { .of_match_table = etnaviv_gpu_match, }, .probe = etnaviv_gpu_platform_probe, - .remove = etnaviv_gpu_platform_remove, + .remove_new = etnaviv_gpu_platform_remove, .id_table = gpu_ids, }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 9b79f218e21a..c4b04b0dee16 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) { int ret; - ret = 
drm_sched_init(&gpu->sched, &etnaviv_sched_ops, + ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, msecs_to_jiffies(500), NULL, NULL, diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 4d986077738b..776f2f0b602d 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -862,18 +862,16 @@ err_disable_pm_runtime: return ret; } -static int exynos5433_decon_remove(struct platform_device *pdev) +static void exynos5433_decon_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &decon_component_ops); - - return 0; } struct platform_driver exynos5433_decon_driver = { .probe = exynos5433_decon_probe, - .remove = exynos5433_decon_remove, + .remove_new = exynos5433_decon_remove, .driver = { .name = "exynos5433-decon", .pm = pm_ptr(&exynos5433_decon_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 0156a5e94435..0d185c0564b9 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -765,7 +765,7 @@ err_iounmap: return ret; } -static int decon_remove(struct platform_device *pdev) +static void decon_remove(struct platform_device *pdev) { struct decon_context *ctx = dev_get_drvdata(&pdev->dev); @@ -774,8 +774,6 @@ static int decon_remove(struct platform_device *pdev) iounmap(ctx->regs); component_del(&pdev->dev, &decon_component_ops); - - return 0; } static int exynos7_decon_suspend(struct device *dev) @@ -840,7 +838,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos7_decon_pm_ops, exynos7_decon_suspend, struct platform_driver decon_driver = { .probe = decon_probe, - .remove = decon_remove, + .remove_new = decon_remove, .driver = { .name = "exynos-decon", .pm = pm_ptr(&exynos7_decon_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 3404ec1367fb..ca31bad6c576 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -250,14 +250,12 @@ out: return component_add(&pdev->dev, &exynos_dp_ops); } -static int exynos_dp_remove(struct platform_device *pdev) +static void exynos_dp_remove(struct platform_device *pdev) { struct exynos_dp_device *dp = platform_get_drvdata(pdev); component_del(&pdev->dev, &exynos_dp_ops); analogix_dp_remove(dp->adp); - - return 0; } static int exynos_dp_suspend(struct device *dev) @@ -285,7 +283,7 @@ MODULE_DEVICE_TABLE(of, exynos_dp_match); struct platform_driver dp_driver = { .probe = exynos_dp_probe, - .remove = exynos_dp_remove, + .remove_new = exynos_dp_remove, .driver = { .name = "exynos-dp", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c index 378e5381978f..0dc36df6ada3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c @@ -101,7 +101,7 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder) ret = drm_connector_init(encoder->dev, connector, &exynos_dpi_connector_funcs, - DRM_MODE_CONNECTOR_VGA); + DRM_MODE_CONNECTOR_DPI); if (ret) { DRM_DEV_ERROR(ctx->dev, "failed to initialize connector with drm\n"); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 8399256cb5c9..7c59e1164a48 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ 
b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -300,6 +300,7 @@ err_mode_config_cleanup: drm_mode_config_cleanup(drm); exynos_drm_cleanup_dma(drm); kfree(private); + dev_set_drvdata(dev, NULL); err_free_drm: drm_dev_put(drm); @@ -313,6 +314,7 @@ static void exynos_drm_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); @@ -344,15 +346,23 @@ static int exynos_drm_platform_probe(struct platform_device *pdev) match); } -static int exynos_drm_platform_remove(struct platform_device *pdev) +static void exynos_drm_platform_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &exynos_drm_ops); - return 0; +} + +static void exynos_drm_platform_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (drm) + drm_atomic_helper_shutdown(drm); } static struct platform_driver exynos_drm_platform_driver = { .probe = exynos_drm_platform_probe, - .remove = exynos_drm_platform_remove, + .remove_new = exynos_drm_platform_remove, + .shutdown = exynos_drm_platform_shutdown, .driver = { .name = "exynos-drm", .pm = &exynos_drm_pm_ops, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 8de2714599fc..e81a576de398 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1367,7 +1367,7 @@ err_pm_dis: return ret; } -static int fimc_remove(struct platform_device *pdev) +static void fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); @@ -1377,8 +1377,6 @@ static int fimc_remove(struct platform_device *pdev) pm_runtime_disable(dev); fimc_put_clocks(ctx); - - return 0; } static int fimc_runtime_suspend(struct device *dev) @@ -1410,7 +1408,7 @@ MODULE_DEVICE_TABLE(of, fimc_of_match); struct platform_driver fimc_driver = { .probe = fimc_probe, - .remove = fimc_remove, + .remove_new = fimc_remove, .driver = { .of_match_table = fimc_of_match, .name = "exynos-drm-fimc", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8dde7b1e9b35..a9f1c5c05894 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -1277,13 +1277,11 @@ err_disable_pm_runtime: return ret; } -static int fimd_remove(struct platform_device *pdev) +static void fimd_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &fimd_component_ops); - - return 0; } static int exynos_fimd_suspend(struct device *dev) @@ -1325,7 +1323,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos_fimd_pm_ops, exynos_fimd_suspend, struct platform_driver fimd_driver = { .probe = fimd_probe, - .remove = fimd_remove, + .remove_new = fimd_remove, .driver = { .name = "exynos4-fb", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 414e585ec7dd..f3138423612e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1530,7 +1530,7 @@ err_destroy_slab: return ret; } -static int g2d_remove(struct platform_device *pdev) +static void g2d_remove(struct platform_device *pdev) { struct g2d_data *g2d = platform_get_drvdata(pdev); @@ -1545,8 +1545,6 @@ static int g2d_remove(struct platform_device *pdev) g2d_fini_cmdlist(g2d); destroy_workqueue(g2d->g2d_workq); 
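A note on the exynos_drm_drv.c hunk above: calling drm_atomic_helper_shutdown() from both unbind and the new platform shutdown callback quiesces planes and CRTCs on reboot and kexec, and clearing drvdata on the bind error path is what makes the shutdown callback's NULL check safe. The guard idiom, as a minimal sketch with hypothetical names:

	static void example_drm_shutdown(struct platform_device *pdev)
	{
		struct drm_device *drm = platform_get_drvdata(pdev);

		/* drvdata is NULL if bind failed or never ran */
		if (drm)
			drm_atomic_helper_shutdown(drm);
	}
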
kmem_cache_destroy(g2d->runqueue_slab); - - return 0; } static int g2d_suspend(struct device *dev) @@ -1609,7 +1607,7 @@ MODULE_DEVICE_TABLE(of, exynos_g2d_match); struct platform_driver g2d_driver = { .probe = g2d_probe, - .remove = g2d_remove, + .remove_new = g2d_remove, .driver = { .name = "exynos-drm-g2d", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 34cdabc30b4f..e9a769590415 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -11,9 +11,10 @@ #include <linux/component.h> #include <linux/kernel.h> #include <linux/mfd/syscon.h> -#include <linux/of_device.h> +#include <linux/mod_devicetable.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <linux/regmap.h> #include <drm/drm_fourcc.h> @@ -103,7 +104,7 @@ struct gsc_context { unsigned int num_formats; void __iomem *regs; - const char **clk_names; + const char *const *clk_names; struct clk *clocks[GSC_MAX_CLOCKS]; int num_clocks; struct gsc_scaler sc; @@ -1217,7 +1218,7 @@ static const unsigned int gsc_tiled_formats[] = { static int gsc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct gsc_driverdata *driver_data; + const struct gsc_driverdata *driver_data; struct exynos_drm_ipp_formats *formats; struct gsc_context *ctx; int num_formats, ret, i, j; @@ -1226,7 +1227,7 @@ static int gsc_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; - driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev); + driver_data = device_get_match_data(dev); ctx->dev = dev; ctx->num_clocks = driver_data->num_clocks; ctx->clk_names = driver_data->clk_names; @@ -1308,15 +1309,13 @@ err_pm_dis: return ret; } -static int gsc_remove(struct platform_device *pdev) +static void gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &gsc_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int __maybe_unused gsc_runtime_suspend(struct device *dev) @@ -1421,7 +1420,7 @@ MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); struct platform_driver gsc_driver = { .probe = gsc_probe, - .remove = gsc_remove, + .remove_new = gsc_remove, .driver = { .name = "exynos-drm-gsc", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 17bab5b1663f..e2920960180f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -442,7 +442,7 @@ err: return ret; } -static int exynos_mic_remove(struct platform_device *pdev) +static void exynos_mic_remove(struct platform_device *pdev) { struct exynos_mic *mic = platform_get_drvdata(pdev); @@ -450,8 +450,6 @@ static int exynos_mic_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); drm_bridge_remove(&mic->bridge); - - return 0; } static const struct of_device_id exynos_mic_of_match[] = { @@ -462,7 +460,7 @@ MODULE_DEVICE_TABLE(of, exynos_mic_of_match); struct platform_driver mic_driver = { .probe = exynos_mic_probe, - .remove = exynos_mic_remove, + .remove_new = exynos_mic_remove, .driver = { .name = "exynos-mic", .pm = pm_ptr(&exynos_mic_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index ffb327c5139e..5f7516655b08 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -329,15 +329,13 
@@ err_component: return ret; } -static int rotator_remove(struct platform_device *pdev) +static void rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &rotator_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int rotator_runtime_suspend(struct device *dev) @@ -453,7 +451,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(rotator_pm_ops, rotator_runtime_suspend, struct platform_driver rotator_driver = { .probe = rotator_probe, - .remove = rotator_remove, + .remove_new = rotator_remove, .driver = { .name = "exynos-rotator", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index f2b8b09a6b4e..392f721f13ab 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -539,15 +539,13 @@ err_ippdrv_register: return ret; } -static int scaler_remove(struct platform_device *pdev) +static void scaler_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &scaler_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int clk_disable_unprepare_wrapper(struct clk *clk) @@ -721,7 +719,7 @@ MODULE_DEVICE_TABLE(of, exynos_scaler_match); struct platform_driver scaler_driver = { .probe = scaler_probe, - .remove = scaler_remove, + .remove_new = scaler_remove, .driver = { .name = "exynos-scaler", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index f5e1adfcaa51..00382f28748a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -462,7 +462,7 @@ static int vidi_probe(struct platform_device *pdev) return component_add(dev, &vidi_component_ops); } -static int vidi_remove(struct platform_device *pdev) +static void vidi_remove(struct platform_device *pdev) { struct vidi_context *ctx = platform_get_drvdata(pdev); @@ -472,13 +472,11 @@ static int vidi_remove(struct platform_device *pdev) } component_del(&pdev->dev, &vidi_component_ops); - - return 0; } struct platform_driver vidi_driver = { .probe = vidi_probe, - .remove = vidi_remove, + .remove_new = vidi_remove, .driver = { .name = "exynos-drm-vidi", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index dd9903eab563..43bed6cbaaea 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -2069,7 +2069,7 @@ err_ddc: return ret; } -static int hdmi_remove(struct platform_device *pdev) +static void hdmi_remove(struct platform_device *pdev) { struct hdmi_context *hdata = platform_get_drvdata(pdev); @@ -2092,8 +2092,6 @@ static int hdmi_remove(struct platform_device *pdev) put_device(&hdata->ddc_adpt->dev); mutex_destroy(&hdata->mutex); - - return 0; } static int __maybe_unused exynos_hdmi_suspend(struct device *dev) @@ -2125,7 +2123,7 @@ static const struct dev_pm_ops exynos_hdmi_pm_ops = { struct platform_driver hdmi_driver = { .probe = hdmi_probe, - .remove = hdmi_remove, + .remove_new = hdmi_remove, .driver = { .name = "exynos-hdmi", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index b302392ff0d7..6822333fd0e6 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1258,13 +1258,11 @@ static int mixer_probe(struct platform_device *pdev) return ret; } 
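The long run of exynos conversions above (and the mixer below) is mechanical: the int-returning .remove callback, whose value the driver core could do nothing useful with, becomes a void callback registered through .remove_new. After conversion a driver takes this shape (example names are hypothetical):

	static void example_remove(struct platform_device *pdev)
	{
		component_del(&pdev->dev, &example_component_ops);
		pm_runtime_disable(&pdev->dev);
	}

	static struct platform_driver example_driver = {
		.probe = example_probe,
		.remove_new = example_remove,
		.driver = {
			.name = "example",
		},
	};
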
-static int mixer_remove(struct platform_device *pdev) +static void mixer_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &mixer_component_ops); - - return 0; } static int __maybe_unused exynos_mixer_suspend(struct device *dev) @@ -1338,5 +1336,5 @@ struct platform_driver mixer_driver = { .of_match_table = mixer_match_types, }, .probe = mixer_probe, - .remove = mixer_remove, + .remove_new = mixer_remove, }; diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index a02f75be81f0..e163649816d5 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -51,7 +51,8 @@ static bool gud_is_big_endian(void) static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format, void *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, + struct drm_format_conv_state *fmtcnv_state) { unsigned int block_width = drm_format_info_block_width(format, 0); unsigned int bits_per_pixel = 8 / block_width; @@ -75,7 +76,7 @@ static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format iosys_map_set_vaddr(&dst_map, buf); iosys_map_set_vaddr(&vmap, src); - drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect, fmtcnv_state); pix8 = buf; for (y = 0; y < height; y++) { @@ -152,7 +153,8 @@ static size_t gud_xrgb8888_to_color(u8 *dst, const struct drm_format_info *forma static int gud_prep_flush(struct gud_device *gdrm, struct drm_framebuffer *fb, const struct iosys_map *src, bool cached_reads, const struct drm_format_info *format, struct drm_rect *rect, - struct gud_set_buffer_req *req) + struct gud_set_buffer_req *req, + struct drm_format_conv_state *fmtcnv_state) { u8 compression = gdrm->compression; struct iosys_map dst; @@ -178,23 +180,23 @@ retry: */ if (format != fb->format) { if (format->format == GUD_DRM_FORMAT_R1) { - len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect); + len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect, fmtcnv_state); if (!len) return -ENOMEM; } else if (format->format == DRM_FORMAT_R8) { - drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect, fmtcnv_state); } else if (format->format == DRM_FORMAT_RGB332) { - drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect, fmtcnv_state); } else if (format->format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, + drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, fmtcnv_state, gud_is_big_endian()); } else if (format->format == DRM_FORMAT_RGB888) { - drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect, fmtcnv_state); } else { len = gud_xrgb8888_to_color(buf, format, vaddr, fb, rect); } } else if (gud_is_big_endian() && format->cpp[0] > 1) { - drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads); + drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads, fmtcnv_state); } else if (compression && cached_reads && pitch == fb->pitches[0]) { /* can compress directly from the framebuffer */ buf = vaddr + rect->y1 * pitch; @@ -266,7 +268,8 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len) static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, const struct iosys_map *src, bool cached_reads, - const struct drm_format_info *format, struct drm_rect *rect) + const struct drm_format_info *format, struct 
drm_rect *rect, + struct drm_format_conv_state *fmtcnv_state) { struct gud_set_buffer_req req; size_t len, trlen; @@ -274,7 +277,7 @@ static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, drm_dbg(&gdrm->drm, "Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); - ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req); + ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req, fmtcnv_state); if (ret) return ret; @@ -318,6 +321,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb const struct iosys_map *src, bool cached_reads, struct drm_rect *damage) { + struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; const struct drm_format_info *format; unsigned int i, lines; size_t pitch; @@ -340,7 +344,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb rect.y1 += i * lines; rect.y2 = min_t(u32, rect.y1 + lines, damage->y2); - ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect); + ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect, &fmtcnv_state); if (ret) { if (ret != -ENODEV && ret != -ECONNRESET && ret != -ESHUTDOWN && ret != -EPROTO) @@ -350,6 +354,8 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb break; } } + + drm_format_conv_state_release(&fmtcnv_state); } void gud_flush_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index ce397a8797f7..b5d6e3352071 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -94,7 +94,7 @@ config DRM_I915_CAPTURE_ERROR This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. Please report any hang for triaging according to: - https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs + https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html If in doubt, say "Y". diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 2d21930d5501..5b7162076850 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,7 +24,9 @@ config DRM_I915_DEBUG select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV + select REF_TRACKER select STACKDEPOT + select STACKTRACE select DRM_DP_AUX_CHARDEV select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) @@ -38,6 +40,7 @@ config DRM_I915_DEBUG select DRM_I915_DEBUG_GEM_ONCE select DRM_I915_DEBUG_MMIO select DRM_I915_DEBUG_RUNTIME_PM + select DRM_I915_DEBUG_WAKEREF select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST default n @@ -231,7 +234,9 @@ config DRM_I915_DEBUG_RUNTIME_PM bool "Enable extra state checking for runtime PM" depends on DRM_I915 default n + select REF_TRACKER select STACKDEPOT + select STACKTRACE help Choose this option to turn on extra state checking for the runtime PM functionality. This may introduce overhead during @@ -240,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM Recommended for driver developers only. If in doubt, say "N" + +config DRM_I915_DEBUG_WAKEREF + bool "Enable extra tracking for wakerefs" + depends on DRM_I915 + select REF_TRACKER + select STACKDEPOT + select STACKTRACE + help + Choose this option to turn on extra state checking and usage + tracking for the wakerefPM functionality. This may introduce + overhead during driver runtime. 
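Stepping back to the gud_pipe.c hunks above: the drm_fb_xrgb8888_to_*() and drm_fb_swab() helpers now take a struct drm_format_conv_state, which owns the temporary buffer a conversion may need, so one allocation is reused across all the damage rectangles of a flush instead of being redone per call. The caller-side pattern, condensed from gud_flush_damage() (dst, src, fb and rect stand in for the driver's locals):

	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;

	/* the same state (and its buffer) serves every rectangle */
	drm_fb_xrgb8888_to_rgb565(&dst, NULL, &src, fb, &rect,
				  &fmtcnv_state, false);

	drm_format_conv_state_release(&fmtcnv_state);
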
+ + If in doubt, say "N" diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 88b2bb005014..e777686190ca 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -47,33 +47,34 @@ subdir-ccflags-y += -I$(srctree)/$(src) # Please keep these build lists sorted! # core driver code -i915-y += i915_driver.o \ - i915_drm_client.o \ - i915_config.o \ - i915_getparam.o \ - i915_ioctl.o \ - i915_irq.o \ - i915_mitigations.o \ - i915_module.o \ - i915_params.o \ - i915_pci.o \ - i915_scatterlist.o \ - i915_suspend.o \ - i915_switcheroo.o \ - i915_sysfs.o \ - i915_utils.o \ - intel_clock_gating.o \ - intel_device_info.o \ - intel_memory_region.o \ - intel_pcode.o \ - intel_region_ttm.o \ - intel_runtime_pm.o \ - intel_sbi.o \ - intel_step.o \ - intel_uncore.o \ - intel_wakeref.o \ - vlv_sideband.o \ - vlv_suspend.o +i915-y += \ + i915_config.o \ + i915_driver.o \ + i915_drm_client.o \ + i915_getparam.o \ + i915_ioctl.o \ + i915_irq.o \ + i915_mitigations.o \ + i915_module.o \ + i915_params.o \ + i915_pci.o \ + i915_scatterlist.o \ + i915_suspend.o \ + i915_switcheroo.o \ + i915_sysfs.o \ + i915_utils.o \ + intel_clock_gating.o \ + intel_device_info.o \ + intel_memory_region.o \ + intel_pcode.o \ + intel_region_ttm.o \ + intel_runtime_pm.o \ + intel_sbi.o \ + intel_step.o \ + intel_uncore.o \ + intel_wakeref.o \ + vlv_sideband.o \ + vlv_suspend.o # core peripheral code i915-y += \ @@ -90,13 +91,13 @@ i915-y += \ i915_syncmap.o \ i915_user_extensions.o -i915-$(CONFIG_COMPAT) += i915_ioc32.o +i915-$(CONFIG_COMPAT) += \ + i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += \ i915_debugfs.o \ - i915_debugfs_params.o \ - display/intel_display_debugfs.o \ - display/intel_pipe_crc.o -i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o + i915_debugfs_params.o +i915-$(CONFIG_PERF_EVENTS) += \ + i915_pmu.o # "Graphics Technology" (aka we talk to the gpu) gt-y += \ @@ -153,7 +154,8 @@ gt-y += \ gt/sysfs_engines.o # x86 intel-gtt module support -gt-$(CONFIG_X86) += gt/intel_ggtt_gmch.o +gt-$(CONFIG_X86) += \ + gt/intel_ggtt_gmch.o # autogenerated null render state gt-y += \ gt/gen6_renderstate.o \ @@ -172,9 +174,9 @@ gem-y += \ gem/i915_gem_domain.o \ gem/i915_gem_execbuffer.o \ gem/i915_gem_internal.o \ - gem/i915_gem_object.o \ gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ + gem/i915_gem_object.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ @@ -191,57 +193,61 @@ gem-y += \ gem/i915_gem_wait.o \ gem/i915_gemfs.o i915-y += \ - $(gem-y) \ - i915_active.o \ - i915_cmd_parser.o \ - i915_deps.o \ - i915_gem_evict.o \ - i915_gem_gtt.o \ - i915_gem_ww.o \ - i915_gem.o \ - i915_query.o \ - i915_request.o \ - i915_scheduler.o \ - i915_trace_points.o \ - i915_ttm_buddy_manager.o \ - i915_vma.o \ - i915_vma_resource.o + $(gem-y) \ + i915_active.o \ + i915_cmd_parser.o \ + i915_deps.o \ + i915_gem.o \ + i915_gem_evict.o \ + i915_gem_gtt.o \ + i915_gem_ww.o \ + i915_query.o \ + i915_request.o \ + i915_scheduler.o \ + i915_trace_points.o \ + i915_ttm_buddy_manager.o \ + i915_vma.o \ + i915_vma_resource.o # general-purpose microcontroller (GuC) support i915-y += \ - gt/uc/intel_gsc_fw.o \ - gt/uc/intel_gsc_proxy.o \ - gt/uc/intel_gsc_uc.o \ - gt/uc/intel_gsc_uc_debugfs.o \ - gt/uc/intel_gsc_uc_heci_cmd_submit.o \ - gt/uc/intel_guc.o \ - gt/uc/intel_guc_ads.o \ - gt/uc/intel_guc_capture.o \ - gt/uc/intel_guc_ct.o \ - gt/uc/intel_guc_debugfs.o \ - gt/uc/intel_guc_fw.o \ - gt/uc/intel_guc_hwconfig.o \ - gt/uc/intel_guc_log.o \ - gt/uc/intel_guc_log_debugfs.o \ - 
gt/uc/intel_guc_rc.o \ - gt/uc/intel_guc_slpc.o \ - gt/uc/intel_guc_submission.o \ - gt/uc/intel_huc.o \ - gt/uc/intel_huc_debugfs.o \ - gt/uc/intel_huc_fw.o \ - gt/uc/intel_uc.o \ - gt/uc/intel_uc_debugfs.o \ - gt/uc/intel_uc_fw.o + gt/uc/intel_gsc_fw.o \ + gt/uc/intel_gsc_proxy.o \ + gt/uc/intel_gsc_uc.o \ + gt/uc/intel_gsc_uc_debugfs.o \ + gt/uc/intel_gsc_uc_heci_cmd_submit.o\ + gt/uc/intel_guc.o \ + gt/uc/intel_guc_ads.o \ + gt/uc/intel_guc_capture.o \ + gt/uc/intel_guc_ct.o \ + gt/uc/intel_guc_debugfs.o \ + gt/uc/intel_guc_fw.o \ + gt/uc/intel_guc_hwconfig.o \ + gt/uc/intel_guc_log.o \ + gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ + gt/uc/intel_guc_slpc.o \ + gt/uc/intel_guc_submission.o \ + gt/uc/intel_huc.o \ + gt/uc/intel_huc_debugfs.o \ + gt/uc/intel_huc_fw.o \ + gt/uc/intel_uc.o \ + gt/uc/intel_uc_debugfs.o \ + gt/uc/intel_uc_fw.o # graphics system controller (GSC) support -i915-y += gt/intel_gsc.o +i915-y += \ + gt/intel_gsc.o # graphics hardware monitoring (HWMON) support -i915-$(CONFIG_HWMON) += i915_hwmon.o +i915-$(CONFIG_HWMON) += \ + i915_hwmon.o # modesetting core code i915-y += \ display/hsw_ips.o \ + display/i9xx_plane.o \ + display/i9xx_wm.o \ display/intel_atomic.o \ display/intel_atomic_plane.o \ display/intel_audio.o \ @@ -257,6 +263,7 @@ i915-y += \ display/intel_display.o \ display/intel_display_driver.o \ display/intel_display_irq.o \ + display/intel_display_params.o \ display/intel_display_power.o \ display/intel_display_power_map.o \ display/intel_display_power_well.o \ @@ -268,9 +275,12 @@ i915-y += \ display/intel_dpll.o \ display/intel_dpll_mgr.o \ display/intel_dpt.o \ + display/intel_dpt_common.o \ display/intel_drrs.o \ display/intel_dsb.o \ + display/intel_dsb_buffer.o \ display/intel_fb.o \ + display/intel_fb_bo.o \ display/intel_fb_pin.o \ display/intel_fbc.o \ display/intel_fdi.o \ @@ -287,8 +297,8 @@ i915-y += \ display/intel_load_detect.o \ display/intel_lpe_audio.o \ display/intel_modeset_lock.o \ - display/intel_modeset_verify.o \ display/intel_modeset_setup.o \ + display/intel_modeset_verify.o \ display/intel_overlay.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ @@ -302,8 +312,6 @@ i915-y += \ display/intel_vblank.o \ display/intel_vga.o \ display/intel_wm.o \ - display/i9xx_plane.o \ - display/i9xx_wm.o \ display/skl_scaler.o \ display/skl_universal_plane.o \ display/skl_watermark.o @@ -311,7 +319,12 @@ i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ display/intel_opregion.o i915-$(CONFIG_DRM_FBDEV_EMULATION) += \ - display/intel_fbdev.o + display/intel_fbdev.o \ + display/intel_fbdev_fb.o +i915-$(CONFIG_DEBUG_FS) += \ + display/intel_display_debugfs.o \ + display/intel_display_debugfs_params.o \ + display/intel_pipe_crc.o # modesetting output/encoder code i915-y += \ @@ -357,13 +370,14 @@ i915-y += \ display/vlv_dsi.o \ display/vlv_dsi_pll.o -i915-y += i915_perf.o +i915-y += \ + i915_perf.o # Protected execution platform (PXP) support. 
Base support is required for HuC i915-y += \ pxp/intel_pxp.o \ - pxp/intel_pxp_tee.o \ - pxp/intel_pxp_huc.o + pxp/intel_pxp_huc.o \ + pxp/intel_pxp_tee.o i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp_cmd.o \ @@ -374,11 +388,11 @@ i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp_session.o # Post-mortem debug and GPU hang state capture -i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += \ + i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ gem/selftests/i915_gem_client_blt.o \ gem/selftests/igt_gem_utils.o \ - selftests/intel_scheduler_helpers.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_atomic.o \ @@ -387,10 +401,12 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/igt_mmap.o \ selftests/igt_reset.o \ selftests/igt_spinner.o \ + selftests/intel_scheduler_helpers.o \ selftests/librapl.o # virtual gpu code -i915-y += i915_vgpu.o +i915-y += \ + i915_vgpu.o i915-$(CONFIG_DRM_I915_GVT) += \ intel_gvt.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index e8ee0a08947e..dfe0b07a122d 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -432,7 +432,7 @@ intel_dp_link_down(struct intel_encoder *encoder, intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); intel_de_posting_read(dev_priv, intel_dp->output_reg); - intel_dp->DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE); + intel_dp->DP &= ~DP_PORT_EN; intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); intel_de_posting_read(dev_priv, intel_dp->output_reg); @@ -475,6 +475,40 @@ intel_dp_link_down(struct intel_encoder *encoder, } } +static void g4x_dp_audio_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!crtc_state->has_audio) + return; + + /* Enable audio presence detect */ + intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE; + intel_de_write(i915, intel_dp->output_reg, intel_dp->DP); + + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} + +static void g4x_dp_audio_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!old_crtc_state->has_audio) + return; + + intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); + + /* Disable audio presence detect */ + intel_dp->DP &= ~DP_AUDIO_OUTPUT_ENABLE; + intel_de_write(i915, intel_dp->output_reg, intel_dp->DP); +} + static void intel_disable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -484,8 +518,6 @@ static void intel_disable_dp(struct intel_atomic_state *state, intel_dp->link_trained = false; - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - /* * Make sure the panel is off before trying to change the mode. * But also ensure that we have vdd while we switch off the panel. @@ -631,8 +663,6 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, * fail when the power sequencer is freshly used for this port. 
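The g4x_dp.c changes here (and the matching g4x_hdmi.c ones below) split audio on/off out of the port enable/disable paths into dedicated encoder->audio_enable()/audio_disable() hooks, toggling the audio presence-detect bit next to the codec enable rather than baking it into port enable. The resulting sequencing, as g4x_enable_dp() now reads:

	/* port first, then backlight, with audio as a separate, last step */
	intel_enable_dp(state, encoder, pipe_config, conn_state);
	intel_edp_backlight_on(pipe_config, conn_state);
	encoder->audio_enable(encoder, pipe_config, conn_state);
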
*/ intel_dp->DP |= DP_PORT_EN; - if (crtc_state->has_audio) - intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE; intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP); intel_de_posting_read(dev_priv, intel_dp->output_reg); @@ -686,8 +716,8 @@ static void g4x_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { intel_enable_dp(state, encoder, pipe_config, conn_state); - intel_audio_codec_enable(encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); + encoder->audio_enable(encoder, pipe_config, conn_state); } static void vlv_enable_dp(struct intel_atomic_state *state, @@ -695,8 +725,8 @@ static void vlv_enable_dp(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - intel_audio_codec_enable(encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); + encoder->audio_enable(encoder, pipe_config, conn_state); } static void g4x_pre_enable_dp(struct intel_atomic_state *state, @@ -1325,6 +1355,8 @@ bool g4x_dp_init(struct drm_i915_private *dev_priv, intel_encoder->disable = g4x_disable_dp; intel_encoder->post_disable = g4x_post_disable_dp; } + intel_encoder->audio_enable = g4x_dp_audio_enable; + intel_encoder->audio_disable = g4x_dp_audio_disable; if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || (HAS_PCH_CPT(dev_priv) && port != PORT_A)) diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 45e044b4a88d..8096492b3fad 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -228,25 +228,51 @@ static void g4x_hdmi_enable_port(struct intel_encoder *encoder, temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg); temp |= SDVO_ENABLE; - if (pipe_config->has_audio) - temp |= HDMI_AUDIO_ENABLE; intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp); intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg); } +static void g4x_hdmi_audio_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder); + + if (!crtc_state->has_audio) + return; + + drm_WARN_ON(&i915->drm, !crtc_state->has_hdmi_sink); + + /* Enable audio presence detect */ + intel_de_rmw(i915, hdmi->hdmi_reg, 0, HDMI_AUDIO_ENABLE); + + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} + +static void g4x_hdmi_audio_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder); + + if (!old_crtc_state->has_audio) + return; + + intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); + + /* Disable audio presence detect */ + intel_de_rmw(i915, hdmi->hdmi_reg, HDMI_AUDIO_ENABLE, 0); +} + static void g4x_enable_hdmi(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - g4x_hdmi_enable_port(encoder, pipe_config); - - drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && - !pipe_config->has_hdmi_sink); - intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void ibx_enable_hdmi(struct intel_atomic_state *state, @@ -262,8 
+288,6 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state, temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg); temp |= SDVO_ENABLE; - if (pipe_config->has_audio) - temp |= HDMI_AUDIO_ENABLE; /* * HW workaround, need to write this twice for issue @@ -296,10 +320,6 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state, intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp); intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg); } - - drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && - !pipe_config->has_hdmi_sink); - intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void cpt_enable_hdmi(struct intel_atomic_state *state, @@ -317,8 +337,6 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state, temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg); temp |= SDVO_ENABLE; - if (pipe_config->has_audio) - temp |= HDMI_AUDIO_ENABLE; /* * WaEnableHDMI8bpcBefore12bpc:snb,ivb @@ -351,10 +369,6 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state, intel_de_rmw(dev_priv, TRANS_CHICKEN1(pipe), TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE, 0); } - - drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && - !pipe_config->has_hdmi_sink); - intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void vlv_enable_hdmi(struct intel_atomic_state *state, @@ -362,11 +376,6 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && - !pipe_config->has_hdmi_sink); - intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void intel_disable_hdmi(struct intel_atomic_state *state, @@ -384,7 +393,7 @@ static void intel_disable_hdmi(struct intel_atomic_state *state, temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg); - temp &= ~(SDVO_ENABLE | HDMI_AUDIO_ENABLE); + temp &= ~SDVO_ENABLE; intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp); intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg); @@ -433,8 +442,6 @@ static void g4x_disable_hdmi(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state); } @@ -443,7 +450,6 @@ static void pch_disable_hdmi(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); } static void pch_post_disable_hdmi(struct intel_atomic_state *state, @@ -750,6 +756,8 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, else intel_encoder->enable = g4x_enable_hdmi; } + intel_encoder->audio_enable = g4x_hdmi_audio_enable; + intel_encoder->audio_disable = g4x_hdmi_audio_disable; intel_encoder->shutdown = intel_hdmi_encoder_shutdown; intel_encoder->type = INTEL_OUTPUT_HDMI; diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c index 7dc38ac02092..611a7d6ef80c 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.c +++ b/drivers/gpu/drm/i915/display/hsw_ips.c @@ -193,7 +193,7 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) if (!hsw_crtc_supports_ips(crtc)) return false; - if (!i915->params.enable_ips) + if (!i915->display.params.enable_ips) return false; if (crtc_state->pipe_bpp > 24) @@ 
-329,7 +329,7 @@ static int hsw_ips_debugfs_status_show(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(&i915->runtime_pm); seq_printf(m, "Enabled by kernel parameter: %s\n", - str_yes_no(i915->params.enable_ips)); + str_yes_no(i915->display.params.enable_ips)); if (DISPLAY_VER(i915) >= 8) { seq_puts(m, "Currently: unknown\n"); diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index af0c79a4c9a4..11ca9572e8b3 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -608,7 +608,7 @@ static bool intel_crtc_active(struct intel_crtc *crtc) * crtc->state->active once we have proper CRTC states wired up * for atomic. */ - return crtc && crtc->active && crtc->base.primary->state->fb && + return crtc->active && crtc->base.primary->state->fb && crtc->config->hw.adjusted_mode.crtc_clock; } @@ -2477,7 +2477,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv, * FIFO size is only half of the self * refresh FIFO size on ILK/SNB. */ - if (DISPLAY_VER(dev_priv) <= 6) + if (DISPLAY_VER(dev_priv) < 7) fifo_size /= 2; } @@ -2818,7 +2818,7 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state, usable_level = dev_priv->display.wm.num_levels - 1; /* ILK/SNB: LP2+ watermarks only w/o sprites */ - if (DISPLAY_VER(dev_priv) <= 6 && pipe_wm->sprites_enabled) + if (DISPLAY_VER(dev_priv) < 7 && pipe_wm->sprites_enabled) usable_level = 1; /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ @@ -2961,7 +2961,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv, int last_enabled_level = num_levels - 1; /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ - if ((DISPLAY_VER(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) && + if ((DISPLAY_VER(dev_priv) < 7 || IS_IVYBRIDGE(dev_priv)) && config->num_pipes_active > 1) last_enabled_level = 0; @@ -2993,7 +2993,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv, /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ if (DISPLAY_VER(dev_priv) == 5 && HAS_FBC(dev_priv) && - dev_priv->params.enable_fbc && !merged->fbc_wm_enabled) { + dev_priv->display.params.enable_fbc && !merged->fbc_wm_enabled) { for (level = 2; level < num_levels; level++) { struct intel_wm_level *wm = &merged->wm[level]; @@ -3060,7 +3060,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the * level is disabled. Doing otherwise could cause underruns. */ - if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) { + if (DISPLAY_VER(dev_priv) < 7 && r->spr_val) { drm_WARN_ON(&dev_priv->drm, wm_lp != 1); results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE; } diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 67143a0f5189..ac456a2275db 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -330,7 +330,7 @@ static int afe_clk(struct intel_encoder *encoder, int bpp; if (crtc_state->dsc.compression_enable) - bpp = crtc_state->dsc.compressed_bpp; + bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -860,7 +860,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * compressed and non-compressed bpp. 
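The compressed_bpp -> compressed_bpp_x16 renames running through these DSI hunks (and much of the i915 display code below) move the DSC state to .4 binary fixed point, so fractional compressed bpp values such as 12.5 become representable; to_bpp_int() truncates back to whole bits per pixel where registers want an integer. A sketch of the convention (not a quote of the i915 header):

	#define to_bpp_x16(bpp)			((bpp) << 4)
	#define to_bpp_int(bpp_x16)		((bpp_x16) >> 4)
	#define to_bpp_int_roundup(bpp_x16)	(((bpp_x16) + 0xf) >> 4)

	/* 12.5 bpp is stored as 200 in x16 units; truncation yields 12 */
	int bpp_x16 = to_bpp_x16(12) | 8;
	int bpp = to_bpp_int(bpp_x16);
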
*/ if (crtc_state->dsc.compression_enable) { - mul = crtc_state->dsc.compressed_bpp; + mul = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); } @@ -884,7 +884,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, int bpp, line_time_us, byte_clk_period_ns; if (crtc_state->dsc.compression_enable) - bpp = crtc_state->dsc.compressed_bpp; + bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -1458,8 +1458,8 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - if (pipe_config->dsc.compressed_bpp) { - int div = pipe_config->dsc.compressed_bpp; + if (pipe_config->dsc.compressed_bpp_x16) { + int div = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); adjusted_mode->crtc_htotal = diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 5d18145da279..ec0d5168b503 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -331,9 +331,6 @@ void intel_atomic_state_free(struct drm_atomic_state *_state) drm_atomic_state_default_release(&state->base); kfree(state->global_objs); - - i915_sw_fence_fini(&state->commit_ready); - kfree(state); } diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index b1074350616c..06c2455bdd78 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -31,7 +31,10 @@ * prepare/check/commit/cleanup steps. */ +#include <linux/dma-fence-chain.h> + #include <drm/drm_atomic_helper.h> +#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_blend.h> #include <drm/drm_fourcc.h> @@ -1012,6 +1015,41 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) return 0; } +static int add_dma_resv_fences(struct dma_resv *resv, + struct drm_plane_state *new_plane_state) +{ + struct dma_fence *fence = dma_fence_get(new_plane_state->fence); + struct dma_fence *new; + int ret; + + ret = dma_resv_get_singleton(resv, dma_resv_usage_rw(false), &new); + if (ret) + goto error; + + if (new && fence) { + struct dma_fence_chain *chain = dma_fence_chain_alloc(); + + if (!chain) { + ret = -ENOMEM; + goto error; + } + + dma_fence_chain_init(chain, fence, new, 1); + fence = &chain->base; + + } else if (new) { + fence = new; + } + + dma_fence_put(new_plane_state->fence); + new_plane_state->fence = fence; + return 0; + +error: + dma_fence_put(fence); + return ret; +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @_plane: drm plane to prepare for @@ -1035,7 +1073,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, struct intel_atomic_state *state = to_intel_atomic_state(new_plane_state->uapi.state); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct intel_plane_state *old_plane_state = + struct intel_plane_state *old_plane_state = intel_atomic_get_old_plane_state(state, plane); struct drm_i915_gem_object *obj = intel_fb_obj(new_plane_state->hw.fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_plane_state->hw.fb); @@ -1058,55 +1096,28 @@ intel_prepare_plane_fb(struct drm_plane *_plane, * can safely continue. 
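The intel_atomic_plane.c rework around this point replaces i915's private i915_sw_fence plumbing with generic dma-fence machinery: fences sitting in the old framebuffer's reservation object are folded into the plane state's fence, and drm_gem_plane_helper_prepare_fb() attaches implicit fences the common way. When both an explicit fence and resv fences exist, the two are joined with a chain, as add_dma_resv_fences() above does:

	/* wait on both: the explicit fence and whatever the resv held */
	struct dma_fence_chain *chain = dma_fence_chain_alloc();

	if (!chain)
		return -ENOMEM;

	dma_fence_chain_init(chain, fence, new, 1);
	fence = &chain->base;
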
*/ if (new_crtc_state && intel_crtc_needs_modeset(new_crtc_state)) { - ret = i915_sw_fence_await_reservation(&state->commit_ready, - old_obj->base.resv, - false, 0, - GFP_KERNEL); + ret = add_dma_resv_fences(intel_bo_to_drm_bo(old_obj)->resv, + &new_plane_state->uapi); if (ret < 0) return ret; } } - if (new_plane_state->uapi.fence) { /* explicit fencing */ - i915_gem_fence_wait_priority(new_plane_state->uapi.fence, - &attr); - ret = i915_sw_fence_await_dma_fence(&state->commit_ready, - new_plane_state->uapi.fence, - i915_fence_timeout(dev_priv), - GFP_KERNEL); - if (ret < 0) - return ret; - } - if (!obj) return 0; - ret = intel_plane_pin_fb(new_plane_state); if (ret) return ret; - i915_gem_object_wait_priority(obj, 0, &attr); + ret = drm_gem_plane_helper_prepare_fb(&plane->base, &new_plane_state->uapi); + if (ret < 0) + goto unpin_fb; - if (!new_plane_state->uapi.fence) { /* implicit fencing */ - struct dma_resv_iter cursor; - struct dma_fence *fence; - - ret = i915_sw_fence_await_reservation(&state->commit_ready, - obj->base.resv, false, - i915_fence_timeout(dev_priv), - GFP_KERNEL); - if (ret < 0) - goto unpin_fb; + if (new_plane_state->uapi.fence) { + i915_gem_fence_wait_priority(new_plane_state->uapi.fence, + &attr); - dma_resv_iter_begin(&cursor, obj->base.resv, - DMA_RESV_USAGE_WRITE); - dma_resv_for_each_fence_unlocked(&cursor, fence) { - intel_display_rps_boost_after_vblank(new_plane_state->hw.crtc, - fence); - } - dma_resv_iter_end(&cursor); - } else { intel_display_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence); } diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 19605264a35c..07e0c73204f3 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/i915_component.h> #include "i915_drv.h" @@ -521,25 +522,25 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, unsigned int link_clks_available, link_clks_required; unsigned int tu_data, tu_line, link_clks_active; unsigned int h_active, h_total, hblank_delta, pixel_clk; - unsigned int fec_coeff, cdclk, vdsc_bpp; + unsigned int fec_coeff, cdclk, vdsc_bppx16; unsigned int link_clk, lanes; unsigned int hblank_rise; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; - vdsc_bpp = crtc_state->dsc.compressed_bpp; + vdsc_bppx16 = crtc_state->dsc.compressed_bpp_x16; cdclk = i915->display.cdclk.hw.cdclk; /* fec= 0.972261, using rounding multiplier of 1000000 */ fec_coeff = 972261; link_clk = crtc_state->port_clock; lanes = crtc_state->lane_count; - drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" - "lanes = %u vdsc_bpp = %u cdclk = %u\n", - h_active, link_clk, lanes, vdsc_bpp, cdclk); + drm_dbg_kms(&i915->drm, + "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " BPP_X16_FMT " cdclk = %u\n", + h_active, link_clk, lanes, BPP_X16_ARGS(vdsc_bppx16), cdclk); - if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk)) + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) return 0; link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; @@ -551,8 +552,8 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), 
mul_u32_u32(link_clk, cdclk)); - tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), - mul_u32_u32(link_clk * lanes, fec_coeff)); + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000), + mul_u32_u32(link_clk * lanes * 16, fec_coeff)); tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), mul_u32_u32(64 * pixel_clk, 1000000)); link_clks_active = (tu_line - 1) * 64 + tu_data; diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 2e8f17c04522..612d4cd9dacb 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -88,10 +88,10 @@ u32 intel_backlight_invert_pwm_level(struct intel_connector *connector, u32 val) drm_WARN_ON(&i915->drm, panel->backlight.pwm_level_max == 0); - if (i915->params.invert_brightness < 0) + if (i915->display.params.invert_brightness < 0) return val; - if (i915->params.invert_brightness > 0 || + if (i915->display.params.invert_brightness > 0 || intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS)) { return panel->backlight.pwm_level_max - val + panel->backlight.pwm_level_min; } @@ -132,8 +132,9 @@ u32 intel_backlight_level_from_pwm(struct intel_connector *connector, u32 val) drm_WARN_ON_ONCE(&i915->drm, panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0); - if (i915->params.invert_brightness > 0 || - (i915->params.invert_brightness == 0 && intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS))) + if (i915->display.params.invert_brightness > 0 || + (i915->display.params.invert_brightness == 0 && + intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS))) val = panel->backlight.pwm_level_max - (val - panel->backlight.pwm_level_min); return scale(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 4e8f1e91bb08..aa169b0055e9 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1116,7 +1116,7 @@ parse_sdvo_panel_data(struct drm_i915_private *i915, struct drm_display_mode *panel_fixed_mode; int index; - index = i915->params.vbt_sdvo_panel_type; + index = i915->display.params.vbt_sdvo_panel_type; if (index == -2) { drm_dbg_kms(&i915->drm, "Ignore SDVO panel mode from BIOS VBT tables.\n"); @@ -1514,9 +1514,9 @@ parse_edp(struct drm_i915_private *i915, u8 vswing; /* Don't read from VBT if module parameter has valid value*/ - if (i915->params.edp_vswing) { + if (i915->display.params.edp_vswing) { panel->vbt.edp.low_vswing = - i915->params.edp_vswing == 1; + i915->display.params.edp_vswing == 1; } else { vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; panel->vbt.edp.low_vswing = vswing == 0; @@ -2201,6 +2201,9 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) const u8 *ddc_pin_map; int i, n_entries; + if (IS_DGFX(i915)) + return vbt_pin; + if (INTEL_PCH_TYPE(i915) >= PCH_LNL || HAS_PCH_MTP(i915) || IS_ALDERLAKE_P(i915)) { ddc_pin_map = adlp_ddc_pin_map; @@ -2208,8 +2211,6 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ALDERLAKE_S(i915)) { ddc_pin_map = adls_ddc_pin_map; n_entries = ARRAY_SIZE(adls_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { - return vbt_pin; } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); @@ -2473,6 +2474,27 @@ static void 
sanitize_device_type(struct intel_bios_encoder_data *devdata, devdata->child.device_type |= DEVICE_TYPE_NOT_HDMI_OUTPUT; } +static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata, + enum port port) +{ + struct drm_i915_private *i915 = devdata->i915; + + if (!intel_bios_encoder_supports_dvi(devdata)) + return; + + /* + * Some BDW machines (eg. HP Pavilion 15-ab) shipped + * with a HSW VBT where the level shifter value goes + * up to 11, whereas the BDW max is 9. + */ + if (IS_BROADWELL(i915) && devdata->child.hdmi_level_shifter_value > 9) { + drm_dbg_kms(&i915->drm, "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n", + port_name(port), devdata->child.hdmi_level_shifter_value, 9); + + devdata->child.hdmi_level_shifter_value = 9; + } +} + static bool intel_bios_encoder_supports_crt(const struct intel_bios_encoder_data *devdata) { @@ -2652,6 +2674,7 @@ static void parse_ddi_port(struct intel_bios_encoder_data *devdata) } sanitize_device_type(devdata, port); + sanitize_hdmi_level_shift(devdata, port); } static bool has_ddi_port_info(struct drm_i915_private *i915) @@ -3392,8 +3415,8 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, crtc_state->pipe_bpp = bpc * 3; - crtc_state->dsc.compressed_bpp = min(crtc_state->pipe_bpp, - VBT_DSC_MAX_BPP(dsc->max_bpp)); + crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(min(crtc_state->pipe_bpp, + VBT_DSC_MAX_BPP(dsc->max_bpp))); /* * FIXME: This is ugly, and slice count should take DSC engine @@ -3452,8 +3475,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, if (!devdata->dsc) return false; - if (crtc_state) - fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc); + fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index bef96db62c80..7f2a50b4f494 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -87,7 +87,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return ret; dclk = val & 0xffff; - sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000); + sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) >= 12 ? 
500 : 0), + 1000); sp->t_rp = (val & 0xff0000) >> 16; sp->t_rcd = (val & 0xff000000) >> 24; @@ -480,7 +481,7 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12) qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1); - if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels) + if (DISPLAY_VER(dev_priv) >= 12 && num_channels > qi.max_numchannels) drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels."); if (qi.max_numchannels != 0) num_channels = min_t(u8, num_channels, qi.max_numchannels); @@ -897,7 +898,7 @@ static int icl_find_qgv_points(struct drm_i915_private *i915, unsigned int idx; unsigned int max_data_rate; - if (DISPLAY_VER(i915) > 11) + if (DISPLAY_VER(i915) >= 12) idx = tgl_max_bw_index(i915, num_active_planes, i); else idx = icl_max_bw_index(i915, num_active_planes, i); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c4839c67cb0f..c985ebb6831a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1180,7 +1180,7 @@ sanitize: /* force cdclk programming */ dev_priv->display.cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->display.cdclk.hw.vco = -1; + dev_priv->display.cdclk.hw.vco = ~0; } static void skl_cdclk_init_hw(struct drm_i915_private *dev_priv) @@ -1446,50 +1446,77 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } +static u8 calc_voltage_level(int cdclk, int num_voltage_levels, + const int voltage_level_max_cdclk[]) +{ + int voltage_level; + + for (voltage_level = 0; voltage_level < num_voltage_levels; voltage_level++) { + if (cdclk <= voltage_level_max_cdclk[voltage_level]) + return voltage_level; + } + + MISSING_CASE(cdclk); + return num_voltage_levels - 1; +} + static u8 icl_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int icl_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 556800, + [2] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(icl_voltage_level_max_cdclk), + icl_voltage_level_max_cdclk); } static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 326400) - return 3; - else if (cdclk > 312000) - return 2; - else if (cdclk > 180000) - return 1; - else - return 0; + static const int ehl_voltage_level_max_cdclk[] = { + [0] = 180000, + [1] = 312000, + [2] = 326400, + /* + * Bspec lists the limit as 556.8 MHz, but some JSL + * development boards (at least) boot with 652.8 MHz + */ + [3] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(ehl_voltage_level_max_cdclk), + ehl_voltage_level_max_cdclk); } static u8 tgl_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 3; - else if (cdclk > 326400) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int tgl_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 326400, + [2] = 556800, + [3] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(tgl_voltage_level_max_cdclk), + tgl_voltage_level_max_cdclk); } static u8 rplu_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 3; - else if (cdclk > 480000) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int rplu_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 480000, + [2] = 556800, + [3] = 652800, + }; + + return 
calc_voltage_level(cdclk, + ARRAY_SIZE(rplu_voltage_level_max_cdclk), + rplu_voltage_level_max_cdclk); } static void icl_readout_refclk(struct drm_i915_private *dev_priv, @@ -1800,6 +1827,8 @@ static bool cdclk_pll_is_unknown(unsigned int vco) return vco == ~0; } +static const int cdclk_squash_len = 16; + static int cdclk_squash_divider(u16 waveform) { return hweight16(waveform ?: 0xffff); @@ -1811,7 +1840,6 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91 struct intel_cdclk_config *mid_cdclk_config) { u16 old_waveform, new_waveform, mid_waveform; - int size = 16; int div = 2; /* Return if PLL is in an unknown state, force a complete disable and re-enable. */ @@ -1850,7 +1878,8 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91 } mid_cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_squash_divider(mid_waveform) * - mid_cdclk_config->vco, size * div); + mid_cdclk_config->vco, + cdclk_squash_len * div); /* make sure the mid clock came out sane */ @@ -1878,9 +1907,9 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_config->cdclk; int vco = cdclk_config->vco; - u32 val; + int unsquashed_cdclk; u16 waveform; - int clock; + u32 val; if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->display.cdclk.hw.vco > 0 && vco > 0 && !cdclk_pll_is_unknown(dev_priv->display.cdclk.hw.vco)) { @@ -1897,15 +1926,13 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv, waveform = cdclk_squash_waveform(dev_priv, cdclk); - if (waveform) - clock = vco / 2; - else - clock = cdclk; + unsquashed_cdclk = DIV_ROUND_CLOSEST(cdclk * cdclk_squash_len, + cdclk_squash_divider(waveform)); if (HAS_CDCLK_SQUASH(dev_priv)) dg2_cdclk_squash_program(dev_priv, waveform); - val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) | + val = bxt_cdclk_cd2x_div_sel(dev_priv, unsquashed_cdclk, vco) | bxt_cdclk_cd2x_pipe(dev_priv, pipe); /* @@ -2075,7 +2102,7 @@ sanitize: dev_priv->display.cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->display.cdclk.hw.vco = -1; + dev_priv->display.cdclk.hw.vco = ~0; } static void bxt_cdclk_init_hw(struct drm_i915_private *dev_priv) @@ -2597,9 +2624,10 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) * Since PPC = 2 with bigjoiner * => CDCLK >= compressed_bpp * Pixel clock / 2 * Bigjoiner Interface bits */ - int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24; - int min_cdclk_bj = (crtc_state->dsc.compressed_bpp * pixel_clock) / - (2 * bigjoiner_interface_bits); + int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 
36 : 24; + int min_cdclk_bj = + (to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) * + pixel_clock) / (2 * bigjoiner_interface_bits); min_cdclk = max(min_cdclk, min_cdclk_bj); } @@ -3488,7 +3516,7 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = { .get_cdclk = bxt_get_cdclk, .set_cdclk = bxt_set_cdclk, .modeset_calc_cdclk = bxt_modeset_calc_cdclk, - .calc_voltage_level = tgl_calc_voltage_level, + .calc_voltage_level = rplu_calc_voltage_level, }; static const struct intel_cdclk_funcs rplu_cdclk_funcs = { diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 1d26be54ddfc..c5092b7e87d5 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -785,14 +785,12 @@ static void chv_assign_csc(struct intel_crtc_state *crtc_state) /* convert hw value with given bit_precision to lut property val */ static u32 intel_color_lut_pack(u32 val, int bit_precision) { - u32 max = 0xffff >> (16 - bit_precision); - - val = clamp_val(val, 0, max); - - if (bit_precision < 16) - val <<= 16 - bit_precision; - - return val; + if (bit_precision > 16) + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(val, (1 << 16) - 1), + (1 << bit_precision) - 1); + else + return DIV_ROUND_CLOSEST(val * ((1 << 16) - 1), + (1 << bit_precision) - 1); } static u32 i9xx_lut_8(const struct drm_color_lut *color) @@ -911,7 +909,7 @@ static void i965_lut_10p6_pack(struct drm_color_lut *entry, u32 ldw, u32 udw) static u16 i965_lut_11p6_max_pack(u32 val) { /* PIPEGCMAX is 11.6, clamp to 10.6 */ - return clamp_val(val, 0, 0xffff); + return min(val, 0xffffu); } static u32 ilk_lut_10(const struct drm_color_lut *color) @@ -1528,14 +1526,27 @@ static int glk_degamma_lut_size(struct drm_i915_private *i915) return 35; } -/* - * change_lut_val_precision: helper function to upscale or downscale lut values. - * Parameters 'to' and 'from' needs to be less than 32. This should be sufficient - * as currently there are no lut values exceeding 32 bit. - */ -static u32 change_lut_val_precision(u32 lut_val, int to, int from) +static u32 glk_degamma_lut(const struct drm_color_lut *color) +{ + return color->green; +} + +static void glk_degamma_lut_pack(struct drm_color_lut *entry, u32 val) +{ + /* PRE_CSC_GAMC_DATA is 3.16, clamp to 0.16 */ + entry->red = entry->green = entry->blue = min(val, 0xffffu); +} + +static u32 mtl_degamma_lut(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->green, 24); +} + +static void mtl_degamma_lut_pack(struct drm_color_lut *entry, u32 val) { - return mul_u32_u32(lut_val, (1 << to)) / (1 << from); + /* PRE_CSC_GAMC_DATA is 3.24, clamp to 0.16 */ + entry->red = entry->green = entry->blue = + intel_color_lut_pack(min(val, 0xffffffu), 24); } static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state, @@ -1572,20 +1583,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state, * ToDo: Extend to max 7.0. Enable 32 bit input value * as compared to just 16 to achieve this. */ - u32 lut_val; - - if (DISPLAY_VER(i915) >= 14) - lut_val = change_lut_val_precision(lut[i].green, 24, 16); - else - lut_val = lut[i].green; - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), - lut_val); + DISPLAY_VER(i915) >= 14 ? + mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); } /* Clamp values > 1.0. 
*/ while (i++ < glk_degamma_lut_size(i915)) - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1 << 16); + ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), + DISPLAY_VER(i915) >= 14 ? + 1 << 24 : 1 << 16); ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0); } @@ -3572,17 +3579,10 @@ static struct drm_property_blob *glk_read_degamma_lut(struct intel_crtc *crtc) for (i = 0; i < lut_size; i++) { u32 val = intel_de_read_fw(dev_priv, PRE_CSC_GAMC_DATA(pipe)); - /* - * For MTL and beyond, convert back the 24 bit lut values - * read from HW to 16 bit values to maintain parity with - * userspace values - */ if (DISPLAY_VER(dev_priv) >= 14) - val = change_lut_val_precision(val, 16, 24); - - lut[i].red = val; - lut[i].green = val; - lut[i].blue = val; + mtl_degamma_lut_pack(&lut[i], val); + else + glk_degamma_lut_pack(&lut[i], val); } intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe), diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 6f6b348b8a40..abaacea5c2cc 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -846,7 +846,7 @@ intel_crt_detect(struct drm_connector *connector, if (!intel_display_device_enabled(dev_priv)) return connector_status_disconnected; - if (dev_priv->params.load_detect_test) { + if (dev_priv->display.params.load_detect_test) { wakeref = intel_display_power_get(dev_priv, intel_encoder->power_domain); goto load_detect; @@ -906,7 +906,7 @@ load_detect: else if (DISPLAY_VER(dev_priv) < 4) status = intel_crt_load_detect(crt, to_intel_crtc(connector->state->crtc)->pipe); - else if (dev_priv->params.load_detect_test) + else if (dev_priv->display.params.load_detect_test) status = connector_status_disconnected; else status = connector_status_unknown; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 1fd068e6e26c..8a84a31c7b48 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -553,8 +553,15 @@ void intel_pipe_update_start(struct intel_atomic_state *state, intel_psr_lock(new_crtc_state); - if (new_crtc_state->do_async_flip) + if (new_crtc_state->do_async_flip) { + spin_lock_irq(&crtc->base.dev->event_lock); + /* arm the event for the flip done irq handler */ + crtc->flip_done_event = new_crtc_state->uapi.event; + spin_unlock_irq(&crtc->base.dev->event_lock); + + new_crtc_state->uapi.event = NULL; return; + } if (intel_crtc_needs_vblank_work(new_crtc_state)) intel_crtc_vblank_work_init(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 66fe880af8f3..49fd100ec98a 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -4,6 +4,7 @@ */ #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include "i915_drv.h" #include "intel_crtc_state_dump.h" @@ -261,6 +262,15 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n", str_enabled_disabled(pipe_config->fec_enable), str_enabled_disabled(pipe_config->enhanced_framing)); + + drm_dbg_kms(&i915->drm, "sdp split: %s\n", + str_enabled_disabled(pipe_config->sdp_split_enable)); + + drm_dbg_kms(&i915->drm, "psr: %s, psr2: %s, panel replay: %s, selective fetch: %s\n", + str_enabled_disabled(pipe_config->has_psr), + str_enabled_disabled(pipe_config->has_psr2), + 
str_enabled_disabled(pipe_config->has_panel_replay), + str_enabled_disabled(pipe_config->enable_psr2_sel_fetch)); } drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n", diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index b342fad180ca..926e2de00eb5 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -21,8 +21,11 @@ #include "intel_fb_pin.h" #include "intel_frontbuffer.h" #include "intel_psr.h" +#include "intel_psr_regs.h" #include "skl_watermark.h" +#include "gem/i915_gem_object.h" + /* Cursor formats */ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, @@ -33,11 +36,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = sg_dma_address(obj->mm.pages->sgl); + base = i915_gem_object_get_dma_address(obj, 0); else base = intel_plane_ggtt_offset(plane_state); @@ -484,6 +487,35 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return 0; } +static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); +} + +static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0) + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + plane_state->ctl); + else + i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); +} + /* TODO: split into noarm+arm pair */ static void i9xx_cursor_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -531,10 +563,10 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane, skl_write_cursor_wm(plane, crtc_state); if (plane_state) - intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, - plane_state); + i9xx_cursor_update_sel_fetch_arm(plane, crtc_state, + plane_state); else - intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state); + i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); if (plane->cursor.base != base || plane->cursor.size != fbc_ctl || diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index ccf225afeb2a..884a1da36089 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -31,7 +31,7 @@ bool intel_is_c10phy(struct drm_i915_private *i915, enum phy phy) { - if (DISPLAY_VER_FULL(i915) == IP_VER(14, 0) && phy < PHY_C) + if ((IS_LUNARLAKE(i915) || IS_METEORLAKE(i915)) && phy < PHY_C) return true; return false; @@ -206,6 +206,13 @@ static int __intel_cx0_read_once(struct drm_i915_private *i915, enum port port, intel_clear_response_ready_flag(i915, port, lane); + /* + * FIXME: Workaround to let the HW settle + * down and let the message bus end up + * in a known state + */ + intel_cx0_bus_reset(i915, port, lane); + return REG_FIELD_GET(XELPDP_PORT_P2M_DATA_MASK, val); } @@ -285,6 +292,13 @@ static int __intel_cx0_write_once(struct drm_i915_private *i915, enum port port, intel_clear_response_ready_flag(i915, port, lane); + /* + * FIXME: Workaround to let the HW settle + * down and let the message bus end up + * in a known state + */ + intel_cx0_bus_reset(i915, port, lane); + return 0; } @@ -401,9 +415,15 @@ void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder, struct drm_i915_private *i915 = to_i915(encoder->base.dev); const struct intel_ddi_buf_trans *trans; enum phy phy = intel_port_to_phy(i915, encoder->port); - u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder); + u8 owned_lane_mask; intel_wakeref_t wakeref; int n_entries, ln; + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + + if (intel_tc_port_in_tbt_alt_mode(dig_port)) + return; + + owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder); wakeref = intel_cx0_phy_transaction_begin(encoder); @@ -725,7 +745,6 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = { /* C20 basic DP 1.4 tables */ static const struct intel_c20pll_state mtl_c20_dp_rbr = { - .link_bit_rate = 162000, .clock = 162000, .tx = { 0xbe88, /* tx cfg0 */ 0x5800, /* tx cfg1 */ @@ -751,7 +770,6 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { - .link_bit_rate = 270000, .clock = 270000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ @@ -777,7 +795,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { - .link_bit_rate = 540000, .clock = 540000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ @@ -803,7 +820,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { - .link_bit_rate = 810000, .clock = 810000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ @@ -830,8 +846,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { /* C20 basic DP 2.0 tables */ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = { - .link_bit_rate = 1000000, /* 10 Gbps */ - .clock = 312500, + .clock = 1000000, /* 10 Gbps */ .tx = { 0xbe21, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -855,8 +870,7 @@ }; static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = { - .link_bit_rate = 1350000, /* 13.5 Gbps */ - .clock = 421875, + .clock = 1350000, /* 13.5 Gbps */ .tx = { 0xbea0, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -881,8 +895,7 @@ }; static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = { - .link_bit_rate = 2000000, /* 20 Gbps */ - .clock = 625000, + .clock = 2000000, /* 20 Gbps */ .tx = { 0xbe20, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1501,7 +1514,6 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = { }; static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = { - .link_bit_rate = 25175, .clock = 25175, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ @@ -1527,7 +1539,6 @@ }; static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { - .link_bit_rate = 27000, .clock = 27000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx 
cfg1 */ @@ -1553,7 +1564,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = { - .link_bit_rate = 74250, .clock = 74250, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ @@ -1579,7 +1589,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { - .link_bit_rate = 148500, .clock = 148500, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ @@ -1605,7 +1614,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_594 = { - .link_bit_rate = 594000, .clock = 594000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ @@ -1631,8 +1639,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_300 = { - .link_bit_rate = 3000000, - .clock = 166670, + .clock = 3000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1657,8 +1664,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_600 = { - .link_bit_rate = 6000000, - .clock = 333330, + .clock = 6000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1683,8 +1689,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_800 = { - .link_bit_rate = 8000000, - .clock = 444440, + .clock = 8000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1709,8 +1714,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { - .link_bit_rate = 10000000, - .clock = 555560, + .clock = 10000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1735,8 +1739,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_1200 = { - .link_bit_rate = 12000000, - .clock = 666670, + .clock = 12000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1850,8 +1853,8 @@ static int intel_c10pll_calc_state(struct intel_crtc_state *crtc_state, return -EINVAL; } -void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, - struct intel_c10pll_state *pll_state) +static void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, + struct intel_c10pll_state *pll_state) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); u8 lane = INTEL_CX0_LANE0; @@ -1985,7 +1988,6 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_ else mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0; - pll_state->link_bit_rate = pixel_clock; pll_state->clock = pixel_clock; pll_state->tx[0] = 0xbe88; pll_state->tx[1] = 0x9800; @@ -2022,7 +2024,7 @@ static int intel_c20_phy_check_hdmi_link_rate(int clock) int i; for (i = 0; tables[i]; i++) { - if (clock == tables[i]->link_bit_rate) + if (clock == tables[i]->clock) return MODE_OK; } @@ -2074,7 +2076,7 @@ static int intel_c20pll_calc_state(struct intel_crtc_state *crtc_state, return -EINVAL; for (i = 0; tables[i]; i++) { - if (crtc_state->port_clock == tables[i]->link_bit_rate) { + if (crtc_state->port_clock == tables[i]->clock) { crtc_state->cx0pll_state.c20 = *tables[i]; return 0; } @@ -2097,14 +2099,14 @@ int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state, static bool intel_c20_use_mplla(u32 clock) { /* 10G and 20G rates use 
MPLLA */ - if (clock == 312500 || clock == 625000) + if (clock == 1000000 || clock == 2000000) return true; return false; } -void intel_c20pll_readout_hw_state(struct intel_encoder *encoder, - struct intel_c20pll_state *pll_state) +static void intel_c20pll_readout_hw_state(struct intel_encoder *encoder, + struct intel_c20pll_state *pll_state) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); bool cntx; @@ -2200,11 +2202,11 @@ static u8 intel_c20_get_dp_rate(u32 clock) return 6; case 432000: /* 4.32 Gbps eDP */ return 7; - case 312500: /* 10 Gbps DP2.0 */ + case 1000000: /* 10 Gbps DP2.0 */ return 8; - case 421875: /* 13.5 Gbps DP2.0 */ + case 1350000: /* 13.5 Gbps DP2.0 */ return 9; - case 625000: /* 20 Gbps DP2.0*/ + case 2000000: /* 20 Gbps DP2.0 */ return 10; case 648000: /* 6.48 Gbps eDP*/ return 11; @@ -2222,13 +2224,13 @@ static u8 intel_c20_get_hdmi_rate(u32 clock) return 0; switch (clock) { - case 166670: /* 3 Gbps */ - case 333330: /* 6 Gbps */ - case 666670: /* 12 Gbps */ + case 300000: /* 3 Gbps */ + case 600000: /* 6 Gbps */ + case 1200000: /* 12 Gbps */ return 1; - case 444440: /* 8 Gbps */ + case 800000: /* 8 Gbps */ return 2; - case 555560: /* 10 Gbps */ + case 1000000: /* 10 Gbps */ return 3; default: MISSING_CASE(clock); @@ -2239,7 +2241,7 @@ static u8 intel_c20_get_hdmi_rate(u32 clock) static bool is_dp2(u32 clock) { /* DP2.0 clock rates */ - if (clock == 312500 || clock == 421875 || clock == 625000) + if (clock == 1000000 || clock == 1350000 || clock == 2000000) return true; return false; @@ -2248,11 +2250,11 @@ static bool is_dp2(u32 clock) static bool is_hdmi_frl(u32 clock) { switch (clock) { - case 166670: /* 3 Gbps */ - case 333330: /* 6 Gbps */ - case 444440: /* 8 Gbps */ - case 555560: /* 10 Gbps */ - case 666670: /* 12 Gbps */ + case 300000: /* 3 Gbps */ + case 600000: /* 6 Gbps */ + case 800000: /* 8 Gbps */ + case 1000000: /* 10 Gbps */ + case 1200000: /* 12 Gbps */ return true; default: return false; @@ -2285,6 +2287,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, const struct intel_c20pll_state *pll_state = &crtc_state->cx0pll_state.c20; bool dp = false; int lane = crtc_state->lane_count > 2 ? INTEL_CX0_BOTH_LANES : INTEL_CX0_LANE0; + u32 clock = crtc_state->port_clock; bool cntx; int i; @@ -2323,7 +2326,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, } /* 3.3 mpllb or mplla configuration */ - if (intel_c20_use_mplla(pll_state->clock)) { + if (intel_c20_use_mplla(clock)) { for (i = 0; i < ARRAY_SIZE(pll_state->mplla); i++) { if (cntx) intel_c20_sram_write(i915, encoder->port, INTEL_CX0_LANE0, @@ -2350,23 +2353,23 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, /* 4. Program custom width to match the link protocol */ intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_WIDTH, PHY_C20_CUSTOM_WIDTH_MASK, - PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(pll_state->clock, dp)), + PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(clock, dp)), MB_WRITE_COMMITTED); /* 5. For DP or 6. For HDMI */ if (dp) { intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE, BIT(6) | PHY_C20_CUSTOM_SERDES_MASK, - BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(pll_state->clock)), + BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(clock)), MB_WRITE_COMMITTED); } else { intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE, BIT(7) | PHY_C20_CUSTOM_SERDES_MASK, - is_hdmi_frl(pll_state->clock) ? BIT(7) : 0, + is_hdmi_frl(clock) ? 
BIT(7) : 0, MB_WRITE_COMMITTED); intel_cx0_write(i915, encoder->port, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, - intel_c20_get_hdmi_rate(pll_state->clock), + intel_c20_get_hdmi_rate(clock), MB_WRITE_COMMITTED); } @@ -2378,8 +2381,8 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, BIT(0), cntx ? 0 : 1, MB_WRITE_COMMITTED); } -int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, - const struct intel_c10pll_state *pll_state) +static int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, + const struct intel_c10pll_state *pll_state) { unsigned int frac_quot = 0, frac_rem = 0, frac_den = 1; unsigned int multiplier, tx_clk_div, hdmi_div, refclk = 38400; @@ -2405,8 +2408,8 @@ int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, return tmpclk; } -int intel_c20pll_calc_port_clock(struct intel_encoder *encoder, - const struct intel_c20pll_state *pll_state) +static int intel_c20pll_calc_port_clock(struct intel_encoder *encoder, + const struct intel_c20pll_state *pll_state) { unsigned int frac, frac_en, frac_quot, frac_rem, frac_den; unsigned int multiplier, refclk = 38400; @@ -3004,17 +3007,110 @@ intel_mtl_port_pll_type(struct intel_encoder *encoder, return ICL_PORT_DPLL_DEFAULT; } -void intel_c10pll_state_verify(struct intel_atomic_state *state, +static void intel_c10pll_state_verify(const struct intel_crtc_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder, + struct intel_c10pll_state *mpllb_hw_state) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + const struct intel_c10pll_state *mpllb_sw_state = &state->cx0pll_state.c10; + int i; + + for (i = 0; i < ARRAY_SIZE(mpllb_sw_state->pll); i++) { + u8 expected = mpllb_sw_state->pll[i]; + + I915_STATE_WARN(i915, mpllb_hw_state->pll[i] != expected, + "[CRTC:%d:%s] mismatch in C10MPLLB: Register[%d] (expected 0x%02x, found 0x%02x)", + crtc->base.base.id, crtc->base.name, i, + expected, mpllb_hw_state->pll[i]); + } + + I915_STATE_WARN(i915, mpllb_hw_state->tx != mpllb_sw_state->tx, + "[CRTC:%d:%s] mismatch in C10MPLLB: Register TX0 (expected 0x%02x, found 0x%02x)", + crtc->base.base.id, crtc->base.name, + mpllb_sw_state->tx, mpllb_hw_state->tx); + + I915_STATE_WARN(i915, mpllb_hw_state->cmn != mpllb_sw_state->cmn, + "[CRTC:%d:%s] mismatch in C10MPLLB: Register CMN0 (expected 0x%02x, found 0x%02x)", + crtc->base.base.id, crtc->base.name, + mpllb_sw_state->cmn, mpllb_hw_state->cmn); +} + +void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder, + struct intel_cx0pll_state *pll_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (intel_is_c10phy(i915, phy)) + intel_c10pll_readout_hw_state(encoder, &pll_state->c10); + else + intel_c20pll_readout_hw_state(encoder, &pll_state->c20); +} + +int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder, + const struct intel_cx0pll_state *pll_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (intel_is_c10phy(i915, phy)) + return intel_c10pll_calc_port_clock(encoder, &pll_state->c10); + + return intel_c20pll_calc_port_clock(encoder, &pll_state->c20); +} + +static void intel_c20pll_state_verify(const struct intel_crtc_state *state, + struct intel_crtc *crtc, + struct intel_encoder *encoder, + struct intel_c20pll_state *mpll_hw_state) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + const struct intel_c20pll_state *mpll_sw_state = 
&state->cx0pll_state.c20; + bool use_mplla; + int i; + + use_mplla = intel_c20_use_mplla(mpll_hw_state->clock); + if (use_mplla) { + for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) { + I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i], + "[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)", + crtc->base.base.id, crtc->base.name, i, + mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]); + } + } else { + for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mpllb); i++) { + I915_STATE_WARN(i915, mpll_hw_state->mpllb[i] != mpll_sw_state->mpllb[i], + "[CRTC:%d:%s] mismatch in C20MPLLB: Register[%d] (expected 0x%04x, found 0x%04x)", + crtc->base.base.id, crtc->base.name, i, + mpll_sw_state->mpllb[i], mpll_hw_state->mpllb[i]); + } + } + + for (i = 0; i < ARRAY_SIZE(mpll_sw_state->tx); i++) { + I915_STATE_WARN(i915, mpll_hw_state->tx[i] != mpll_sw_state->tx[i], + "[CRTC:%d:%s] mismatch in C20: Register TX[%i] (expected 0x%04x, found 0x%04x)", + crtc->base.base.id, crtc->base.name, i, + mpll_sw_state->tx[i], mpll_hw_state->tx[i]); + } + + for (i = 0; i < ARRAY_SIZE(mpll_sw_state->cmn); i++) { + I915_STATE_WARN(i915, mpll_hw_state->cmn[i] != mpll_sw_state->cmn[i], + "[CRTC:%d:%s] mismatch in C20: Register CMN[%i] (expected 0x%04x, found 0x%04x)", + crtc->base.base.id, crtc->base.name, i, + mpll_sw_state->cmn[i], mpll_hw_state->cmn[i]); + } +} + +void intel_cx0pll_state_verify(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - struct intel_c10pll_state mpllb_hw_state = {}; - const struct intel_c10pll_state *mpllb_sw_state = &new_crtc_state->cx0pll_state.c10; struct intel_encoder *encoder; + struct intel_cx0pll_state mpll_hw_state = {}; enum phy phy; - int i; if (DISPLAY_VER(i915) < 14) return; @@ -3030,27 +3126,13 @@ void intel_c10pll_state_verify(struct intel_atomic_state *state, encoder = intel_get_crtc_new_encoder(state, new_crtc_state); phy = intel_port_to_phy(i915, encoder->port); - if (!intel_is_c10phy(i915, phy)) + if (intel_tc_port_in_tbt_alt_mode(enc_to_dig_port(encoder))) return; - intel_c10pll_readout_hw_state(encoder, &mpllb_hw_state); + intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state); - for (i = 0; i < ARRAY_SIZE(mpllb_sw_state->pll); i++) { - u8 expected = mpllb_sw_state->pll[i]; - - I915_STATE_WARN(i915, mpllb_hw_state.pll[i] != expected, - "[CRTC:%d:%s] mismatch in C10MPLLB: Register[%d] (expected 0x%02x, found 0x%02x)", - crtc->base.base.id, crtc->base.name, i, - expected, mpllb_hw_state.pll[i]); - } - - I915_STATE_WARN(i915, mpllb_hw_state.tx != mpllb_sw_state->tx, - "[CRTC:%d:%s] mismatch in C10MPLLB: Register TX0 (expected 0x%02x, found 0x%02x)", - crtc->base.base.id, crtc->base.name, - mpllb_sw_state->tx, mpllb_hw_state.tx); - - I915_STATE_WARN(i915, mpllb_hw_state.cmn != mpllb_sw_state->cmn, - "[CRTC:%d:%s] mismatch in C10MPLLB: Register CMN0 (expected 0x%02x, found 0x%02x)", - crtc->base.base.id, crtc->base.name, - mpllb_sw_state->cmn, mpllb_hw_state.cmn); + if (intel_is_c10phy(i915, phy)) + intel_c10pll_state_verify(new_crtc_state, crtc, encoder, &mpll_hw_state.c10); + else + intel_c20pll_state_verify(new_crtc_state, crtc, encoder, &mpll_hw_state.c20); } diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.h b/drivers/gpu/drm/i915/display/intel_cx0_phy.h index 0e0a38dac8cd..c6682677253a 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.h +++ 
b/drivers/gpu/drm/i915/display/intel_cx0_phy.h @@ -16,6 +16,7 @@ struct drm_i915_private; struct intel_atomic_state; struct intel_c10pll_state; struct intel_c20pll_state; +struct intel_cx0pll_state; struct intel_crtc; struct intel_crtc_state; struct intel_encoder; @@ -28,20 +29,19 @@ void intel_mtl_pll_disable(struct intel_encoder *encoder); enum icl_port_dpll_id intel_mtl_port_pll_type(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); -void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, struct intel_c10pll_state *pll_state); + int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state, struct intel_encoder *encoder); +void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder, + struct intel_cx0pll_state *pll_state); +int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder, + const struct intel_cx0pll_state *pll_state); + void intel_c10pll_dump_hw_state(struct drm_i915_private *dev_priv, const struct intel_c10pll_state *hw_state); -int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, - const struct intel_c10pll_state *pll_state); -void intel_c10pll_state_verify(struct intel_atomic_state *state, +void intel_cx0pll_state_verify(struct intel_atomic_state *state, struct intel_crtc *crtc); -void intel_c20pll_readout_hw_state(struct intel_encoder *encoder, - struct intel_c20pll_state *pll_state); void intel_c20pll_dump_hw_state(struct drm_i915_private *i915, const struct intel_c20pll_state *hw_state); -int intel_c20pll_calc_port_clock(struct intel_encoder *encoder, - const struct intel_c20pll_state *pll_state); void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); int intel_cx0_phy_check_hdmi_link_rate(struct intel_hdmi *hdmi, int clock); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 9151d5add960..12a29363e5df 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -25,6 +25,7 @@ * */ +#include <linux/iopoll.h> #include <linux/string_helpers.h> #include <drm/display/drm_scdc_helper.h> @@ -2210,16 +2211,87 @@ static void intel_dp_sink_set_msa_timing_par_ignore_state(struct intel_dp *intel } static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + bool enable) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (!crtc_state->fec_enable) return; - if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION, DP_FEC_READY) <= 0) - drm_dbg_kms(&i915->drm, - "Failed to set FEC_READY in the sink\n"); + if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION, + enable ? DP_FEC_READY : 0) <= 0) + drm_dbg_kms(&i915->drm, "Failed to set FEC_READY to %s in the sink\n", + enable ? "enabled" : "disabled"); + + if (enable && + drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_STATUS, + DP_FEC_DECODE_EN_DETECTED | DP_FEC_DECODE_DIS_DETECTED) <= 0) + drm_dbg_kms(&i915->drm, "Failed to clear FEC detected flags\n"); +} + +static int read_fec_detected_status(struct drm_dp_aux *aux) +{ + int ret; + u8 status; + + ret = drm_dp_dpcd_readb(aux, DP_FEC_STATUS, &status); + if (ret < 0) + return ret; + + return status; +} + +static void wait_for_fec_detected(struct drm_dp_aux *aux, bool enabled) +{ + struct drm_i915_private *i915 = to_i915(aux->drm_dev); + int mask = enabled ? 
DP_FEC_DECODE_EN_DETECTED : DP_FEC_DECODE_DIS_DETECTED; + int status; + int err; + + err = readx_poll_timeout(read_fec_detected_status, aux, status, + status & mask || status < 0, + 10000, 200000); + + if (!err && status >= 0) + return; + + if (err == -ETIMEDOUT) + drm_dbg_kms(&i915->drm, "Timeout waiting for FEC %s to get detected\n", + str_enabled_disabled(enabled)); + else + drm_dbg_kms(&i915->drm, "FEC detected status read error: %d\n", status); +} + +void intel_ddi_wait_for_fec_status(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool enabled) +{ + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + int ret; + + if (!crtc_state->fec_enable) + return; + + if (enabled) + ret = intel_de_wait_for_set(i915, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_FEC_ENABLE_LIVE, 1); + else + ret = intel_de_wait_for_clear(i915, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_FEC_ENABLE_LIVE, 1); + + if (ret) + drm_err(&i915->drm, + "Timeout waiting for FEC live state to get %s\n", + str_enabled_disabled(enabled)); + + /* + * At least the Synaptics MST hub doesn't set the detected flag + * when FEC decoding is disabled, so skip waiting for that. + */ + if (enabled) + wait_for_fec_detected(&intel_dp->aux, enabled); } static void intel_ddi_enable_fec(struct intel_encoder *encoder, @@ -2234,8 +2306,8 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder, 0, DP_TP_CTL_FEC_ENABLE); } -static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static void intel_ddi_disable_fec(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -2466,13 +2538,17 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_dp_set_power(intel_dp, DP_SET_POWER_D0); intel_dp_configure_protocol_converter(intel_dp, crtc_state); - intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + if (!is_mst) + intel_dp_sink_enable_decompression(state, + to_intel_connector(conn_state->connector), + crtc_state); + /* * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit * in the FEC_CONFIGURATION register to 1 before initiating link * training */ - intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true); intel_dp_check_frl_training(intel_dp); intel_dp_pcon_dsc_configure(intel_dp, crtc_state); @@ -2505,7 +2581,8 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* 6.o Configure and enable FEC if needed */ intel_ddi_enable_fec(encoder, crtc_state); - intel_dsc_dp_pps_write(encoder, crtc_state); + if (!is_mst) + intel_dsc_dp_pps_write(encoder, crtc_state); } static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, @@ -2616,13 +2693,16 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_dp_set_power(intel_dp, DP_SET_POWER_D0); intel_dp_configure_protocol_converter(intel_dp, crtc_state); - intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + if (!is_mst) + intel_dp_sink_enable_decompression(state, + to_intel_connector(conn_state->connector), + crtc_state); /* * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit * in the FEC_CONFIGURATION register to 1 before initiating link * training */ - intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + intel_dp_sink_set_fec_ready(intel_dp, crtc_state, 
true); intel_dp_check_frl_training(intel_dp); intel_dp_pcon_dsc_configure(intel_dp, crtc_state); @@ -2643,7 +2723,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, /* 7.l Configure and enable FEC if needed */ intel_ddi_enable_fec(encoder, crtc_state); - intel_dsc_dp_pps_write(encoder, crtc_state); + if (!is_mst) + intel_dsc_dp_pps_write(encoder, crtc_state); } static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, @@ -2695,9 +2776,11 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, if (!is_mst) intel_dp_set_power(intel_dp, DP_SET_POWER_D0); intel_dp_configure_protocol_converter(intel_dp, crtc_state); - intel_dp_sink_set_decompression_state(intel_dp, crtc_state, - true); - intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + if (!is_mst) + intel_dp_sink_enable_decompression(state, + to_intel_connector(conn_state->connector), + crtc_state); + intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true); intel_dp_start_link_train(intel_dp, crtc_state); if ((port != PORT_A || DISPLAY_VER(dev_priv) >= 9) && !is_trans_port_sync_mode(crtc_state)) @@ -2705,10 +2788,10 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_fec(encoder, crtc_state); - if (!is_mst) + if (!is_mst) { intel_ddi_enable_transcoder_clock(encoder, crtc_state); - - intel_dsc_dp_pps_write(encoder, crtc_state); + intel_dsc_dp_pps_write(encoder, crtc_state); + } } static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, @@ -2717,10 +2800,15 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (HAS_DP20(dev_priv)) + if (HAS_DP20(dev_priv)) { intel_dp_128b132b_sdp_crc16(enc_to_intel_dp(encoder), crtc_state); + if (crtc_state->has_panel_replay) + drm_dp_dpcd_writeb(&intel_dp->aux, PANEL_REPLAY_CONFIG, + DP_PANEL_REPLAY_ENABLE); + } if (DISPLAY_VER(dev_priv) >= 14) mtl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); @@ -2866,8 +2954,7 @@ static void disable_ddi_buf(struct intel_encoder *encoder, intel_de_rmw(dev_priv, dp_tp_ctl_reg(encoder, crtc_state), DP_TP_CTL_ENABLE, 0); - /* Disable FEC in DP Sink */ - intel_ddi_disable_fec_state(encoder, crtc_state); + intel_ddi_disable_fec(encoder, crtc_state); if (wait) intel_wait_ddi_buf_idle(dev_priv, port); @@ -2882,10 +2969,12 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, mtl_disable_ddi_buf(encoder, crtc_state); /* 3.f Disable DP_TP_CTL FEC Enable if it is needed */ - intel_ddi_disable_fec_state(encoder, crtc_state); + intel_ddi_disable_fec(encoder, crtc_state); } else { disable_ddi_buf(encoder, crtc_state); } + + intel_ddi_wait_for_fec_status(encoder, crtc_state, false); } static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, @@ -2925,6 +3014,8 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, intel_disable_ddi_buf(encoder, old_crtc_state); + intel_dp_sink_set_fec_ready(intel_dp, old_crtc_state, false); + /* * From TGL spec: "If single stream or multi-stream master transcoder: * Configure Transcoder Clock select to direct no clock to the @@ -3110,11 +3201,18 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, if (!dig_port->lspcon.active || intel_dp_has_hdmi_sink(&dig_port->dp)) intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); - intel_audio_codec_enable(encoder, crtc_state, conn_state); - 
trans_port_sync_stop_link_train(state, encoder, crtc_state); } +/* FIXME bad home for this function */ +i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915, + enum transcoder cpu_transcoder) +{ + return DISPLAY_VER(i915) >= 14 ? + MTL_CHICKEN_TRANS(cpu_transcoder) : + CHICKEN_TRANS(cpu_transcoder); +} + static i915_reg_t gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, enum port port) @@ -3233,8 +3331,6 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, intel_de_write(dev_priv, DDI_BUF_CTL(port), buf_ctl); intel_wait_ddi_buf_active(dev_priv, port); - - intel_audio_codec_enable(encoder, crtc_state, conn_state); } static void intel_enable_ddi(struct intel_atomic_state *state, @@ -3252,6 +3348,8 @@ static void intel_enable_ddi(struct intel_atomic_state *state, intel_enable_transcoder(crtc_state); + intel_ddi_wait_for_fec_status(encoder, crtc_state, true); + intel_crtc_vblank_on(crtc_state); if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) @@ -3259,10 +3357,8 @@ static void intel_enable_ddi(struct intel_atomic_state *state, else intel_enable_ddi_dp(state, encoder, crtc_state, conn_state); - /* Enable hdcp if it's desired */ - if (conn_state->content_protection == - DRM_MODE_CONTENT_PROTECTION_DESIRED) - intel_hdcp_enable(state, encoder, crtc_state, conn_state); + intel_hdcp_enable(state, encoder, crtc_state, conn_state); + } static void intel_disable_ddi_dp(struct intel_atomic_state *state, @@ -3271,16 +3367,16 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_connector *connector = + to_intel_connector(old_conn_state->connector); intel_dp->link_trained = false; - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - intel_psr_disable(intel_dp, old_crtc_state); intel_edp_backlight_off(old_conn_state); /* Disable the decompression in DP Sink */ - intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state, - false); + intel_dp_sink_disable_decompression(state, + connector, old_crtc_state); /* Disable Ignore_MSA bit in DP Sink */ intel_dp_sink_set_msa_timing_par_ignore_state(intel_dp, old_crtc_state, false); @@ -3294,8 +3390,6 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct drm_connector *connector = old_conn_state->connector; - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - if (!intel_hdmi_handle_sink_scrambling(encoder, connector, false, false)) drm_dbg_kms(&i915->drm, @@ -3578,16 +3672,42 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, AUDIO_OUTPUT_ENABLE(cpu_transcoder); } -void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, - struct intel_crtc_state *crtc_state) +static int tgl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 2; + else + return 0; +} + +static int jsl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 3; + else + return 0; +} + +static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 1; + else + return 0; +} + +void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state) { - if (DISPLAY_VER(dev_priv) >= 12 && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 2; - 
else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) && - crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 3; - else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 1; + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + + if (DISPLAY_VER(dev_priv) >= 14) + crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state); + else if (DISPLAY_VER(dev_priv) >= 12) + crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state); + else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) + crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state); + else if (DISPLAY_VER(dev_priv) >= 11) + crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state); } static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv, @@ -3801,7 +3921,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); intel_hdmi_read_gcp_infoframe(encoder, pipe_config); @@ -3854,18 +3974,13 @@ void intel_ddi_get_clock(struct intel_encoder *encoder, static void mtl_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum phy phy = intel_port_to_phy(i915, encoder->port); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); if (intel_tc_port_in_tbt_alt_mode(dig_port)) { crtc_state->port_clock = intel_mtl_tbt_calc_port_clock(encoder); - } else if (intel_is_c10phy(i915, phy)) { - intel_c10pll_readout_hw_state(encoder, &crtc_state->cx0pll_state.c10); - crtc_state->port_clock = intel_c10pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c10); } else { - intel_c20pll_readout_hw_state(encoder, &crtc_state->cx0pll_state.c20); - crtc_state->port_clock = intel_c20pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c20); + intel_cx0pll_readout_hw_state(encoder, &crtc_state->cx0pll_state); + crtc_state->port_clock = intel_cx0pll_calc_port_clock(encoder, &crtc_state->cx0pll_state); } intel_ddi_get_config(encoder, crtc_state); @@ -4086,7 +4201,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); return 0; } @@ -4844,6 +4959,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, encoder->post_pll_disable = intel_ddi_post_pll_disable; encoder->post_disable = intel_ddi_post_disable; encoder->update_pipe = intel_ddi_update_pipe; + encoder->audio_enable = intel_audio_codec_enable; + encoder->audio_disable = intel_audio_codec_disable; encoder->get_hw_state = intel_ddi_get_hw_state; encoder->sync_state = intel_ddi_sync_state; encoder->initial_fastset_check = intel_ddi_initial_fastset_check; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 4999c0ee229b..434de7196875 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -27,6 +27,8 @@ i915_reg_t dp_tp_ctl_reg(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); 
+i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915, + enum transcoder cpu_transcoder); void intel_ddi_fdi_post_disable(struct intel_atomic_state *state, struct intel_encoder *intel_encoder, const struct intel_crtc_state *old_crtc_state, @@ -60,13 +62,15 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state void intel_ddi_enable_transcoder_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_wait_for_fec_status(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool enabled); void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); -void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, - struct intel_crtc_state *crtc_state); +void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state); int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder, enum transcoder cpu_transcoder, bool enable, u32 hdcp_mask); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index df582ff81b45..b10aad15a63d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -48,6 +48,7 @@ #include "g4x_dp.h" #include "g4x_hdmi.h" #include "hsw_ips.h" +#include "i915_config.h" #include "i915_drv.h" #include "i915_reg.h" #include "i915_utils.h" @@ -72,10 +73,10 @@ #include "intel_dp.h" #include "intel_dp_link_training.h" #include "intel_dp_mst.h" -#include "intel_dpio_phy.h" #include "intel_dpll.h" #include "intel_dpll_mgr.h" #include "intel_dpt.h" +#include "intel_dpt_common.h" #include "intel_drrs.h" #include "intel_dsb.h" #include "intel_dsi.h" @@ -193,12 +194,9 @@ static bool is_hdr_mode(const struct intel_crtc_state *crtc_state) static void skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { - if (enable) - intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), - 0, DUPS1_GATING_DIS | DUPS2_GATING_DIS); - else - intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), - DUPS1_GATING_DIS | DUPS2_GATING_DIS, 0); + intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), + DUPS1_GATING_DIS | DUPS2_GATING_DIS, + enable ? DUPS1_GATING_DIS | DUPS2_GATING_DIS : 0); } /* Wa_2006604312:icl,ehl */ @@ -206,10 +204,9 @@ static void icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { - if (enable) - intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), 0, DPFR_GATING_DIS); - else - intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), DPFR_GATING_DIS, 0); + intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), + DPFR_GATING_DIS, + enable ? DPFR_GATING_DIS : 0); } /* Wa_1604331009:icl,jsl,ehl */ @@ -217,7 +214,8 @@ static void icl_wa_cursorclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { - intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), CURSOR_GATING_DIS, + intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), + CURSOR_GATING_DIS, enable ? 
CURSOR_GATING_DIS : 0); } @@ -397,7 +395,6 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; - i915_reg_t reg; u32 val; drm_dbg_kms(&dev_priv->drm, "enabling pipe %c\n", pipe_name(pipe)); @@ -430,16 +427,16 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state) intel_de_rmw(dev_priv, PIPE_ARB_CTL(pipe), 0, PIPE_ARB_USE_PROG_SLOTS); - reg = TRANSCONF(cpu_transcoder); - val = intel_de_read(dev_priv, reg); + val = intel_de_read(dev_priv, TRANSCONF(cpu_transcoder)); if (val & TRANSCONF_ENABLE) { /* we keep both pipes enabled on 830 */ drm_WARN_ON(&dev_priv->drm, !IS_I830(dev_priv)); return; } - intel_de_write(dev_priv, reg, val | TRANSCONF_ENABLE); - intel_de_posting_read(dev_priv, reg); + intel_de_write(dev_priv, TRANSCONF(cpu_transcoder), + val | TRANSCONF_ENABLE); + intel_de_posting_read(dev_priv, TRANSCONF(cpu_transcoder)); /* * Until the pipe starts PIPEDSL reads will return a stale value, @@ -458,7 +455,6 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; - i915_reg_t reg; u32 val; drm_dbg_kms(&dev_priv->drm, "disabling pipe %c\n", pipe_name(pipe)); @@ -469,8 +465,7 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) */ assert_planes_disabled(crtc); - reg = TRANSCONF(cpu_transcoder); - val = intel_de_read(dev_priv, reg); + val = intel_de_read(dev_priv, TRANSCONF(cpu_transcoder)); if ((val & TRANSCONF_ENABLE) == 0) return; @@ -485,14 +480,12 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) if (!IS_I830(dev_priv)) val &= ~TRANSCONF_ENABLE; - if (DISPLAY_VER(dev_priv) >= 14) - intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(cpu_transcoder), - FECSTALL_DIS_DPTSTREAM_DPTTG, 0); - else if (DISPLAY_VER(dev_priv) >= 12) - intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), + intel_de_write(dev_priv, TRANSCONF(cpu_transcoder), val); + + if (DISPLAY_VER(dev_priv) >= 12) + intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), FECSTALL_DIS_DPTSTREAM_DPTTG, 0); - intel_de_write(dev_priv, reg, val); if ((val & TRANSCONF_ENABLE) == 0) intel_wait_for_pipe_off(old_crtc_state); } @@ -896,6 +889,48 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) (DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915)); } +static void intel_encoders_audio_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct drm_connector_state *conn_state; + struct drm_connector *conn; + int i; + + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + + if (conn_state->crtc != &crtc->base) + continue; + + if (encoder->audio_enable) + encoder->audio_enable(encoder, crtc_state, conn_state); + } +} + +static void intel_encoders_audio_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + 
for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != &crtc->base) + continue; + + if (encoder->audio_disable) + encoder->audio_disable(encoder, old_crtc_state, old_conn_state); + } +} + #define is_enabling(feature, old_crtc_state, new_crtc_state) \ ((!(old_crtc_state)->feature || intel_crtc_needs_modeset(new_crtc_state)) && \ (new_crtc_state)->feature) @@ -955,6 +990,28 @@ static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state, vrr_params_changed(old_crtc_state, new_crtc_state))); } +static bool audio_enabling(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->hw.active) + return false; + + return is_enabling(has_audio, old_crtc_state, new_crtc_state) || + (new_crtc_state->has_audio && + memcmp(old_crtc_state->eld, new_crtc_state->eld, MAX_ELD_BYTES) != 0); +} + +static bool audio_disabling(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!old_crtc_state->hw.active) + return false; + + return is_disabling(has_audio, old_crtc_state, new_crtc_state) || + (old_crtc_state->has_audio && + memcmp(old_crtc_state->eld, new_crtc_state->eld, MAX_ELD_BYTES) != 0); +} + #undef is_disabling #undef is_enabling @@ -995,6 +1052,9 @@ static void intel_post_plane_update(struct intel_atomic_state *state, if (intel_crtc_needs_color_update(new_crtc_state)) intel_color_post_update(new_crtc_state); + + if (audio_enabling(old_crtc_state, new_crtc_state)) + intel_encoders_audio_enable(state, crtc); } static void intel_crtc_enable_flip_done(struct intel_atomic_state *state, @@ -1078,6 +1138,9 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, intel_crtc_update_active_timings(old_crtc_state, false); } + if (audio_disabling(old_crtc_state, new_crtc_state)) + intel_encoders_audio_disable(state, crtc); + intel_drrs_deactivate(old_crtc_state); intel_psr_pre_plane_update(state, crtc); @@ -1513,12 +1576,9 @@ static void hsw_set_linetime_wm(const struct intel_crtc_state *crtc_state) static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder transcoder = crtc_state->cpu_transcoder; - i915_reg_t reg = DISPLAY_VER(dev_priv) >= 14 ? 
MTL_CHICKEN_TRANS(transcoder) : - CHICKEN_TRANS(transcoder); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); - intel_de_rmw(dev_priv, reg, + intel_de_rmw(i915, hsw_chicken_trans_reg(i915, crtc_state->cpu_transcoder), HSW_FRAME_START_DELAY_MASK, HSW_FRAME_START_DELAY(crtc_state->framestart_delay - 1)); } @@ -1796,31 +1856,31 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) { + /* + * DG2's "TC1", although TC-capable output, doesn't share the same flow + * as other platforms on the display engine side and rather rely on the + * SNPS PHY, that is programmed separately + */ if (IS_DG2(dev_priv)) - /* DG2's "TC1" output uses a SNPS PHY */ return false; - else if (IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER_FULL(dev_priv) == IP_VER(14, 0)) + + if (DISPLAY_VER(dev_priv) >= 13) return phy >= PHY_F && phy <= PHY_I; else if (IS_TIGERLAKE(dev_priv)) return phy >= PHY_D && phy <= PHY_I; else if (IS_ICELAKE(dev_priv)) return phy >= PHY_C && phy <= PHY_F; - else - return false; + + return false; } bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy) { - if (phy == PHY_NONE) - return false; - else if (IS_DG2(dev_priv)) - /* - * All four "combo" ports and the TC1 port (PHY E) use - * Synopsis PHYs. - */ - return phy <= PHY_E; - - return false; + /* + * For DG2, and for DG2 only, all four "combo" ports and the TC1 port + * (PHY E) use Synopsis PHYs. See intel_phy_is_tc(). + */ + return IS_DG2(dev_priv) && phy > PHY_NONE && phy <= PHY_E; } enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port) @@ -2409,15 +2469,15 @@ static void compute_m_n(u32 *ret_m, u32 *ret_n, } void -intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, +intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n, - bool fec_enable) + int bw_overhead, + struct intel_link_m_n *m_n) { - u32 data_clock = bits_per_pixel * pixel_clock; - - if (fec_enable) - data_clock = intel_dp_mode_to_fec_clock(data_clock); + u32 link_symbol_clock = intel_dp_link_symbol_clock(link_clock); + u32 data_m = intel_dp_effective_data_rate(pixel_clock, bits_per_pixel_x16, + bw_overhead); + u32 data_n = intel_dp_max_data_rate(link_clock, nlanes); /* * Windows/BIOS uses fixed M/N values always. Follow suit. 
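A note on the intel_link_compute_m_n() rework above: the old bpp + fec_enable pair becomes a .4 fixed-point bits_per_pixel_x16 plus an explicit bw_overhead factor, so FEC and any other link overhead are folded into the data-M term before the M/N reduction instead of being special-cased. A minimal standalone sketch of the arithmetic this implies; the x16 fixed point and the 1,000,000-scaled overhead are assumptions read off the parameter names, and effective_data_rate() below is an illustrative stand-in, not the driver's intel_dp_effective_data_rate():

/*
 * Effective data rate in kbps from a .4 fixed-point bpp and a
 * multiplicative overhead factor scaled by 1,000,000 (1000000 == no
 * overhead). Units are assumptions mirroring the names in the diff.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t effective_data_rate(uint32_t pixel_clock_khz,
                                    uint32_t bpp_x16,
                                    uint32_t bw_overhead)
{
    uint64_t rate = (uint64_t)pixel_clock_khz * bpp_x16;

    rate *= bw_overhead;
    /* undo the x16 fixed point and the 1e6 overhead scale, rounding up */
    return (rate + 16ULL * 1000000 - 1) / (16ULL * 1000000);
}

int main(void)
{
    /* e.g. 594 MHz pixel clock, 18 bpp after DSC, 2.4% overhead */
    printf("%llu kbps\n",
           (unsigned long long)effective_data_rate(594000, 18 * 16, 1024000));
    return 0;
}

Feeding a pre-overheaded data rate into compute_m_n() keeps the M/N reduction itself overhead-agnostic, which is presumably why the fec_enable flag could be dropped from the signature.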
@@ -2428,11 +2488,11 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, */ m_n->tu = 64; compute_m_n(&m_n->data_m, &m_n->data_n, - data_clock, link_clock * nlanes * 8, + data_m, data_n, 0x8000000); compute_m_n(&m_n->link_m, &m_n->link_n, - pixel_clock, link_clock, + pixel_clock, link_symbol_clock, 0x80000); } @@ -2567,7 +2627,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta crtc_vblank_start = 1; } - if (DISPLAY_VER(dev_priv) > 3) + if (DISPLAY_VER(dev_priv) >= 4) intel_de_write(dev_priv, TRANS_VSYNCSHIFT(cpu_transcoder), vsyncshift); @@ -2850,67 +2910,6 @@ static void i9xx_get_pfit_config(struct intel_crtc_state *crtc_state) intel_de_read(dev_priv, PFIT_PGM_RATIOS); } -static void vlv_crtc_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = crtc->pipe; - struct dpll clock; - u32 mdiv; - int refclk = 100000; - - /* In case of DSI, DPLL will not be used */ - if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) - return; - - vlv_dpio_get(dev_priv); - mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe)); - vlv_dpio_put(dev_priv); - - clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; - clock.m2 = mdiv & DPIO_M2DIV_MASK; - clock.n = (mdiv >> DPIO_N_SHIFT) & 0xf; - clock.p1 = (mdiv >> DPIO_P1_SHIFT) & 7; - clock.p2 = (mdiv >> DPIO_P2_SHIFT) & 0x1f; - - pipe_config->port_clock = vlv_calc_dpll_params(refclk, &clock); -} - -static void chv_crtc_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = crtc->pipe; - enum dpio_channel port = vlv_pipe_to_channel(pipe); - struct dpll clock; - u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; - int refclk = 100000; - - /* In case of DSI, DPLL will not be used */ - if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) - return; - - vlv_dpio_get(dev_priv); - cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port)); - pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); - pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); - pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); - pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); - vlv_dpio_put(dev_priv); - - clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; - clock.m2 = (pll_dw0 & 0xff) << 22; - if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) - clock.m2 |= pll_dw2 & 0x3fffff; - clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; - clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; - clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; - - pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock); -} - static enum intel_output_format bdw_get_pipe_misc_output_format(struct intel_crtc *crtc) { @@ -3168,7 +3167,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state) break; case 36: /* Port output 12BPC defined for ADLP+ */ - if (DISPLAY_VER(dev_priv) > 12) + if (DISPLAY_VER(dev_priv) >= 13) val |= PIPE_MISC_BPC_12_ADLP; break; default: @@ -3225,7 +3224,7 @@ int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc) * MIPI DSI HW readout. 
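The bare deletion of vlv_crtc_clock_get() and chv_crtc_clock_get() below has no replacement in this file; the series presumably rehomes the DPLL readout, though nothing in these hunks says where. What the helpers actually do is routine: shift-and-mask the divider fields out of a DPIO register, then run the standard clock = refclk * M / (N * P) calculation. A self-contained sketch of that pattern, with invented field positions rather than the real DPIO_* layout, and CHV's fractional-M2 path omitted:

#include <stdint.h>
#include <stdio.h>

struct dpll { unsigned m1, m2, n, p1, p2; };

/* Illustrative bit layout only; the real DPIO shifts/masks differ. */
static struct dpll decode_mdiv(uint32_t mdiv)
{
    return (struct dpll){
        .m2 = mdiv & 0xff,
        .m1 = (mdiv >> 8) & 0x7,
        .n  = (mdiv >> 12) & 0xf,
        .p1 = (mdiv >> 16) & 0x7,
        .p2 = (mdiv >> 21) & 0x1f,
    };
}

/* The usual integer-divider maths:
 * vco = refclk * (m1 * m2) / n, port clock = vco / (p1 * p2). */
static unsigned calc_port_clock(unsigned refclk_khz, const struct dpll *d)
{
    unsigned m = d->m1 * d->m2;
    unsigned p = d->p1 * d->p2;

    if (!d->n || !p)
        return 0;
    return (unsigned)((uint64_t)refclk_khz * m / d->n / p);
}

int main(void)
{
    struct dpll d = decode_mdiv(0x02a1b462); /* arbitrary register value */
    printf("port clock: %u kHz\n", calc_port_clock(100000, &d));
    return 0;
}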
*/ case PIPE_MISC_BPC_12_ADLP: - if (DISPLAY_VER(dev_priv) > 12) + if (DISPLAY_VER(dev_priv) >= 13) return 36; fallthrough; default: @@ -3802,9 +3801,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, } if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) { - tmp = intel_de_read(dev_priv, DISPLAY_VER(dev_priv) >= 14 ? - MTL_CHICKEN_TRANS(pipe_config->cpu_transcoder) : - CHICKEN_TRANS(pipe_config->cpu_transcoder)); + tmp = intel_de_read(dev_priv, hsw_chicken_trans_reg(dev_priv, pipe_config->cpu_transcoder)); pipe_config->framestart_delay = REG_FIELD_GET(HSW_FRAME_START_DELAY_MASK, tmp) + 1; } else { @@ -3833,133 +3830,27 @@ bool intel_crtc_get_pipe_config(struct intel_crtc_state *crtc_state) return true; } -static int i9xx_pll_refclk(struct drm_device *dev, - const struct intel_crtc_state *pipe_config) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 dpll = pipe_config->dpll_hw_state.dpll; - - if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) - return dev_priv->display.vbt.lvds_ssc_freq; - else if (HAS_PCH_SPLIT(dev_priv)) - return 120000; - else if (DISPLAY_VER(dev_priv) != 2) - return 96000; - else - return 48000; -} - -/* Returns the clock of the currently programmed mode of the given pipe. */ -void i9xx_crtc_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - u32 dpll = pipe_config->dpll_hw_state.dpll; - u32 fp; - struct dpll clock; - int port_clock; - int refclk = i9xx_pll_refclk(dev, pipe_config); - - if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0) - fp = pipe_config->dpll_hw_state.fp0; - else - fp = pipe_config->dpll_hw_state.fp1; - - clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT; - if (IS_PINEVIEW(dev_priv)) { - clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1; - clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT; - } else { - clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT; - clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT; - } - - if (DISPLAY_VER(dev_priv) != 2) { - if (IS_PINEVIEW(dev_priv)) - clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >> - DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW); - else - clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >> - DPLL_FPA01_P1_POST_DIV_SHIFT); - - switch (dpll & DPLL_MODE_MASK) { - case DPLLB_MODE_DAC_SERIAL: - clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ? - 5 : 10; - break; - case DPLLB_MODE_LVDS: - clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ? 
- 7 : 14; - break; - default: - drm_dbg_kms(&dev_priv->drm, - "Unknown DPLL mode %08x in programmed " - "mode\n", (int)(dpll & DPLL_MODE_MASK)); - return; - } - - if (IS_PINEVIEW(dev_priv)) - port_clock = pnv_calc_dpll_params(refclk, &clock); - else - port_clock = i9xx_calc_dpll_params(refclk, &clock); - } else { - enum pipe lvds_pipe; - - if (IS_I85X(dev_priv) && - intel_lvds_port_enabled(dev_priv, LVDS, &lvds_pipe) && - lvds_pipe == crtc->pipe) { - u32 lvds = intel_de_read(dev_priv, LVDS); - - clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >> - DPLL_FPA01_P1_POST_DIV_SHIFT); - - if (lvds & LVDS_CLKB_POWER_UP) - clock.p2 = 7; - else - clock.p2 = 14; - } else { - if (dpll & PLL_P1_DIVIDE_BY_TWO) - clock.p1 = 2; - else { - clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >> - DPLL_FPA01_P1_POST_DIV_SHIFT) + 2; - } - if (dpll & PLL_P2_DIVIDE_BY_4) - clock.p2 = 4; - else - clock.p2 = 2; - } - - port_clock = i9xx_calc_dpll_params(refclk, &clock); - } - - /* - * This value includes pixel_multiplier. We will use - * port_clock to compute adjusted_mode.crtc_clock in the - * encoder's get_config() function. - */ - pipe_config->port_clock = port_clock; -} - int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n) { /* - * The calculation for the data clock is: + * The calculation for the data clock -> pixel clock is: * pixel_clock = ((m/n)*(link_clock * nr_lanes))/bpp * But we want to avoid losing precison if possible, so: * pixel_clock = ((m * link_clock * nr_lanes)/(n*bpp)) * - * and the link clock is simpler: - * link_clock = (m * link_clock) / n + * and for link freq (10kbs units) -> pixel clock it is: + * link_symbol_clock = link_freq * 10 / link_symbol_size + * pixel_clock = (m * link_symbol_clock) / n + * or for more precision: + * pixel_clock = (m * link_freq * 10) / (n * link_symbol_size) */ if (!m_n->link_n) return 0; - return DIV_ROUND_UP_ULL(mul_u32_u32(m_n->link_m, link_freq), - m_n->link_n); + return DIV_ROUND_UP_ULL(mul_u32_u32(m_n->link_m, link_freq * 10), + m_n->link_n * intel_dp_link_symbol_size(link_freq)); } int intel_crtc_dotclock(const struct intel_crtc_state *pipe_config) @@ -4691,6 +4582,7 @@ intel_modeset_pipe_config(struct intel_atomic_state *state, if (ret) return ret; + crtc_state->fec_enable = limits->force_fec_pipes & BIT(crtc->pipe); crtc_state->max_link_bpp_x16 = limits->max_bpp_x16[crtc->pipe]; if (crtc_state->pipe_bpp > to_bpp_int(crtc_state->max_link_bpp_x16)) { @@ -5031,6 +4923,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ + BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ @@ -5041,6 +4935,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X_WITH_MASK(name, mask) do { \ if ((current_config->name & (mask)) != (pipe_config->name & (mask))) { \ + BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected 0x%08x, found 0x%08x)", \ current_config->name & (mask), \ @@ -5051,6 +4947,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ + 
BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %i, found %i)", \ current_config->name, \ @@ -5061,6 +4959,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ + BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \ + __stringify(name) " is not bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %s, found %s)", \ str_yes_no(current_config->name), \ @@ -5069,23 +4969,6 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) -/* - * Checks state where we only read out the enabling, but not the entire - * state itself (like full infoframes or ELD for audio). These states - * require a full modeset on bootup to fix up. - */ -#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ - if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ - PIPE_CONF_CHECK_BOOL(name); \ - } else { \ - pipe_config_mismatch(fastset, crtc, __stringify(name), \ - "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \ - str_yes_no(current_config->name), \ - str_yes_no(pipe_config->name)); \ - ret = false; \ - } \ -} while (0) - #define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ @@ -5216,8 +5099,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) - PIPE_CONF_CHECK_I(hw.enable); - PIPE_CONF_CHECK_I(hw.active); + PIPE_CONF_CHECK_BOOL(hw.enable); + PIPE_CONF_CHECK_BOOL(hw.active); PIPE_CONF_CHECK_I(cpu_transcoder); PIPE_CONF_CHECK_I(mst_master_transcoder); @@ -5273,8 +5156,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(enhanced_framing); PIPE_CONF_CHECK_BOOL(fec_enable); - PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); - PIPE_CONF_CHECK_BUFFER(eld, MAX_ELD_BYTES); + if (!fastset) { + PIPE_CONF_CHECK_BOOL(has_audio); + PIPE_CONF_CHECK_BUFFER(eld, MAX_ELD_BYTES); + } PIPE_CONF_CHECK_X(gmch_pfit.control); /* pfit ratios are autocomputed by the hw on gen4+ */ @@ -5424,9 +5309,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(dsc.config.second_line_bpg_offset); PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset); - PIPE_CONF_CHECK_I(dsc.compression_enable); - PIPE_CONF_CHECK_I(dsc.dsc_split); - PIPE_CONF_CHECK_I(dsc.compressed_bpp); + PIPE_CONF_CHECK_BOOL(dsc.compression_enable); + PIPE_CONF_CHECK_BOOL(dsc.dsc_split); + PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16); PIPE_CONF_CHECK_BOOL(splitter.enable); PIPE_CONF_CHECK_I(splitter.link_count); @@ -5444,7 +5329,6 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL -#undef PIPE_CONF_CHECK_BOOL_INCOMPLETE #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_COLOR_LUT @@ -5535,6 +5419,16 @@ int intel_modeset_pipes_in_mask_early(struct intel_atomic_state *state, return 0; } +static void +intel_crtc_flag_modeset(struct intel_crtc_state *crtc_state) +{ + crtc_state->uapi.mode_changed = true; + + crtc_state->update_pipe = false; + crtc_state->update_m_n = false; + crtc_state->update_lrr = false; +} + /** * 
intel_modeset_all_pipes_late - force a full modeset on all pipes * @state: intel atomic state @@ -5568,9 +5462,8 @@ int intel_modeset_all_pipes_late(struct intel_atomic_state *state, if (ret) return ret; - crtc_state->update_pipe = false; - crtc_state->update_m_n = false; - crtc_state->update_lrr = false; + intel_crtc_flag_modeset(crtc_state); + crtc_state->update_planes |= crtc_state->active_planes; crtc_state->async_flip_planes = 0; crtc_state->do_async_flip = false; @@ -5683,17 +5576,17 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta else new_crtc_state->uapi.mode_changed = false; - if (intel_crtc_needs_modeset(new_crtc_state) || - intel_compare_link_m_n(&old_crtc_state->dp_m_n, + if (intel_compare_link_m_n(&old_crtc_state->dp_m_n, &new_crtc_state->dp_m_n)) new_crtc_state->update_m_n = false; - if (intel_crtc_needs_modeset(new_crtc_state) || - (old_crtc_state->hw.adjusted_mode.crtc_vtotal == new_crtc_state->hw.adjusted_mode.crtc_vtotal && + if ((old_crtc_state->hw.adjusted_mode.crtc_vtotal == new_crtc_state->hw.adjusted_mode.crtc_vtotal && old_crtc_state->hw.adjusted_mode.crtc_vblank_end == new_crtc_state->hw.adjusted_mode.crtc_vblank_end)) new_crtc_state->update_lrr = false; - if (!intel_crtc_needs_modeset(new_crtc_state)) + if (intel_crtc_needs_modeset(new_crtc_state)) + intel_crtc_flag_modeset(new_crtc_state); + else new_crtc_state->update_pipe = true; } @@ -6476,15 +6369,14 @@ int intel_atomic_check(struct drm_device *dev, if (!new_crtc_state->hw.enable || intel_crtc_needs_modeset(new_crtc_state)) continue; + if (intel_dp_mst_crtc_needs_modeset(state, crtc)) + intel_crtc_flag_modeset(new_crtc_state); + if (intel_dp_mst_is_slave_trans(new_crtc_state)) { enum transcoder master = new_crtc_state->mst_master_transcoder; - if (intel_cpu_transcoders_need_modeset(state, BIT(master))) { - new_crtc_state->uapi.mode_changed = true; - new_crtc_state->update_pipe = false; - new_crtc_state->update_m_n = false; - new_crtc_state->update_lrr = false; - } + if (intel_cpu_transcoders_need_modeset(state, BIT(master))) + intel_crtc_flag_modeset(new_crtc_state); } if (is_trans_port_sync_mode(new_crtc_state)) { @@ -6493,21 +6385,13 @@ int intel_atomic_check(struct drm_device *dev, if (new_crtc_state->master_transcoder != INVALID_TRANSCODER) trans |= BIT(new_crtc_state->master_transcoder); - if (intel_cpu_transcoders_need_modeset(state, trans)) { - new_crtc_state->uapi.mode_changed = true; - new_crtc_state->update_pipe = false; - new_crtc_state->update_m_n = false; - new_crtc_state->update_lrr = false; - } + if (intel_cpu_transcoders_need_modeset(state, trans)) + intel_crtc_flag_modeset(new_crtc_state); } if (new_crtc_state->bigjoiner_pipes) { - if (intel_pipes_need_modeset(state, new_crtc_state->bigjoiner_pipes)) { - new_crtc_state->uapi.mode_changed = true; - new_crtc_state->update_pipe = false; - new_crtc_state->update_m_n = false; - new_crtc_state->update_lrr = false; - } + if (intel_pipes_need_modeset(state, new_crtc_state->bigjoiner_pipes)) + intel_crtc_flag_modeset(new_crtc_state); } } @@ -6528,10 +6412,6 @@ int intel_atomic_check(struct drm_device *dev, goto fail; } - ret = drm_dp_mst_atomic_check(&state->base); - if (ret) - goto fail; - ret = intel_atomic_check_planes(state); if (ret) goto fail; @@ -6767,8 +6647,8 @@ static void intel_enable_crtc(struct intel_atomic_state *state, intel_crtc_enable_pipe_crc(crtc); } -static void intel_update_crtc(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void intel_pre_update_crtc(struct 
intel_atomic_state *state, + struct intel_crtc *crtc) { struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state = @@ -6810,6 +6690,15 @@ static void intel_update_crtc(struct intel_atomic_state *state, intel_color_commit_noarm(new_crtc_state); intel_crtc_planes_update_noarm(state, crtc); +} + +static void intel_update_crtc(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); /* Perform vblank evasion around commit operation */ intel_pipe_update_start(state, crtc); @@ -6838,7 +6727,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, * valid pipe configuration from the BIOS we need to take care * of enabling them on the CRTC's first fastset. */ - if (intel_crtc_needs_fastset(new_crtc_state) && !modeset && + if (intel_crtc_needs_fastset(new_crtc_state) && old_crtc_state->inherited) intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } @@ -6934,6 +6823,13 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state) continue; intel_enable_crtc(state, crtc); + intel_pre_update_crtc(state, crtc); + } + + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!new_crtc_state->hw.active) + continue; + intel_update_crtc(state, crtc); } } @@ -6971,6 +6867,15 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) * So first lets enable all pipes that do not need a fullmodeset as * those don't have any external dependency. */ + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + enum pipe pipe = crtc->pipe; + + if ((update_pipes & BIT(pipe)) == 0) + continue; + + intel_pre_update_crtc(state, crtc); + } + while (update_pipes) { for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { @@ -7047,6 +6952,15 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) if ((update_pipes & BIT(pipe)) == 0) continue; + intel_pre_update_crtc(state, crtc); + } + + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + enum pipe pipe = crtc->pipe; + + if ((update_pipes & BIT(pipe)) == 0) + continue; + drm_WARN_ON(&dev_priv->drm, skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, entries, I915_MAX_PIPES, pipe)); @@ -7060,49 +6974,24 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) drm_WARN_ON(&dev_priv->drm, update_pipes); } -static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) -{ - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->display.atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - -void intel_atomic_helper_free_state_worker(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), display.atomic_helper.free_work); - - intel_atomic_helper_free_state(dev_priv); -} - static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state) { - struct wait_queue_entry wait_fence, wait_reset; - struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); - - init_wait_entry(&wait_fence, 0); - init_wait_entry(&wait_reset, 0); - for (;;) { - prepare_to_wait(&intel_state->commit_ready.wait, - &wait_fence, TASK_UNINTERRUPTIBLE); - 
prepare_to_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags, - I915_RESET_MODESET), - &wait_reset, TASK_UNINTERRUPTIBLE); - + struct drm_i915_private *i915 = to_i915(intel_state->base.dev); + struct drm_plane *plane; + struct drm_plane_state *new_plane_state; + int ret, i; - if (i915_sw_fence_done(&intel_state->commit_ready) || - test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags)) - break; + for_each_new_plane_in_state(&intel_state->base, plane, new_plane_state, i) { + if (new_plane_state->fence) { + ret = dma_fence_wait_timeout(new_plane_state->fence, false, + i915_fence_timeout(i915)); + if (ret <= 0) + break; - schedule(); + dma_fence_put(new_plane_state->fence); + new_plane_state->fence = NULL; + } } - finish_wait(&intel_state->commit_ready.wait, &wait_fence); - finish_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags, - I915_RESET_MODESET), - &wait_reset); } static void intel_atomic_cleanup_work(struct work_struct *work) @@ -7120,8 +7009,6 @@ static void intel_atomic_cleanup_work(struct work_struct *work) drm_atomic_helper_cleanup_planes(&i915->drm, &state->base); drm_atomic_helper_commit_cleanup_done(&state->base); drm_atomic_state_put(&state->base); - - intel_atomic_helper_free_state(i915); } static void intel_atomic_prepare_plane_clear_colors(struct intel_atomic_state *state) @@ -7394,32 +7281,6 @@ static void intel_atomic_commit_work(struct work_struct *work) intel_atomic_commit_tail(state); } -static int -intel_atomic_commit_ready(struct i915_sw_fence *fence, - enum i915_sw_fence_notify notify) -{ - struct intel_atomic_state *state = - container_of(fence, struct intel_atomic_state, commit_ready); - - switch (notify) { - case FENCE_COMPLETE: - /* we do blocking waits in the worker, nothing to do here */ - break; - case FENCE_FREE: - { - struct drm_i915_private *i915 = to_i915(state->base.dev); - struct intel_atomic_helper *helper = - &i915->display.atomic_helper; - - if (llist_add(&state->freed, &helper->free_list)) - queue_work(i915->unordered_wq, &helper->free_work); - break; - } - } - - return NOTIFY_DONE; -} - static void intel_atomic_track_fbs(struct intel_atomic_state *state) { struct intel_plane_state *old_plane_state, *new_plane_state; @@ -7442,10 +7303,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, state->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - drm_atomic_state_get(&state->base); - i915_sw_fence_init(&state->commit_ready, - intel_atomic_commit_ready); - /* * The intel_legacy_cursor_update() fast path takes care * of avoiding the vblank waits for simple cursor @@ -7478,7 +7335,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, if (ret) { drm_dbg_atomic(&dev_priv->drm, "Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&state->commit_ready); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } @@ -7494,8 +7350,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, struct intel_crtc *crtc; int i; - i915_sw_fence_commit(&state->commit_ready); - for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) intel_color_cleanup_commit(new_crtc_state); @@ -7509,7 +7363,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, drm_atomic_state_get(&state->base); INIT_WORK(&state->base.commit_work, intel_atomic_commit_work); - i915_sw_fence_commit(&state->commit_ready); if (nonblock && state->modeset) { queue_work(dev_priv->display.wq.modeset, &state->base.commit_work); } else if 
(nonblock) { @@ -7909,7 +7762,7 @@ enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *de * Cantiga+ cannot handle modes with a hsync front porch of 0. * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw. */ - if ((DISPLAY_VER(dev_priv) > 4 || IS_G4X(dev_priv)) && + if ((DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) && mode->hsync_start == mode->hdisplay) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index a05c7e2b782e..f4a0773f0fca 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -105,7 +105,6 @@ enum i9xx_plane_id { }; #define plane_name(p) ((p) + 'A') -#define sprite_name(p, s) ((p) * DISPLAY_RUNTIME_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') #define for_each_plane_id_on_crtc(__crtc, __p) \ for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ @@ -395,8 +394,8 @@ u8 intel_calc_active_pipes(struct intel_atomic_state *state, u8 active_pipes); void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n, - bool fec_enable); + int bw_overhead, + struct intel_link_m_n *m_n); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier); enum drm_mode_status @@ -485,8 +484,6 @@ void intel_cpu_transcoder_get_m1_n1(struct intel_crtc *crtc, void intel_cpu_transcoder_get_m2_n2(struct intel_crtc *crtc, enum transcoder cpu_transcoder, struct intel_link_m_n *m_n); -void i9xx_crtc_clock_get(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); int intel_crtc_dotclock(const struct intel_crtc_state *pipe_config); enum intel_display_power_domain intel_port_to_power_domain(struct intel_digital_port *dig_port); @@ -555,7 +552,7 @@ bool assert_port_valid(struct drm_i915_private *i915, enum port port); struct drm_device *drm = &(__i915)->drm; \ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) \ - if (!drm_WARN(drm, i915_modparams.verbose_state_checks, format)) \ + if (!drm_WARN(drm, __i915->display.params.verbose_state_checks, format)) \ drm_err(drm, format); \ unlikely(__ret_warn_on); \ }) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index ccfe27630fb6..47297ed85822 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -19,6 +19,7 @@ #include "intel_cdclk.h" #include "intel_display_device.h" #include "intel_display_limits.h" +#include "intel_display_params.h" #include "intel_display_power.h" #include "intel_dpll_mgr.h" #include "intel_fbc.h" @@ -297,12 +298,6 @@ struct intel_display { const struct intel_audio_funcs *audio; } funcs; - /* Grouping using anonymous structs. Keep sorted. */ - struct intel_atomic_helper { - struct llist_head free_list; - struct work_struct free_work; - } atomic_helper; - struct { /* backlight registers and fields in struct intel_panel */ struct mutex lock; @@ -348,15 +343,6 @@ struct intel_display { } dbuf; struct { - wait_queue_head_t waitqueue; - - /* mutex to protect pmdemand programming sequence */ - struct mutex lock; - - struct intel_global_obj obj; - } pmdemand; - - struct { /* * dkl.phy_lock protects against concurrent access of the * Dekel TypeC PHYs. 
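Worth a remark on the intel_display.h hunk just above: the state-check macro now reads its verbosity knob from the new display.params block instead of i915_modparams. The macro's shape is a statement expression that escalates to a backtrace-producing WARN when verbose_state_checks is set and quietly logs an error otherwise. A userspace model of that pattern (fprintf stands in for drm_WARN()/drm_err(); only the knob placement mirrors the diff, the rest is illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool verbose_state_checks; /* runtime knob, like display.params */

/* Statement expression (GCC/Clang extension, as in the kernel macro):
 * evaluates to whether the check failed, so callers can branch on it. */
#define state_warn_on(cond, msg) ({                              \
    bool __failed = !!(cond);                                    \
    if (__failed)                                                \
        fprintf(stderr, "%s: %s\n",                              \
                verbose_state_checks ? "WARN" : "error", msg);   \
    __failed;                                                    \
})

int main(void)
{
    verbose_state_checks = false;
    state_warn_on(1, "quiet path");  /* prints "error: quiet path" */

    verbose_state_checks = true;
    if (state_warn_on(1, "loud path"))  /* prints "WARN: loud path" */
        return 1;
    return 0;
}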
@@ -444,6 +430,15 @@ struct intel_display { } ips; struct { + wait_queue_head_t waitqueue; + + /* mutex to protect pmdemand programming sequence */ + struct mutex lock; + + struct intel_global_obj obj; + } pmdemand; + + struct { struct i915_power_domains domains; /* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */ @@ -520,6 +515,7 @@ struct intel_display { struct intel_hotplug hotplug; struct intel_opregion opregion; struct intel_overlay *overlay; + struct intel_display_params params; struct intel_vbt_data vbt; struct intel_wm wm; }; diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 2836826f8c05..d951edb36687 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -17,6 +17,7 @@ #include "intel_de.h" #include "intel_crtc_state_dump.h" #include "intel_display_debugfs.h" +#include "intel_display_debugfs_params.h" #include "intel_display_power.h" #include "intel_display_power_well.h" #include "intel_display_types.h" @@ -641,6 +642,17 @@ static int i915_display_info(struct seq_file *m, void *unused) return 0; } +static int i915_display_capabilities(struct seq_file *m, void *unused) +{ + struct drm_i915_private *i915 = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + intel_display_device_info_print(DISPLAY_INFO(i915), + DISPLAY_RUNTIME_INFO(i915), &p); + + return 0; +} + static int i915_shared_dplls_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -1059,6 +1071,7 @@ static const struct drm_info_list intel_display_debugfs_list[] = { {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_power_domain_info", i915_power_domain_info, 0}, {"i915_display_info", i915_display_info, 0}, + {"i915_display_capabilities", i915_display_capabilities, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, {"i915_ddb_info", i915_ddb_info, 0}, @@ -1082,7 +1095,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) { debugfs_create_file(intel_display_debugfs_files[i].name, - S_IRUGO | S_IWUSR, + 0644, minor->debugfs_root, to_i915(minor->dev), intel_display_debugfs_files[i].fops); @@ -1098,15 +1111,15 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) intel_hpd_debugfs_register(i915); intel_psr_debugfs_register(i915); intel_wm_debugfs_register(i915); + intel_display_debugfs_params(i915); } static int i915_panel_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct intel_dp *intel_dp = - intel_attached_dp(to_intel_connector(connector)); + struct intel_connector *connector = m->private; + struct intel_dp *intel_dp = intel_attached_dp(connector); - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return -ENODEV; seq_printf(m, "Panel power up delay: %d\n", @@ -1124,23 +1137,23 @@ DEFINE_SHOW_ATTRIBUTE(i915_panel); static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); int ret; ret = 
drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - if (!connector->encoder || connector->status != connector_status_connected) { + if (!connector->base.encoder || + connector->base.status != connector_status_connected) { ret = -ENODEV; goto out; } - seq_printf(m, "%s:%d HDCP version: ", connector->name, - connector->base.id); - intel_hdcp_info(m, intel_connector); + seq_printf(m, "%s:%d HDCP version: ", connector->base.name, + connector->base.base.id); + intel_hdcp_info(m, connector); out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); @@ -1151,16 +1164,16 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability); static int i915_lpsp_capability_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_encoder *encoder; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); + int connector_type = connector->base.connector_type; bool lpsp_capable = false; - encoder = intel_attached_encoder(to_intel_connector(connector)); if (!encoder) return -ENODEV; - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return -ENODEV; if (DISPLAY_VER(i915) >= 13) @@ -1173,15 +1186,15 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data) */ lpsp_capable = encoder->port <= PORT_B; else if (DISPLAY_VER(i915) == 11) - lpsp_capable = (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - connector->connector_type == DRM_MODE_CONNECTOR_eDP); + lpsp_capable = (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP); else if (IS_DISPLAY_VER(i915, 9, 10)) lpsp_capable = (encoder->port == PORT_A && - (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - connector->connector_type == DRM_MODE_CONNECTOR_eDP || - connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)); + (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP || + connector_type == DRM_MODE_CONNECTOR_DisplayPort)); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - lpsp_capable = connector->connector_type == DRM_MODE_CONNECTOR_eDP; + lpsp_capable = connector_type == DRM_MODE_CONNECTOR_eDP; seq_printf(m, "LPSP: %s\n", lpsp_capable ? 
"capable" : "incapable"); @@ -1191,7 +1204,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability); static int i915_dsc_fec_support_show(struct seq_file *m, void *data) { - struct intel_connector *connector = to_intel_connector(m->private); + struct intel_connector *connector = m->private; struct drm_i915_private *i915 = to_i915(connector->base.dev); struct drm_crtc *crtc; struct intel_dp *intel_dp; @@ -1242,6 +1255,8 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) DP_DSC_YCbCr420_Native)), str_yes_no(drm_dp_dsc_sink_supports_format(connector->dp.dsc_dpcd, DP_DSC_YCbCr444))); + seq_printf(m, "DSC_Sink_BPP_Precision: %d\n", + drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd)); seq_printf(m, "Force_DSC_Enable: %s\n", str_yes_no(intel_dp->force_dsc_en)); if (!intel_dp_is_edp(intel_dp)) @@ -1259,13 +1274,13 @@ static ssize_t i915_dsc_fec_support_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); bool dsc_enable = false; int ret; - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (len == 0) return 0; @@ -1303,22 +1318,22 @@ static const struct file_operations i915_dsc_fec_support_fops = { static int i915_dsc_bpc_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_device *dev = connector->dev; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); int ret; if (!encoder) return -ENODEV; - ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - crtc = connector->state->crtc; - if (connector->status != connector_status_connected || !crtc) { + crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { ret = -ENODEV; goto out; } @@ -1326,7 +1341,7 @@ static int i915_dsc_bpc_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "Input_BPC: %d\n", crtc_state->dsc.config.bits_per_component); -out: drm_modeset_unlock(&dev->mode_config.connection_mutex); +out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); return ret; } @@ -1335,9 +1350,9 @@ static ssize_t i915_dsc_bpc_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); int dsc_bpc = 0; int ret; @@ -1369,22 +1384,22 @@ static 
const struct file_operations i915_dsc_bpc_fops = { static int i915_dsc_output_format_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_device *dev = connector->dev; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); int ret; if (!encoder) return -ENODEV; - ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - crtc = connector->state->crtc; - if (connector->status != connector_status_connected || !crtc) { + crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { ret = -ENODEV; goto out; } @@ -1393,7 +1408,7 @@ static int i915_dsc_output_format_show(struct seq_file *m, void *data) seq_printf(m, "DSC_Output_Format: %s\n", intel_output_format_name(crtc_state->output_format)); -out: drm_modeset_unlock(&dev->mode_config.connection_mutex); +out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); return ret; } @@ -1402,9 +1417,9 @@ static ssize_t i915_dsc_output_format_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); int dsc_output_format = 0; int ret; @@ -1434,6 +1449,84 @@ static const struct file_operations i915_dsc_output_format_fops = { .write = i915_dsc_output_format_write }; +static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data) +{ + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); + struct drm_crtc *crtc; + struct intel_dp *intel_dp; + int ret; + + if (!encoder) + return -ENODEV; + + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); + if (ret) + return ret; + + crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { + ret = -ENODEV; + goto out; + } + + intel_dp = intel_attached_dp(connector); + seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n", + str_yes_no(intel_dp->force_dsc_fractional_bpp_en)); + +out: + drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); + + return ret; +} + +static ssize_t i915_dsc_fractional_bpp_write(struct file *file, + const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + bool dsc_fractional_bpp_enable = false; + int ret; + + if (len == 0) + return 0; + + drm_dbg(&i915->drm, + "Copied %zu bytes from user to force fractional bpp for DSC\n", len); + + ret = kstrtobool_from_user(ubuf, len, 
&dsc_fractional_bpp_enable); + if (ret < 0) + return ret; + + drm_dbg(&i915->drm, "Got %s for DSC Fractional BPP Enable\n", + (dsc_fractional_bpp_enable) ? "true" : "false"); + intel_dp->force_dsc_fractional_bpp_en = dsc_fractional_bpp_enable; + + *offp += len; + + return len; +} + +static int i915_dsc_fractional_bpp_open(struct inode *inode, + struct file *file) +{ + return single_open(file, i915_dsc_fractional_bpp_show, inode->i_private); +} + +static const struct file_operations i915_dsc_fractional_bpp_fops = { + .owner = THIS_MODULE, + .open = i915_dsc_fractional_bpp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_dsc_fractional_bpp_write +}; + /* * Returns the Current CRTC's bpc. * Example usage: cat /sys/kernel/debug/dri/0/crtc-0/i915_current_bpc @@ -1470,39 +1563,38 @@ DEFINE_SHOW_ATTRIBUTE(intel_crtc_pipe); /** * intel_connector_debugfs_add - add i915 specific connector debugfs files - * @intel_connector: pointer to a registered drm_connector + * @connector: pointer to a registered intel_connector * * Cleanup will be done by drm_connector_unregister() through a call to * drm_debugfs_connector_remove(). */ -void intel_connector_debugfs_add(struct intel_connector *intel_connector) +void intel_connector_debugfs_add(struct intel_connector *connector) { - struct drm_connector *connector = &intel_connector->base; - struct dentry *root = connector->debugfs_entry; - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct dentry *root = connector->base.debugfs_entry; + int connector_type = connector->base.connector_type; /* The connector must have been registered beforehands. */ if (!root) return; - intel_drrs_connector_debugfs_add(intel_connector); - intel_psr_connector_debugfs_add(intel_connector); + intel_drrs_connector_debugfs_add(connector); + intel_psr_connector_debugfs_add(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - debugfs_create_file("i915_panel_timings", S_IRUGO, root, + if (connector_type == DRM_MODE_CONNECTOR_eDP) + debugfs_create_file("i915_panel_timings", 0444, root, connector, &i915_panel_fops); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { - debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root, + if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) { + debugfs_create_file("i915_hdcp_sink_capability", 0444, root, connector, &i915_hdcp_sink_capability_fops); } - if (DISPLAY_VER(dev_priv) >= 11 && - ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - !to_intel_connector(connector)->mst_port) || - connector->connector_type == DRM_MODE_CONNECTOR_eDP)) { + if (DISPLAY_VER(i915) >= 11 && + ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) || + connector_type == DRM_MODE_CONNECTOR_eDP)) { debugfs_create_file("i915_dsc_fec_support", 0644, root, connector, &i915_dsc_fec_support_fops); @@ -1511,13 +1603,16 @@ void intel_connector_debugfs_add(struct intel_connector *intel_connector) debugfs_create_file("i915_dsc_output_format", 0644, root, connector, &i915_dsc_output_format_fops); + + debugfs_create_file("i915_dsc_fractional_bpp", 0644, root, + connector, &i915_dsc_fractional_bpp_fops); } - if (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - 
connector->connector_type == DRM_MODE_CONNECTOR_eDP || - connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) + if (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP || + connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) debugfs_create_file("i915_lpsp_capability", 0444, root, connector, &i915_lpsp_capability_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c new file mode 100644 index 000000000000..b7e68eb62452 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/kernel.h> + +#include <drm/drm_drv.h> + +#include "intel_display_debugfs_params.h" +#include "i915_drv.h" +#include "intel_display_params.h" + +/* int param */ +static int intel_display_param_int_show(struct seq_file *m, void *data) +{ + int *value = m->private; + + seq_printf(m, "%d\n", *value); + + return 0; +} + +static int intel_display_param_int_open(struct inode *inode, struct file *file) +{ + return single_open(file, intel_display_param_int_show, inode->i_private); +} + +static ssize_t intel_display_param_int_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + int *value = m->private; + int ret; + + ret = kstrtoint_from_user(ubuf, len, 0, value); + if (ret) { + /* support boolean values too */ + bool b; + + ret = kstrtobool_from_user(ubuf, len, &b); + if (!ret) + *value = b; + } + + return ret ?: len; +} + +static const struct file_operations intel_display_param_int_fops = { + .owner = THIS_MODULE, + .open = intel_display_param_int_open, + .read = seq_read, + .write = intel_display_param_int_write, + .llseek = default_llseek, + .release = single_release, +}; + +static const struct file_operations intel_display_param_int_fops_ro = { + .owner = THIS_MODULE, + .open = intel_display_param_int_open, + .read = seq_read, + .llseek = default_llseek, + .release = single_release, +}; + +/* unsigned int param */ +static int intel_display_param_uint_show(struct seq_file *m, void *data) +{ + unsigned int *value = m->private; + + seq_printf(m, "%u\n", *value); + + return 0; +} + +static int intel_display_param_uint_open(struct inode *inode, struct file *file) +{ + return single_open(file, intel_display_param_uint_show, inode->i_private); +} + +static ssize_t intel_display_param_uint_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + unsigned int *value = m->private; + int ret; + + ret = kstrtouint_from_user(ubuf, len, 0, value); + if (ret) { + /* support boolean values too */ + bool b; + + ret = kstrtobool_from_user(ubuf, len, &b); + if (!ret) + *value = b; + } + + return ret ?: len; +} + +static const struct file_operations intel_display_param_uint_fops = { + .owner = THIS_MODULE, + .open = intel_display_param_uint_open, + .read = seq_read, + .write = intel_display_param_uint_write, + .llseek = default_llseek, + .release = single_release, +}; + +static const struct file_operations intel_display_param_uint_fops_ro = { + .owner = THIS_MODULE, + .open = intel_display_param_uint_open, + .read = 
seq_read, + .llseek = default_llseek, + .release = single_release, +}; + +#define RO(mode) (((mode) & 0222) == 0) + +__maybe_unused static struct dentry * +intel_display_debugfs_create_int(const char *name, umode_t mode, + struct dentry *parent, int *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, value, + RO(mode) ? &intel_display_param_int_fops_ro : + &intel_display_param_int_fops); +} + +__maybe_unused static struct dentry * +intel_display_debugfs_create_uint(const char *name, umode_t mode, + struct dentry *parent, unsigned int *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, value, + RO(mode) ? &intel_display_param_uint_fops_ro : + &intel_display_param_uint_fops); +} + +#define _intel_display_param_create_file(parent, name, mode, valp) \ + do { \ + if (mode) \ + _Generic(valp, \ + bool * : debugfs_create_bool, \ + int * : intel_display_debugfs_create_int, \ + unsigned int * : intel_display_debugfs_create_uint, \ + unsigned long * : debugfs_create_ulong, \ + char ** : debugfs_create_str) \ + (name, mode, parent, valp); \ + } while (0) + +/* add a subdirectory with files for each intel display param */ +void intel_display_debugfs_params(struct drm_i915_private *i915) +{ + struct drm_minor *minor = i915->drm.primary; + struct dentry *dir; + char dirname[16]; + + snprintf(dirname, sizeof(dirname), "%s_params", i915->drm.driver->name); + dir = debugfs_lookup(dirname, minor->debugfs_root); + if (!dir) + dir = debugfs_create_dir(dirname, minor->debugfs_root); + if (IS_ERR(dir)) + return; + + /* + * Note: We could create files for params needing special handling + * here. Set mode in params to 0 to skip the generic create file, or + * just let the generic create file fail silently with -EEXIST. + */ + +#define REGISTER(T, x, unused, mode, ...) 
_intel_display_param_create_file( \ + dir, #x, mode, &i915->display.params.x); + INTEL_DISPLAY_PARAMS_FOR_EACH(REGISTER); +#undef REGISTER +} diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h new file mode 100644 index 000000000000..1e9945a4044c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_DISPLAY_DEBUGFS_PARAMS__ +#define __INTEL_DISPLAY_DEBUGFS_PARAMS__ + +struct drm_i915_private; + +void intel_display_debugfs_params(struct drm_i915_private *i915); + +#endif /* __INTEL_DISPLAY_DEBUGFS_PARAMS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 2b1ec23ba9c3..0b522c6a8d6f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -12,6 +12,7 @@ #include "intel_de.h" #include "intel_display.h" #include "intel_display_device.h" +#include "intel_display_params.h" #include "intel_display_power.h" #include "intel_display_reg_defs.h" #include "intel_fbc.h" @@ -937,6 +938,13 @@ void intel_display_device_probe(struct drm_i915_private *i915) DISPLAY_RUNTIME_INFO(i915)->ip.rel = rel; DISPLAY_RUNTIME_INFO(i915)->ip.step = step; } + + intel_display_params_copy(&i915->display.params); +} + +void intel_display_device_remove(struct drm_i915_private *i915) +{ + intel_display_params_free(&i915->display.params); } static void __intel_display_device_info_runtime_init(struct drm_i915_private *i915) @@ -1105,7 +1113,7 @@ void intel_display_device_info_runtime_init(struct drm_i915_private *i915) } /* Disable nuclear pageflip by default on pre-g4x */ - if (!i915->params.nuclear_pageflip && + if (!i915->display.params.nuclear_pageflip && DISPLAY_VER(i915) < 5 && !IS_G4X(i915)) i915->drm.driver_features &= ~DRIVER_ATOMIC; } @@ -1145,5 +1153,6 @@ bool intel_display_device_enabled(struct drm_i915_private *i915) /* Only valid when HAS_DISPLAY() is true */ drm_WARN_ON(&i915->drm, !HAS_DISPLAY(i915)); - return !i915->params.disable_display && !intel_opregion_headless_sku(i915); + return !i915->display.params.disable_display && + !intel_opregion_headless_sku(i915); } diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 5b5c0e53307f..fe4268813786 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -36,7 +36,7 @@ struct drm_printer; #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) #define HAS_CDCLK_CRAWL(i915) (DISPLAY_INFO(i915)->has_cdclk_crawl) #define HAS_CDCLK_SQUASH(i915) (DISPLAY_INFO(i915)->has_cdclk_squash) -#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7) +#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13)) #define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915)) #define HAS_DDI(i915) (DISPLAY_INFO(i915)->has_ddi) #define HAS_DISPLAY(i915) (DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0) @@ -49,7 +49,7 @@ struct drm_printer; #define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) #define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) #define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) -#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) > 2) +#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) 
>= 3) #define HAS_GMBUS_IRQ(i915) (DISPLAY_VER(i915) >= 4) #define HAS_GMBUS_BURST_READ(i915) (DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915)) #define HAS_GMCH(i915) (DISPLAY_INFO(i915)->has_gmch) @@ -161,6 +161,7 @@ struct intel_display_device_info { bool intel_display_device_enabled(struct drm_i915_private *i915); void intel_display_device_probe(struct drm_i915_private *i915); +void intel_display_device_remove(struct drm_i915_private *i915); void intel_display_device_info_runtime_init(struct drm_i915_private *i915); void intel_display_device_info_print(const struct intel_display_device_info *info, diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 44b59ac301e6..9df9097a0255 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -181,6 +181,13 @@ void intel_display_driver_early_probe(struct drm_i915_private *i915) if (!HAS_DISPLAY(i915)) return; + spin_lock_init(&i915->display.fb_tracking.lock); + mutex_init(&i915->display.backlight.lock); + mutex_init(&i915->display.audio.mutex); + mutex_init(&i915->display.wm.wm_mutex); + mutex_init(&i915->display.pps.mutex); + mutex_init(&i915->display.hdcp.hdcp_mutex); + intel_display_irq_init(i915); intel_dkl_phy_init(i915); intel_color_init_hooks(i915); @@ -252,10 +259,6 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) if (ret) goto cleanup_vga_client_pw_domain_dmc; - init_llist_head(&i915->display.atomic_helper.free_list); - INIT_WORK(&i915->display.atomic_helper.free_work, - intel_atomic_helper_free_state_worker); - intel_init_quirks(i915); intel_fbc_init(i915); @@ -423,9 +426,6 @@ void intel_display_driver_remove(struct drm_i915_private *i915) flush_workqueue(i915->display.wq.flip); flush_workqueue(i915->display.wq.modeset); - flush_work(&i915->display.atomic_helper.free_work); - drm_WARN_ON(&i915->drm, !llist_empty(&i915->display.atomic_helper.free_list)); - /* * MST topology needs to be suspended so we don't have any calls to * fbdev after it's finalized. 
MST will be destroyed later as part of diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index bff4a76310c0..a7d8f3fc98de 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -340,18 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915, enum pipe pipe) { struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe); - struct drm_crtc_state *crtc_state = crtc->base.state; - struct drm_pending_vblank_event *e = crtc_state->event; - struct drm_device *dev = &i915->drm; - unsigned long irqflags; - - spin_lock_irqsave(&dev->event_lock, irqflags); - crtc_state->event = NULL; + spin_lock(&i915->drm.event_lock); - drm_crtc_send_vblank_event(&crtc->base, e); + if (crtc->flip_done_event) { + drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event); + crtc->flip_done_event = NULL; + } - spin_unlock_irqrestore(&dev->event_lock, irqflags); + spin_unlock(&i915->drm.event_lock); } static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, @@ -896,7 +893,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) } if (!found) - drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt\n"); + drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt: 0x%08x\n", iir); } static void gen11_dsi_te_interrupt_handler(struct drm_i915_private *dev_priv, @@ -1653,7 +1650,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_postinstall(dev_priv); - if (DISPLAY_VER(dev_priv) <= 10) + if (DISPLAY_VER(dev_priv) < 11) de_misc_masked |= GEN8_DE_MISC_GSE; if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c new file mode 100644 index 000000000000..11e03cfb774d --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_display_params.h" +#include "i915_drv.h" + +#define intel_display_param_named(name, T, perm, desc) \ + module_param_named(name, intel_display_modparams.name, T, perm); \ + MODULE_PARM_DESC(name, desc) +#define intel_display_param_named_unsafe(name, T, perm, desc) \ + module_param_named_unsafe(name, intel_display_modparams.name, T, perm); \ + MODULE_PARM_DESC(name, desc) + +static struct intel_display_params intel_display_modparams __read_mostly = { +#define MEMBER(T, member, value, ...) .member = (value), + INTEL_DISPLAY_PARAMS_FOR_EACH(MEMBER) +#undef MEMBER +}; +/* + * Note: As a rule, keep module parameter sysfs permissions read-only + * 0400. Runtime changes are only supported through i915 debugfs. + * + * For any exceptions requiring write access and runtime changes through module + * parameter sysfs, prevent debugfs file creation by setting the parameter's + * debugfs mode to 0. 
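The intel_display_params machinery here is built around one X-macro table: INTEL_DISPLAY_PARAMS_FOR_EACH expands once to declare the struct members and once to fill in the defaults (above), and expands again for the per-parameter module_param registrations that follow and for the _Generic based dump/copy/free helpers further down. A minimal userspace sketch of the same pattern, assuming nothing beyond C11 (the EXAMPLE_PARAMS_FOR_EACH table and demo_params type are hypothetical, not i915 code):

    #include <stdbool.h>
    #include <stdio.h>

    #define EXAMPLE_PARAMS_FOR_EACH(param) \
            param(bool, enable_foo, true) \
            param(int, bar_mode, -1)

    /* one expansion declares the members... */
    #define MEMBER(T, name, value) T name;
    struct demo_params { EXAMPLE_PARAMS_FOR_EACH(MEMBER) };
    #undef MEMBER

    /* ...another provides the defaults... */
    static struct demo_params demo = {
    #define MEMBER(T, name, value) .name = (value),
            EXAMPLE_PARAMS_FOR_EACH(MEMBER)
    #undef MEMBER
    };

    /* ...and _Generic dispatches on the member type, as the
     * _param_print()/_param_dup()/_param_free() helpers below do. */
    static void print_bool(const char *n, bool v) { printf("%s=%d\n", n, v); }
    static void print_int(const char *n, int v) { printf("%s=%d\n", n, v); }
    #define PRINT(T, name, value) \
            _Generic(demo.name, bool: print_bool, int: print_int)(#name, demo.name);

    int main(void)
    {
            EXAMPLE_PARAMS_FOR_EACH(PRINT)
            return 0;
    }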
+ */ + +intel_display_param_named_unsafe(vbt_firmware, charp, 0400, + "Load VBT from specified file under /lib/firmware"); + +intel_display_param_named_unsafe(lvds_channel_mode, int, 0400, + "Specify LVDS channel mode " + "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)"); + +intel_display_param_named_unsafe(panel_use_ssc, int, 0400, + "Use Spread Spectrum Clock with panels [LVDS/eDP] " + "(default: auto from VBT)"); + +intel_display_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, + "Override/Ignore selection of SDVO panel mode in the VBT " + "(-2=ignore, -1=auto [default], index in VBT BIOS table)"); + +intel_display_param_named_unsafe(enable_dc, int, 0400, + "Enable power-saving display C-states. " + "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " + "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); + +intel_display_param_named_unsafe(enable_dpt, bool, 0400, + "Enable display page table (DPT) (default: true)"); + +intel_display_param_named_unsafe(enable_sagv, bool, 0400, + "Enable system agent voltage/frequency scaling (SAGV) (default: true)"); + +intel_display_param_named_unsafe(disable_power_well, int, 0400, + "Disable display power wells when possible " + "(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)"); + +intel_display_param_named_unsafe(enable_ips, bool, 0400, "Enable IPS (default: true)"); + +intel_display_param_named_unsafe(invert_brightness, int, 0400, + "Invert backlight brightness " + "(-1 force normal, 0 machine defaults, 1 force inversion), please " + "report PCI device ID, subsystem vendor and subsystem device ID " + "to dri-devel@lists.freedesktop.org, if your machine needs it. " + "It will then be included in an upcoming module version."); + +/* WA to get away with the default setting in VBT for early platforms.Will be removed */ +intel_display_param_named_unsafe(edp_vswing, int, 0400, + "Ignore/Override vswing pre-emph table selection from VBT " + "(0=use value from vbt [default], 1=low power swing(200mV)," + "2=default swing(400mV))"); + +intel_display_param_named(enable_dpcd_backlight, int, 0400, + "Enable support for DPCD backlight control" + "(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enable, 2=force VESA interface, 3=force Intel interface)"); + +intel_display_param_named_unsafe(load_detect_test, bool, 0400, + "Force-enable the VGA load detect code for testing (default:false). " + "For developers only."); + +intel_display_param_named_unsafe(force_reset_modeset_test, bool, 0400, + "Force a modeset during gpu reset for testing (default:false). " + "For developers only."); + +intel_display_param_named(disable_display, bool, 0400, + "Disable display (default: false)"); + +intel_display_param_named(verbose_state_checks, bool, 0400, + "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); + +intel_display_param_named_unsafe(nuclear_pageflip, bool, 0400, + "Force enable atomic functionality on platforms that don't have full support yet."); + +intel_display_param_named_unsafe(enable_dp_mst, bool, 0400, + "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); + +intel_display_param_named_unsafe(enable_fbc, int, 0400, + "Enable frame buffer compression for power savings " + "(default: -1 (use per-chip default))"); + +intel_display_param_named_unsafe(enable_psr, int, 0400, + "Enable PSR " + "(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) " + "Default: -1 (use per-chip default)"); + +intel_display_param_named(psr_safest_params, bool, 0400, + "Replace PSR VBT parameters by the safest and not optimal ones. This " + "is helpful to detect if PSR issues are related to bad values set in " + " VBT. (0=use VBT parameters, 1=use safest parameters)" + "Default: 0"); + +intel_display_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400, + "Enable PSR2 selective fetch " + "(0=disabled, 1=enabled) " + "Default: 1"); + +__maybe_unused +static void _param_print_bool(struct drm_printer *p, const char *driver_name, + const char *name, bool val) +{ + drm_printf(p, "%s.%s=%s\n", driver_name, name, str_yes_no(val)); +} + +__maybe_unused +static void _param_print_int(struct drm_printer *p, const char *driver_name, + const char *name, int val) +{ + drm_printf(p, "%s.%s=%d\n", driver_name, name, val); +} + +__maybe_unused +static void _param_print_uint(struct drm_printer *p, const char *driver_name, + const char *name, unsigned int val) +{ + drm_printf(p, "%s.%s=%u\n", driver_name, name, val); +} + +__maybe_unused +static void _param_print_ulong(struct drm_printer *p, const char *driver_name, + const char *name, unsigned long val) +{ + drm_printf(p, "%s.%s=%lu\n", driver_name, name, val); +} + +__maybe_unused +static void _param_print_charp(struct drm_printer *p, const char *driver_name, + const char *name, const char *val) +{ + drm_printf(p, "%s.%s=%s\n", driver_name, name, val); +} + +#define _param_print(p, driver_name, name, val) \ + _Generic(val, \ + bool : _param_print_bool, \ + int : _param_print_int, \ + unsigned int : _param_print_uint, \ + unsigned long : _param_print_ulong, \ + char * : _param_print_charp)(p, driver_name, name, val) + +/** + * intel_display_params_dump - dump intel display modparams + * @i915: i915 device + * @p: the &drm_printer + * + * Pretty printer for i915 modparams. + */ +void intel_display_params_dump(struct drm_i915_private *i915, struct drm_printer *p) +{ +#define PRINT(T, x, ...) _param_print(p, i915->drm.driver->name, #x, i915->display.params.x); + INTEL_DISPLAY_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + +__maybe_unused static void _param_dup_charp(char **valp) +{ + *valp = kstrdup(*valp ? *valp : "", GFP_ATOMIC); +} + +__maybe_unused static void _param_nop(void *valp) +{ +} + +#define _param_dup(valp) \ + _Generic(valp, \ + char ** : _param_dup_charp, \ + default : _param_nop) \ + (valp) + +void intel_display_params_copy(struct intel_display_params *dest) +{ + *dest = intel_display_modparams; +#define DUP(T, x, ...) _param_dup(&dest->x); + INTEL_DISPLAY_PARAMS_FOR_EACH(DUP); +#undef DUP +} + +__maybe_unused static void _param_free_charp(char **valp) +{ + kfree(*valp); + *valp = NULL; +} + +#define _param_free(valp) \ + _Generic(valp, \ + char ** : _param_free_charp, \ + default : _param_nop) \ + (valp) + +/* free the allocated members, *not* the passed in params itself */ +void intel_display_params_free(struct intel_display_params *params) +{ +#define FREE(T, x, ...) 
_param_free(&params->x); + INTEL_DISPLAY_PARAMS_FOR_EACH(FREE); +#undef FREE +} diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h new file mode 100644 index 000000000000..6206cc51df04 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _INTEL_DISPLAY_PARAMS_H_ +#define _INTEL_DISPLAY_PARAMS_H_ + +#include <linux/types.h> + +struct drm_printer; +struct drm_i915_private; + +/* + * Invoke param, a function-like macro, for each intel display param, with + * arguments: + * + * param(type, name, value, mode) + * + * type: parameter type, one of {bool, int, unsigned int, unsigned long, char *} + * name: name of the parameter + * value: initial/default value of the parameter + * mode: debugfs file permissions, one of {0400, 0600, 0}, use 0 to not create + * debugfs file + */ +#define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \ + param(char *, vbt_firmware, NULL, 0400) \ + param(int, lvds_channel_mode, 0, 0400) \ + param(int, panel_use_ssc, -1, 0600) \ + param(int, vbt_sdvo_panel_type, -1, 0400) \ + param(int, enable_dc, -1, 0400) \ + param(bool, enable_dpt, true, 0400) \ + param(bool, enable_sagv, true, 0600) \ + param(int, disable_power_well, -1, 0400) \ + param(bool, enable_ips, true, 0600) \ + param(int, invert_brightness, 0, 0600) \ + param(int, edp_vswing, 0, 0400) \ + param(int, enable_dpcd_backlight, -1, 0600) \ + param(bool, load_detect_test, false, 0600) \ + param(bool, force_reset_modeset_test, false, 0600) \ + param(bool, disable_display, false, 0400) \ + param(bool, verbose_state_checks, true, 0400) \ + param(bool, nuclear_pageflip, false, 0400) \ + param(bool, enable_dp_mst, true, 0600) \ + param(int, enable_fbc, -1, 0600) \ + param(int, enable_psr, -1, 0600) \ + param(bool, psr_safest_params, false, 0400) \ + param(bool, enable_psr2_sel_fetch, true, 0400) \ + +#define MEMBER(T, member, ...) T member; +struct intel_display_params { + INTEL_DISPLAY_PARAMS_FOR_EACH(MEMBER); +}; +#undef MEMBER + +void intel_display_params_dump(struct drm_i915_private *i915, + struct drm_printer *p); +void intel_display_params_copy(struct intel_display_params *dest); +void intel_display_params_free(struct intel_display_params *params); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e25785ae1c20..5f091502719b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -405,7 +405,7 @@ print_async_put_domains_state(struct i915_power_domains *power_domains) struct drm_i915_private, display.power.domains); - drm_dbg(&i915->drm, "async_put_wakeref %u\n", + drm_dbg(&i915->drm, "async_put_wakeref %lu\n", power_domains->async_put_wakeref); print_power_domains(power_domains, "async_put_domains[0]", @@ -967,7 +967,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, DISPLAY_VER(dev_priv) >= 11 ?
DC_STATE_EN_DC9 : 0; - if (!dev_priv->params.disable_power_well) + if (!dev_priv->display.params.disable_power_well) max_dc = 0; if (enable_dc >= 0 && enable_dc <= max_dc) { @@ -1016,11 +1016,11 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->display.power.domains; - dev_priv->params.disable_power_well = + dev_priv->display.params.disable_power_well = sanitize_disable_power_well_option(dev_priv, - dev_priv->params.disable_power_well); + dev_priv->display.params.disable_power_well); power_domains->allowed_dc_mask = - get_allowed_dc_mask(dev_priv, dev_priv->params.enable_dc); + get_allowed_dc_mask(dev_priv, dev_priv->display.params.enable_dc); power_domains->target_dc_state = sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); @@ -1697,14 +1697,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, if (resume) intel_dmc_load_program(dev_priv); - /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */ - if (DISPLAY_VER(dev_priv) >= 12) + /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */ + if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0, DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR); /* Wa_14011503030:xelpd */ - if (DISPLAY_VER(dev_priv) >= 13) + if (DISPLAY_VER(dev_priv) == 13) intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); } @@ -1950,7 +1950,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) intel_display_power_get(i915, POWER_DOMAIN_INIT); /* Disable power support if the user asked so. */ - if (!i915->params.disable_power_well) { + if (!i915->display.params.disable_power_well) { drm_WARN_ON(&i915->drm, power_domains->disable_wakeref); i915->display.power.domains.disable_wakeref = intel_display_power_get(i915, POWER_DOMAIN_INIT); @@ -1977,7 +1977,7 @@ void intel_power_domains_driver_remove(struct drm_i915_private *i915) fetch_and_zero(&i915->display.power.domains.init_wakeref); /* Remove the refcount we took to keep power well support disabled. */ - if (!i915->params.disable_power_well) + if (!i915->display.params.disable_power_well) intel_display_power_put(i915, POWER_DOMAIN_INIT, fetch_and_zero(&i915->display.power.domains.disable_wakeref)); @@ -2096,7 +2096,7 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle) * Even if power well support was disabled we still want to disable * power wells if power domains must be deinitialized for suspend. 
*/ - if (!i915->params.disable_power_well) + if (!i915->display.params.disable_power_well) intel_display_power_put(i915, POWER_DOMAIN_INIT, fetch_and_zero(&i915->display.power.domains.disable_wakeref)); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 07d650050099..47cd6bb04366 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -1400,20 +1400,16 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = i915_power_well_instance(power_well)->id; enum dpio_phy phy; - enum pipe pipe; u32 tmp; drm_WARN_ON_ONCE(&dev_priv->drm, id != VLV_DISP_PW_DPIO_CMN_BC && id != CHV_DISP_PW_DPIO_CMN_D); - if (id == VLV_DISP_PW_DPIO_CMN_BC) { - pipe = PIPE_A; + if (id == VLV_DISP_PW_DPIO_CMN_BC) phy = DPIO_PHY0; - } else { - pipe = PIPE_C; + else phy = DPIO_PHY1; - } /* since ref/cri clock was enabled */ udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ @@ -1428,24 +1424,24 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, vlv_dpio_get(dev_priv); /* Enable dynamic power down */ - tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28); + tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW28); tmp |= DPIO_DYNPWRDOWNEN_CH0 | DPIO_CL1POWERDOWNEN | DPIO_SUS_CLK_CONFIG_GATE_CLKREQ; - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp); + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW28, tmp); if (id == VLV_DISP_PW_DPIO_CMN_BC) { - tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1); + tmp = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW6_CH1); tmp |= DPIO_DYNPWRDOWNEN_CH1; - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp); + vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW6_CH1, tmp); } else { /* * Force the non-existing CL2 off. BXT does this * too, so maybe it saves some power even though * CL2 doesn't exist? */ - tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30); + tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW30); tmp |= DPIO_CL2_LDOFUSE_PWRENB; - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp); + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW30, tmp); } vlv_dpio_put(dev_priv); @@ -1499,7 +1495,6 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpio_phy phy, enum dpio_channel ch, bool override, unsigned int mask) { - enum pipe pipe = phy == DPIO_PHY0 ? 
PIPE_A : PIPE_C; u32 reg, val, expected, actual; /* @@ -1518,7 +1513,7 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi reg = _CHV_CMN_DW6_CH1; vlv_dpio_get(dev_priv); - val = vlv_dpio_read(dev_priv, pipe, reg); + val = vlv_dpio_read(dev_priv, phy, reg); vlv_dpio_put(dev_priv); /* diff --git a/drivers/gpu/drm/i915/display/intel_display_reset.c b/drivers/gpu/drm/i915/display/intel_display_reset.c index 17178d5d7788..c2c347b22448 100644 --- a/drivers/gpu/drm/i915/display/intel_display_reset.c +++ b/drivers/gpu/drm/i915/display/intel_display_reset.c @@ -29,7 +29,7 @@ void intel_display_reset_prepare(struct drm_i915_private *dev_priv) return; /* reset doesn't touch the display */ - if (!dev_priv->params.force_reset_modeset_test && + if (!dev_priv->display.params.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 65ea37fe8cff..3fdd8a517983 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -198,6 +198,12 @@ struct intel_encoder { struct intel_encoder *, const struct intel_crtc_state *, const struct drm_connector_state *); + void (*audio_enable)(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + void (*audio_disable)(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); /* Read out the current hw state of this connector, returning true if * the encoder is active. If the encoder is enabled it also set the pipe * it is connected to in the pipe parameter. */ @@ -624,6 +630,9 @@ struct intel_connector { struct drm_dp_aux *dsc_decompression_aux; u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]; u8 fec_capability; + + u8 dsc_hblank_expansion_quirk:1; + u8 dsc_decompression_enabled:1; } dp; /* Work struct to schedule a uevent on link train failure */ @@ -675,10 +684,6 @@ struct intel_atomic_state { bool skip_intermediate_wm; bool rps_interactive; - - struct i915_sw_fence commit_ready; - - struct llist_node freed; }; struct intel_plane_state { @@ -1015,7 +1020,6 @@ struct intel_c10pll_state { }; struct intel_c20pll_state { - u32 link_bit_rate; u32 clock; /* in kHz */ u16 tx[3]; u16 cmn[4]; @@ -1210,6 +1214,7 @@ struct intel_crtc_state { bool has_psr2; bool enable_psr2_sel_fetch; bool req_psr2_sdp_prior_scanline; + bool has_panel_replay; bool wm_level_disabled; u32 dc3co_exitline; u16 su_y_granularity; @@ -1361,7 +1366,8 @@ struct intel_crtc_state { struct { bool compression_enable; bool dsc_split; - u16 compressed_bpp; + /* Compressed Bpp in U6.4 format (first 4 bits for fractional part) */ + u16 compressed_bpp_x16; u8 slice_count; struct drm_dsc_config config; } dsc; @@ -1467,6 +1473,9 @@ struct intel_crtc { struct intel_crtc_state *config; + /* armed event for async flip */ + struct drm_pending_vblank_event *flip_done_event; + /* Access to these should be protected by dev_priv->irq_lock. 
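The new flip_done_event field replaces digging the pending event out of crtc_state in flip_done_handler(): the commit path arms the event under drm_device.event_lock, and the flip-done interrupt (see the intel_display_irq.c hunk earlier) sends and clears it under the same lock. A hedged sketch of the arming side, with a hypothetical demo_arm_flip_done() rather than the actual i915 commit code:

    /* kernel-style sketch, not the literal i915 commit path */
    static void demo_arm_flip_done(struct intel_crtc *crtc,
                                   struct drm_pending_vblank_event *e)
    {
            struct drm_device *dev = crtc->base.dev;
            unsigned long flags;

            spin_lock_irqsave(&dev->event_lock, flags);
            crtc->flip_done_event = e;  /* consumed by flip_done_handler() */
            spin_unlock_irqrestore(&dev->event_lock, flags);
    }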
*/ bool cpu_fifo_underrun_disabled; bool pch_fifo_underrun_disabled; @@ -1707,9 +1716,13 @@ struct intel_psr { bool irq_aux_error; u16 su_w_granularity; u16 su_y_granularity; + bool source_panel_replay_support; + bool sink_panel_replay_support; + bool panel_replay_enabled; u32 dc3co_exitline; u32 dc3co_exit_delay; struct delayed_work dc3co_work; + u8 entry_setup_frames; }; struct intel_dp { @@ -1808,6 +1821,7 @@ struct intel_dp { /* Display stream compression testing */ bool force_dsc_en; int force_dsc_output_format; + bool force_dsc_fractional_bpp_en; int force_dsc_bpc; bool hobl_failed; @@ -1992,17 +2006,6 @@ dp_to_lspcon(struct intel_dp *intel_dp) #define dp_to_i915(__intel_dp) to_i915(dp_to_dig_port(__intel_dp)->base.base.dev) -#define CAN_PSR(intel_dp) ((intel_dp)->psr.sink_support && \ - (intel_dp)->psr.source_support) - -static inline bool intel_encoder_can_psr(struct intel_encoder *encoder) -{ - if (!intel_encoder_is_dp(encoder)) - return false; - - return CAN_PSR(enc_to_intel_dp(encoder)); -} - static inline struct intel_digital_port * hdmi_to_dig_port(struct intel_hdmi *intel_hdmi) { diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 073b85b57679..b70502586ab9 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -335,77 +335,6 @@ static void disable_event_handler(struct drm_i915_private *i915, intel_de_write(i915, htp_reg, 0); } -static void -disable_flip_queue_event(struct drm_i915_private *i915, - i915_reg_t ctl_reg, i915_reg_t htp_reg) -{ - u32 event_ctl; - u32 event_htp; - - event_ctl = intel_de_read(i915, ctl_reg); - event_htp = intel_de_read(i915, htp_reg); - if (event_ctl != (DMC_EVT_CTL_ENABLE | - DMC_EVT_CTL_RECURRING | - REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, - DMC_EVT_CTL_TYPE_EDGE_0_1) | - REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, - DMC_EVT_CTL_EVENT_ID_CLK_MSEC)) || - !event_htp) { - drm_dbg_kms(&i915->drm, - "Unexpected DMC event configuration (control %08x htp %08x)\n", - event_ctl, event_htp); - return; - } - - disable_event_handler(i915, ctl_reg, htp_reg); -} - -static bool -get_flip_queue_event_regs(struct drm_i915_private *i915, enum intel_dmc_id dmc_id, - i915_reg_t *ctl_reg, i915_reg_t *htp_reg) -{ - if (dmc_id == DMC_FW_MAIN) { - if (DISPLAY_VER(i915) == 12) { - *ctl_reg = DMC_EVT_CTL(i915, dmc_id, 3); - *htp_reg = DMC_EVT_HTP(i915, dmc_id, 3); - - return true; - } - } else if (dmc_id >= DMC_FW_PIPEA && dmc_id <= DMC_FW_PIPED) { - if (IS_DG2(i915)) { - *ctl_reg = DMC_EVT_CTL(i915, dmc_id, 2); - *htp_reg = DMC_EVT_HTP(i915, dmc_id, 2); - - return true; - } - } - - return false; -} - -static void -disable_all_flip_queue_events(struct drm_i915_private *i915) -{ - enum intel_dmc_id dmc_id; - - /* TODO: check if the following applies to all D13+ platforms. 
*/ - if (!IS_TIGERLAKE(i915)) - return; - - for_each_dmc_id(dmc_id) { - i915_reg_t ctl_reg; - i915_reg_t htp_reg; - - if (!has_dmc_id_fw(i915, dmc_id)) - continue; - - if (!get_flip_queue_event_regs(i915, dmc_id, &ctl_reg, &htp_reg)) - continue; - - disable_flip_queue_event(i915, ctl_reg, htp_reg); - } -} - static void disable_all_event_handlers(struct drm_i915_private *i915) { enum intel_dmc_id dmc_id; @@ -503,6 +432,16 @@ static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915, return offset >= start && offset < end; } +static bool is_dmc_evt_htp_reg(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, i915_reg_t reg) +{ + u32 offset = i915_mmio_reg_offset(reg); + u32 start = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, 0)); + u32 end = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12)); + + return offset >= start && offset < end; +} + static bool disable_dmc_evt(struct drm_i915_private *i915, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) @@ -514,6 +453,16 @@ static bool disable_dmc_evt(struct drm_i915_private *i915, if (dmc_id != DMC_FW_MAIN) return true; + /* also disable the flip queue event on the main DMC on TGL */ + if (IS_TIGERLAKE(i915) && + REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC) + return true; + + /* also disable the HRR event on the main DMC on TGL/ADLS */ + if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) && + REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A) + return true; + return false; } @@ -579,13 +528,6 @@ void intel_dmc_load_program(struct drm_i915_private *i915) gen9_set_dc_state_debugmask(i915); - /* - * Flip queue events need to be disabled before enabling DC5/6. - * i915 doesn't use the flip queue feature, so disable it already - * here. - */ - disable_all_flip_queue_events(i915); - pipedmc_clock_gating_wa(i915, false); } @@ -781,9 +723,17 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, return 0; } + drm_dbg_kms(&i915->drm, "DMC %d:\n", dmc_id); for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; + + drm_dbg_kms(&i915->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n", + i, mmioaddr[i], mmiodata[i], + is_dmc_evt_ctl_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" : + is_dmc_evt_htp_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "", + disable_dmc_evt(i915, dmc_id, dmc_info->mmioaddr[i], + dmc_info->mmiodata[i]) ? " (disabling)" : ""); } dmc_info->mmio_count = mmio_count; dmc_info->start_mmioaddr = start_mmioaddr; diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h index cf10094acae3..90d0dbb41cfe 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h @@ -60,6 +60,7 @@ #define DMC_EVT_CTL_EVENT_ID_MASK REG_GENMASK(15, 8) #define DMC_EVT_CTL_EVENT_ID_FALSE 0x01 +#define DMC_EVT_CTL_EVENT_ID_VBLANK_A 0x32 /* main DMC */ /* An event handler scheduled to run at a 1 kHz frequency. 
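Note how the flip queue disabling moved: instead of poking the event handler registers after the firmware is loaded (the removed disable_all_flip_queue_events() above), disable_dmc_evt() now filters the offending handlers out while parsing the firmware's mmio list, matching on the event ID field of each DMC_EVT_CTL value. A condensed sketch of that predicate (demo_should_disable() is an illustrative name; the register macros are the ones from this diff):

    /* sketch mirroring the disable_dmc_evt() additions in this diff */
    static bool demo_should_disable(struct drm_i915_private *i915, u32 data)
    {
            u32 id = REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data);

            /* flip queue event on the main DMC, TGL only */
            if (IS_TIGERLAKE(i915) && id == DMC_EVT_CTL_EVENT_ID_CLK_MSEC)
                    return true;

            /* HRR event on the main DMC, TGL and ADL-S */
            if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) &&
                id == DMC_EVT_CTL_EVENT_ID_VBLANK_A)
                    return true;

            return false;
    }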
*/ #define DMC_EVT_CTL_EVENT_ID_CLK_MSEC 0xbf diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 62ce92772367..7d2b8ce48fda 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -85,8 +85,8 @@ #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = 1/(0.972261) */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 +/* DP DSC FEC Overhead factor in ppm = 1/(0.972261) = 1.028530 */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 1028530 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -124,7 +124,31 @@ static void intel_dp_unset_edid(struct intel_dp *intel_dp); /* Is link rate UHBR and thus 128b/132b? */ bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state) { - return crtc_state->port_clock >= 1000000; + return drm_dp_is_uhbr_rate(crtc_state->port_clock); +} + +/** + * intel_dp_link_symbol_size - get the link symbol size for a given link rate + * @rate: link rate in 10kbit/s units + * + * Returns the link symbol size in bits/symbol units depending on the link + * rate -> channel coding. + */ +int intel_dp_link_symbol_size(int rate) +{ + return drm_dp_is_uhbr_rate(rate) ? 32 : 10; +} + +/** + * intel_dp_link_symbol_clock - convert link rate to link symbol clock + * @rate: link rate in 10kbit/s units + * + * Returns the link symbol clock frequency in kHz units depending on the + * link rate and channel coding. + */ +int intel_dp_link_symbol_clock(int rate) +{ + return DIV_ROUND_CLOSEST(rate * 10, intel_dp_link_symbol_size(rate)); } static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp) @@ -331,6 +355,9 @@ int intel_dp_max_lane_count(struct intel_dp *intel_dp) /* * The required data bandwidth for a mode with given pixel clock and bpp. This * is the required net bandwidth independent of the data bandwidth efficiency. + * + * TODO: check if callers of this functions should use + * intel_dp_effective_data_rate() instead. */ int intel_dp_link_required(int pixel_clock, int bpp) @@ -339,6 +366,22 @@ intel_dp_link_required(int pixel_clock, int bpp) return DIV_ROUND_UP(pixel_clock * bpp, 8); } +/** + * intel_dp_effective_data_rate - Return the pixel data rate accounting for BW allocation overhead + * @pixel_clock: pixel clock in kHz + * @bpp_x16: bits per pixel .4 fixed point format + * @bw_overhead: BW allocation overhead in 1ppm units + * + * Return the effective pixel data rate in kB/sec units taking into account + * the provided SSC, FEC, DSC BW allocation overhead. + */ +int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16, + int bw_overhead) +{ + return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_clock * bpp_x16, bw_overhead), + 1000000 * 16 * 8); +} + /* * Given a link rate and lanes, get the data bandwidth. * @@ -362,29 +405,27 @@ intel_dp_link_required(int pixel_clock, int bpp) int intel_dp_max_data_rate(int max_link_rate, int max_lanes) { - if (max_link_rate >= 1000000) { - /* - * UHBR rates always use 128b/132b channel encoding, and have - * 97.71% data bandwidth efficiency. Consider max_link_rate the - * link bit rate in units of 10000 bps. 
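With the overhead factor now expressed as a multiplier in ppm (1/0.972261, i.e. 1.028530x), intel_dp_mode_to_fec_clock() multiplies by the factor and divides by 10^6, and the new intel_dp_effective_data_rate() folds a caller-supplied ppm overhead into the bandwidth math the same way. A worked userspace sketch with hypothetical input values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DP_DSC_FEC_OVERHEAD_FACTOR 1028530  /* ppm, 1/0.972261 */

    static uint32_t mode_to_fec_clock(uint32_t mode_clock_khz)
    {
            return (uint64_t)mode_clock_khz * DP_DSC_FEC_OVERHEAD_FACTOR / 1000000;
    }

    /* kB/s; rounds up like intel_dp_effective_data_rate() */
    static uint64_t effective_data_rate(uint64_t pixel_clock_khz,
                                        uint64_t bpp_x16, uint64_t overhead_ppm)
    {
            uint64_t div = 1000000ULL * 16 * 8;

            return (pixel_clock_khz * bpp_x16 * overhead_ppm + div - 1) / div;
    }

    int main(void)
    {
            /* 533250 kHz * 1.028530 = 548463 kHz */
            printf("%u\n", mode_to_fec_clock(533250));
            /* 24 bpp (384 in .4 format) at 533250 kHz: 1645391 kB/s */
            printf("%" PRIu64 "\n", effective_data_rate(533250, 384, 1028530));
            return 0;
    }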
- */ - int max_link_rate_kbps = max_link_rate * 10; - - max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(max_link_rate_kbps, 9671), 10000); - max_link_rate = max_link_rate_kbps / 8; - } + int ch_coding_efficiency = + drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(max_link_rate)); + int max_link_rate_kbps = max_link_rate * 10; /* + * UHBR rates always use 128b/132b channel encoding, and have + * 97.71% data bandwidth efficiency. Consider max_link_rate the + * link bit rate in units of 10000 bps. + */ + /* * Lower than UHBR rates always use 8b/10b channel encoding, and have * 80% data bandwidth efficiency for SST non-FEC. However, this turns - * out to be a nop by coincidence, and can be skipped: + * out to be a nop by coincidence: * * int max_link_rate_kbps = max_link_rate * 10; - * max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(max_link_rate_kbps * 8, 10); + * max_link_rate_kbps = DIV_ROUND_DOWN_ULL(max_link_rate_kbps * 8, 10); * max_link_rate = max_link_rate_kbps / 8; */ - - return max_link_rate * max_lanes; + return DIV_ROUND_DOWN_ULL(mul_u32_u32(max_link_rate_kbps * max_lanes, + ch_coding_efficiency), + 1000000 * 8); } bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp) @@ -680,8 +721,22 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, u32 intel_dp_mode_to_fec_clock(u32 mode_clock) { - return div_u64(mul_u32_u32(mode_clock, 1000000U), - DP_DSC_FEC_OVERHEAD_FACTOR); + return div_u64(mul_u32_u32(mode_clock, DP_DSC_FEC_OVERHEAD_FACTOR), + 1000000U); +} + +int intel_dp_bw_fec_overhead(bool fec_enabled) +{ + /* + * TODO: Calculate the actual overhead for a given mode. + * The hard-coded 1/0.972261=2.853% overhead factor + * corresponds (for instance) to the 8b/10b DP FEC 2.4% + + * 0.453% DSC overhead. This is enough for a 3840 width mode, + * which has a DSC overhead of up to ~0.2%, but may not be + * enough for a 1024 width mode where this is ~0.8% (on a 4 + * lane DP link, with 2 DSC slices and 8 bpp color depth). + */ + return fec_enabled ? 
DP_DSC_FEC_OVERHEAD_FACTOR : 1000000; } static int @@ -1373,9 +1428,9 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, return false; } -static bool intel_dp_supports_fec(struct intel_dp *intel_dp, - const struct intel_connector *connector, - const struct intel_crtc_state *pipe_config) +bool intel_dp_supports_fec(struct intel_dp *intel_dp, + const struct intel_connector *connector, + const struct intel_crtc_state *pipe_config) { return intel_dp_source_supports_fec(intel_dp, pipe_config) && drm_dp_sink_supports_fec(connector->dp.fec_capability); @@ -1388,6 +1443,7 @@ static bool intel_dp_supports_dsc(const struct intel_connector *connector, return false; return intel_dsc_source_support(crtc_state) && + connector->dp.dsc_decompression_aux && drm_dp_sink_supports_dsc(connector->dp.dsc_dpcd); } @@ -1721,15 +1777,15 @@ static bool intel_dp_dsc_supports_format(const struct intel_connector *connector return drm_dp_dsc_sink_supports_format(connector->dp.dsc_dpcd, sink_dsc_format); } -static bool is_bw_sufficient_for_dsc_config(u16 compressed_bpp, u32 link_clock, +static bool is_bw_sufficient_for_dsc_config(u16 compressed_bppx16, u32 link_clock, u32 lane_count, u32 mode_clock, enum intel_output_format output_format, int timeslots) { u32 available_bw, required_bw; - available_bw = (link_clock * lane_count * timeslots) / 8; - required_bw = compressed_bpp * (intel_dp_mode_to_fec_clock(mode_clock)); + available_bw = (link_clock * lane_count * timeslots * 16) / 8; + required_bw = compressed_bppx16 * (intel_dp_mode_to_fec_clock(mode_clock)); return available_bw > required_bw; } @@ -1737,7 +1793,7 @@ static bool is_bw_sufficient_for_dsc_config(u16 compressed_bpp, u32 link_clock, static int dsc_compute_link_config(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, struct link_config_limits *limits, - u16 compressed_bpp, + u16 compressed_bppx16, int timeslots) { const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -1752,8 +1808,8 @@ static int dsc_compute_link_config(struct intel_dp *intel_dp, for (lane_count = limits->min_lane_count; lane_count <= limits->max_lane_count; lane_count <<= 1) { - if (!is_bw_sufficient_for_dsc_config(compressed_bpp, link_rate, lane_count, - adjusted_mode->clock, + if (!is_bw_sufficient_for_dsc_config(compressed_bppx16, link_rate, + lane_count, adjusted_mode->clock, pipe_config->output_format, timeslots)) continue; @@ -1795,7 +1851,7 @@ u16 intel_dp_dsc_max_sink_compressed_bppx16(const struct intel_connector *connec return 0; } -static int dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config) +int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config) { /* From Mandatory bit rate range Support Table 2-157 (DP v2.0) */ switch (pipe_config->output_format) { @@ -1812,9 +1868,9 @@ static int dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config) return 0; } -static int dsc_sink_max_compressed_bpp(const struct intel_connector *connector, - struct intel_crtc_state *pipe_config, - int bpc) +int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector, + struct intel_crtc_state *pipe_config, + int bpc) { return intel_dp_dsc_max_sink_compressed_bppx16(connector, pipe_config, bpc) >> 4; @@ -1834,7 +1890,7 @@ static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp) * Max Compressed bpp for Gen 13+ is 27bpp. * For earlier platform is 23bpp. (Bspec:49259). 
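From here on the compressed bpp is tracked in U6.4 fixed point (compressed_bpp_x16, four fractional bits), which is what lets xelpd_dsc_compute_link_config() below step in fractions of a bit per pixel when the sink advertises a finer bpp increment. A sketch of the fixed-point helpers this relies on; the semantics are assumed from their usage here, the actual to_bpp_*() definitions live elsewhere in the series:

    /* assumed semantics of the .4 fixed-point bpp helpers */
    #define to_bpp_x16(bpp)         ((bpp) << 4)
    #define to_bpp_int(bpp_x16)     ((bpp_x16) >> 4)
    #define to_bpp_frac(bpp_x16)    ((bpp_x16) & 0xf)

    /*
     * Example: a sink reporting a 1/16 bpp increment yields bppx16_incr = 16,
     * so bppx16_step = 16 / 16 = 1 and the loop can try 12.0000, 11.9375,
     * 11.8750, ... bpp; without fractional support bppx16_step stays 16 and
     * only integer bpp values are tried.
     */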
*/ - if (DISPLAY_VER(i915) <= 12) + if (DISPLAY_VER(i915) < 13) return 23; else return 27; @@ -1866,10 +1922,11 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, ret = dsc_compute_link_config(intel_dp, pipe_config, limits, - valid_dsc_bpp[i], + valid_dsc_bpp[i] << 4, timeslots); if (ret == 0) { - pipe_config->dsc.compressed_bpp = valid_dsc_bpp[i]; + pipe_config->dsc.compressed_bpp_x16 = + to_bpp_x16(valid_dsc_bpp[i]); return 0; } } @@ -1885,6 +1942,7 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, */ static int xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, + const struct intel_connector *connector, struct intel_crtc_state *pipe_config, struct link_config_limits *limits, int dsc_max_bpp, @@ -1892,22 +1950,38 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, int pipe_bpp, int timeslots) { - u16 compressed_bpp; + u8 bppx16_incr = drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u16 compressed_bppx16; + u8 bppx16_step; int ret; - /* Compressed BPP should be less than the Input DSC bpp */ - dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); + if (DISPLAY_VER(i915) < 14 || bppx16_incr <= 1) + bppx16_step = 16; + else + bppx16_step = 16 / bppx16_incr; - for (compressed_bpp = dsc_max_bpp; - compressed_bpp >= dsc_min_bpp; - compressed_bpp--) { + /* Compressed BPP should be less than the Input DSC bpp */ + dsc_max_bpp = min(dsc_max_bpp << 4, (pipe_bpp << 4) - bppx16_step); + dsc_min_bpp = dsc_min_bpp << 4; + + for (compressed_bppx16 = dsc_max_bpp; + compressed_bppx16 >= dsc_min_bpp; + compressed_bppx16 -= bppx16_step) { + if (intel_dp->force_dsc_fractional_bpp_en && + !to_bpp_frac(compressed_bppx16)) + continue; ret = dsc_compute_link_config(intel_dp, pipe_config, limits, - compressed_bpp, + compressed_bppx16, timeslots); if (ret == 0) { - pipe_config->dsc.compressed_bpp = compressed_bpp; + pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16; + if (intel_dp->force_dsc_fractional_bpp_en && + to_bpp_frac(compressed_bppx16)) + drm_dbg_kms(&i915->drm, "Forcing DSC fractional bpp\n"); + return 0; } } @@ -1928,12 +2002,14 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, int dsc_joiner_max_bpp; dsc_src_min_bpp = dsc_src_min_compressed_bpp(); - dsc_sink_min_bpp = dsc_sink_min_compressed_bpp(pipe_config); + dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); - dsc_sink_max_bpp = dsc_sink_max_compressed_bpp(connector, pipe_config, pipe_bpp / 3); + dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, + pipe_config, + pipe_bpp / 3); dsc_max_bpp = dsc_sink_max_bpp ? 
min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; dsc_joiner_max_bpp = get_max_compressed_bpp_with_joiner(i915, adjusted_mode->clock, @@ -1943,7 +2019,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); if (DISPLAY_VER(i915) >= 13) - return xelpd_dsc_compute_link_config(intel_dp, pipe_config, limits, + return xelpd_dsc_compute_link_config(intel_dp, connector, pipe_config, limits, dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots); return icl_dsc_compute_link_config(intel_dp, pipe_config, limits, dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots); @@ -2088,19 +2164,22 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, pipe_config->lane_count = limits->max_lane_count; dsc_src_min_bpp = dsc_src_min_compressed_bpp(); - dsc_sink_min_bpp = dsc_sink_min_compressed_bpp(pipe_config); + dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); - dsc_sink_max_bpp = dsc_sink_max_compressed_bpp(connector, pipe_config, pipe_bpp / 3); + dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, + pipe_config, + pipe_bpp / 3); dsc_max_bpp = dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); - pipe_config->dsc.compressed_bpp = max(dsc_min_bpp, dsc_max_bpp); + pipe_config->dsc.compressed_bpp_x16 = + to_bpp_x16(max(dsc_min_bpp, dsc_max_bpp)); pipe_config->pipe_bpp = pipe_bpp; @@ -2122,8 +2201,9 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, &pipe_config->hw.adjusted_mode; int ret; - pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, connector, pipe_config); + pipe_config->fec_enable = pipe_config->fec_enable || + (!intel_dp_is_edp(intel_dp) && + intel_dp_supports_fec(intel_dp, connector, pipe_config)); if (!intel_dp_supports_dsc(connector, pipe_config)) return -EINVAL; @@ -2188,18 +2268,18 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, ret = intel_dp_dsc_compute_params(connector, pipe_config); if (ret < 0) { drm_dbg_kms(&dev_priv->drm, - "Cannot compute valid DSC parameters for Input Bpp = %d " - "Compressed BPP = %d\n", + "Cannot compute valid DSC parameters for Input Bpp = %d" + "Compressed BPP = " BPP_X16_FMT "\n", pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp); + BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16)); return ret; } pipe_config->dsc.compression_enable = true; drm_dbg_kms(&dev_priv->drm, "DP DSC computed with Input Bpp = %d " - "Compressed Bpp = %d Slice Count = %d\n", + "Compressed Bpp = " BPP_X16_FMT " Slice Count = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp, + BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16), pipe_config->dsc.slice_count); return 0; @@ -2311,6 +2391,8 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, { struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + const struct intel_connector *connector = + to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -2319,6 
+2401,10 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, bool dsc_needed; int ret = 0; + if (pipe_config->fec_enable && + !intel_dp_supports_fec(intel_dp, connector, pipe_config)) + return -EINVAL; + if (intel_dp_need_bigjoiner(intel_dp, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_clock)) pipe_config->bigjoiner_pipes = GENMASK(crtc->pipe + 1, crtc->pipe); @@ -2366,15 +2452,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, if (pipe_config->dsc.compression_enable) { drm_dbg_kms(&i915->drm, - "DP lane count %d clock %d Input bpp %d Compressed bpp %d\n", + "DP lane count %d clock %d Input bpp %d Compressed bpp " BPP_X16_FMT "\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp); + BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16)); drm_dbg_kms(&i915->drm, "DP link rate required %i available %i\n", intel_dp_link_required(adjusted_mode->crtc_clock, - pipe_config->dsc.compressed_bpp), + to_bpp_int_roundup(pipe_config->dsc.compressed_bpp_x16)), intel_dp_max_data_rate(pipe_config->port_clock, pipe_config->lane_count)); } else { @@ -2443,12 +2529,22 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - /* - * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118 - * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/ - * Colorimetry Format indication. - */ - vsc->revision = 0x5; + if (crtc_state->has_panel_replay) { + /* + * Prepare VSC Header for SU as per DP 2.0 spec, Table 2-223 + * VSC SDP supporting 3D stereo, Panel Replay, and Pixel + * Encoding/Colorimetry Format indication. + */ + vsc->revision = 0x7; + } else { + /* + * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118 + * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/ + * Colorimetry Format indication. + */ + vsc->revision = 0x5; + } + vsc->length = 0x13; /* DP 1.4a spec, Table 2-120 */ @@ -2557,6 +2653,21 @@ void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp, vsc->revision = 0x4; vsc->length = 0xe; } + } else if (crtc_state->has_panel_replay) { + if (intel_dp->psr.colorimetry_support && + intel_dp_needs_vsc_sdp(crtc_state, conn_state)) { + /* [Panel Replay with colorimetry info] */ + intel_dp_compute_vsc_colorimetry(crtc_state, conn_state, + vsc); + } else { + /* + * [Panel Replay without colorimetry info] + * Prepare VSC Header for SU as per DP 2.0 spec, Table 2-223 + * VSC SDP supporting 3D stereo + Panel Replay. 
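The VSC SDP header selection above and below condenses to a small table; a sketch covering only the branches visible in this diff (panel_replay, psr2 and colorimetry stand in for the crtc/connector state checks):

    /* sketch of the revision/length choices made in this diff */
    if (panel_replay && colorimetry)        /* DP 2.0, Table 2-223 */
            rev = 0x7, len = 0x13;
    else if (panel_replay)                  /* DP 2.0, no colorimetry */
            rev = 0x6, len = 0x10;
    else if (psr2 && colorimetry)           /* DP 1.4a, Table 2-118 */
            rev = 0x5, len = 0x13;
    else if (psr2)                          /* DP 1.4a */
            rev = 0x4, len = 0xe;

(intel_dp_vsc_sdp_pack() additionally sets DB0 and DB3 to 1 for the revision 0x6 case, as the pack hunk below shows.)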
+ */ + vsc->revision = 0x6; + vsc->length = 0x10; + } } else { /* * [PSR1] @@ -2633,7 +2744,7 @@ static bool can_enable_drrs(struct intel_connector *connector, static void intel_dp_drrs_compute_config(struct intel_connector *connector, struct intel_crtc_state *pipe_config, - int link_bpp) + int link_bpp_x16) { struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *downclock_mode = @@ -2658,9 +2769,10 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, if (pipe_config->splitter.enable) pixel_clock /= pipe_config->splitter.link_count; - intel_link_compute_m_n(link_bpp, pipe_config->lane_count, pixel_clock, - pipe_config->port_clock, &pipe_config->dp_m2_n2, - pipe_config->fec_enable); + intel_link_compute_m_n(link_bpp_x16, pipe_config->lane_count, pixel_clock, + pipe_config->port_clock, + intel_dp_bw_fec_overhead(pipe_config->fec_enable), + &pipe_config->dp_m2_n2); /* FIXME: abstract this better */ if (pipe_config->splitter.enable) @@ -2736,19 +2848,12 @@ intel_dp_audio_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct drm_connector *connector = conn_state->connector; - pipe_config->has_audio = intel_dp_has_audio(encoder, pipe_config, conn_state) && intel_audio_compute_config(encoder, pipe_config, conn_state); pipe_config->sdp_split_enable = pipe_config->has_audio && intel_dp_is_uhbr(pipe_config); - - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDP split enable: %s\n", - connector->base.id, connector->name, - str_yes_no(pipe_config->sdp_split_enable)); } int @@ -2761,7 +2866,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(encoder); const struct drm_display_mode *fixed_mode; struct intel_connector *connector = intel_dp->attached_connector; - int ret = 0, link_bpp; + int ret = 0, link_bpp_x16; if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && encoder->port != PORT_A) pipe_config->has_pch_encoder = true; @@ -2810,10 +2915,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, drm_dp_enhanced_frame_cap(intel_dp->dpcd); if (pipe_config->dsc.compression_enable) - link_bpp = pipe_config->dsc.compressed_bpp; + link_bpp_x16 = pipe_config->dsc.compressed_bpp_x16; else - link_bpp = intel_dp_output_bpp(pipe_config->output_format, - pipe_config->pipe_bpp); + link_bpp_x16 = to_bpp_x16(intel_dp_output_bpp(pipe_config->output_format, + pipe_config->pipe_bpp)); if (intel_dp->mso_link_count) { int n = intel_dp->mso_link_count; @@ -2837,12 +2942,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_dp_audio_compute_config(encoder, pipe_config, conn_state); - intel_link_compute_m_n(link_bpp, + intel_link_compute_m_n(link_bpp_x16, pipe_config->lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, - &pipe_config->dp_m_n, - pipe_config->fec_enable); + intel_dp_bw_fec_overhead(pipe_config->fec_enable), + &pipe_config->dp_m_n); /* FIXME: abstract this better */ if (pipe_config->splitter.enable) @@ -2853,7 +2958,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_vrr_compute_config(pipe_config, conn_state); intel_psr_compute_config(intel_dp, pipe_config, conn_state); - intel_dp_drrs_compute_config(connector, pipe_config, link_bpp); + intel_dp_drrs_compute_config(connector, pipe_config, link_bpp_x16); intel_dp_compute_vsc_sdp(intel_dp, pipe_config, conn_state); intel_dp_compute_hdr_metadata_infoframe_sdp(intel_dp, 
pipe_config, conn_state); @@ -2921,25 +3026,180 @@ static bool downstream_hpd_needs_d0(struct intel_dp *intel_dp) intel_dp->downstream_ports[0] & DP_DS_PORT_HPD; } -void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - bool enable) +static int +write_dsc_decompression_flag(struct drm_dp_aux *aux, u8 flag, bool set) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - int ret; + int err; + u8 val; - if (!crtc_state->dsc.compression_enable) - return; + err = drm_dp_dpcd_readb(aux, DP_DSC_ENABLE, &val); + if (err < 0) + return err; - ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE, - enable ? DP_DECOMPRESSION_EN : 0); - if (ret < 0) + if (set) + val |= flag; + else + val &= ~flag; + + return drm_dp_dpcd_writeb(aux, DP_DSC_ENABLE, val); +} + +static void +intel_dp_sink_set_dsc_decompression(struct intel_connector *connector, + bool enable) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + + if (write_dsc_decompression_flag(connector->dp.dsc_decompression_aux, + DP_DECOMPRESSION_EN, enable) < 0) drm_dbg_kms(&i915->drm, "Failed to %s sink decompression state\n", str_enable_disable(enable)); } static void +intel_dp_sink_set_dsc_passthrough(const struct intel_connector *connector, + bool enable) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct drm_dp_aux *aux = connector->port ? + connector->port->passthrough_aux : NULL; + + if (!aux) + return; + + if (write_dsc_decompression_flag(aux, + DP_DSC_PASSTHROUGH_EN, enable) < 0) + drm_dbg_kms(&i915->drm, + "Failed to %s sink compression passthrough state\n", + str_enable_disable(enable)); +} + +static int intel_dp_dsc_aux_ref_count(struct intel_atomic_state *state, + const struct intel_connector *connector, + bool for_get_ref) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + struct drm_connector *_connector_iter; + struct drm_connector_state *old_conn_state; + struct drm_connector_state *new_conn_state; + int ref_count = 0; + int i; + + /* + * On SST the decompression AUX device won't be shared, each connector + * uses for this its own AUX targeting the sink device. + */ + if (!connector->mst_port) + return connector->dp.dsc_decompression_enabled ? 
1 : 0; + + for_each_oldnew_connector_in_state(&state->base, _connector_iter, + old_conn_state, new_conn_state, i) { + const struct intel_connector * + connector_iter = to_intel_connector(_connector_iter); + + if (connector_iter->mst_port != connector->mst_port) + continue; + + if (!connector_iter->dp.dsc_decompression_enabled) + continue; + + drm_WARN_ON(&i915->drm, + (for_get_ref && !new_conn_state->crtc) || + (!for_get_ref && !old_conn_state->crtc)); + + if (connector_iter->dp.dsc_decompression_aux == + connector->dp.dsc_decompression_aux) + ref_count++; + } + + return ref_count; +} + +static bool intel_dp_dsc_aux_get_ref(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + bool ret = intel_dp_dsc_aux_ref_count(state, connector, true) == 0; + + connector->dp.dsc_decompression_enabled = true; + + return ret; +} + +static bool intel_dp_dsc_aux_put_ref(struct intel_atomic_state *state, + struct intel_connector *connector) +{ + connector->dp.dsc_decompression_enabled = false; + + return intel_dp_dsc_aux_ref_count(state, connector, false) == 0; +} + +/** + * intel_dp_sink_enable_decompression - Enable DSC decompression in sink/last branch device + * @state: atomic state + * @connector: connector to enable the decompression for + * @new_crtc_state: new state for the CRTC driving @connector + * + * Enable the DSC decompression if required in the %DP_DSC_ENABLE DPCD + * register of the appropriate sink/branch device. On SST this is always the + * sink device, whereas on MST based on each device's DSC capabilities it's + * either the last branch device (enabling decompression in it) or both the + * last branch device (enabling passthrough in it) and the sink device + * (enabling decompression in it). + */ +void intel_dp_sink_enable_decompression(struct intel_atomic_state *state, + struct intel_connector *connector, + const struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + + if (!new_crtc_state->dsc.compression_enable) + return; + + if (drm_WARN_ON(&i915->drm, + !connector->dp.dsc_decompression_aux || + connector->dp.dsc_decompression_enabled)) + return; + + if (!intel_dp_dsc_aux_get_ref(state, connector)) + return; + + intel_dp_sink_set_dsc_passthrough(connector, true); + intel_dp_sink_set_dsc_decompression(connector, true); +} + +/** + * intel_dp_sink_disable_decompression - Disable DSC decompression in sink/last branch device + * @state: atomic state + * @connector: connector to disable the decompression for + * @old_crtc_state: old state for the CRTC driving @connector + * + * Disable the DSC decompression if required in the %DP_DSC_ENABLE DPCD + * register of the appropriate sink/branch device, corresponding to the + * sequence in intel_dp_sink_enable_decompression(). 
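On MST several connectors can share one DSC decompression AUX device, so the enable/disable paths only program the sink when the first user appears or the last one disappears; intel_dp_dsc_aux_get_ref()/intel_dp_dsc_aux_put_ref() implement that by counting the enabled connectors sharing the AUX in the atomic state. The semantics reduce to the following sketch (demo_* names are illustrative):

    /* sketch of the get/put semantics; the real code walks the atomic
     * state counting users of connector->dp.dsc_decompression_aux */
    static bool demo_get_ref(unsigned int *users)
    {
            return (*users)++ == 0;     /* first user: program the sink */
    }

    static bool demo_put_ref(unsigned int *users)
    {
            return --(*users) == 0;     /* last user: clear the sink */
    }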
+ */ +void intel_dp_sink_disable_decompression(struct intel_atomic_state *state, + struct intel_connector *connector, + const struct intel_crtc_state *old_crtc_state) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + + if (!old_crtc_state->dsc.compression_enable) + return; + + if (drm_WARN_ON(&i915->drm, + !connector->dp.dsc_decompression_aux || + !connector->dp.dsc_decompression_enabled)) + return; + + if (!intel_dp_dsc_aux_put_ref(state, connector)) + return; + + intel_dp_sink_set_dsc_decompression(connector, false); + intel_dp_sink_set_dsc_passthrough(connector, false); +} + +static void intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); @@ -3775,7 +4035,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); - return i915->params.enable_dp_mst && + return i915->display.params.enable_dp_mst && intel_dp_mst_source_support(intel_dp) && drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd); } @@ -3793,13 +4053,13 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) encoder->base.base.id, encoder->base.name, str_yes_no(intel_dp_mst_source_support(intel_dp)), str_yes_no(sink_can_mst), - str_yes_no(i915->params.enable_dp_mst)); + str_yes_no(i915->display.params.enable_dp_mst)); if (!intel_dp_mst_source_support(intel_dp)) return; intel_dp->is_mst = sink_can_mst && - i915->params.enable_dp_mst; + i915->display.params.enable_dp_mst; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); @@ -3869,11 +4129,16 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, sdp->sdp_header.HB2 = vsc->revision; /* Revision Number */ sdp->sdp_header.HB3 = vsc->length; /* Number of Valid Data Bytes */ + if (vsc->revision == 0x6) { + sdp->db[0] = 1; + sdp->db[3] = 1; + } + /* - * Only revision 0x5 supports Pixel Encoding/Colorimetry Format as - * per DP 1.4a spec. + * Revision 0x5 and revision 0x7 supports Pixel Encoding/Colorimetry + * Format as per DP 1.4a spec and DP 2.0 respectively. */ - if (vsc->revision != 0x5) + if (!(vsc->revision == 0x5 || vsc->revision == 0x7)) goto out; /* VSC SDP Payload for DB16 through DB18 */ @@ -4053,7 +4318,10 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, VIDEO_DIP_ENABLE_SPD_HSW | VIDEO_DIP_ENABLE_DRM_GLK; u32 val = intel_de_read(dev_priv, reg) & ~dip_enable; - /* TODO: Add DSC case (DIP_ENABLE_PPS) */ + /* TODO: Sanitize DSC enabling wrt. intel_dsc_dp_pps_write(). */ + if (!enable && HAS_DSC(dev_priv)) + val &= ~VDIP_ENABLE_PPS; + /* When PSR is enabled, this routine doesn't disable VSC DIP */ if (!crtc_state->has_psr) val &= ~VIDEO_DIP_ENABLE_VSC_HSW; @@ -5413,6 +5681,7 @@ intel_dp_detect(struct drm_connector *connector, if (status == connector_status_disconnected) { memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance)); memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); + intel_dp->psr.sink_panel_replay_support = false; if (intel_dp->is_mst) { drm_dbg_kms(&dev_priv->drm, @@ -6258,16 +6527,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, "HDCP init failed, skipping.\n"); } - /* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written - * 0xd. Failure to do so will result in spurious interrupts being - * generated on the port when a cable is not attached. 
- */ - if (IS_G45(dev_priv)) { - u32 temp = intel_de_read(dev_priv, PEG_BAND_GAP_DATA); - intel_de_write(dev_priv, PEG_BAND_GAP_DATA, - (temp & ~0xf) | 0xd); - } - intel_dp->frl.is_trained = false; intel_dp->frl.trained_rate_gbps = 0; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 484aea215a25..05db46b111f2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -57,9 +57,12 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode); void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); -void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - bool enable); +void intel_dp_sink_enable_decompression(struct intel_atomic_state *state, + struct intel_connector *connector, + const struct intel_crtc_state *new_crtc_state); +void intel_dp_sink_disable_decompression(struct intel_atomic_state *state, + struct intel_connector *connector, + const struct intel_crtc_state *old_crtc_state); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_shutdown(struct intel_encoder *intel_encoder); void intel_dp_encoder_flush_work(struct drm_encoder *encoder); @@ -78,6 +81,8 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder, bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp); bool intel_dp_is_edp(struct intel_dp *intel_dp); bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state); +int intel_dp_link_symbol_size(int rate); +int intel_dp_link_symbol_clock(int rate); bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd); @@ -98,6 +103,8 @@ bool intel_dp_source_supports_tps4(struct drm_i915_private *i915); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); +int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16, + int bw_overhead); int intel_dp_max_data_rate(int max_link_rate, int max_lanes); bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp); bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, @@ -125,6 +132,10 @@ u16 intel_dp_dsc_get_max_compressed_bpp(struct drm_i915_private *i915, enum intel_output_format output_format, u32 pipe_bpp, u32 timeslots); +int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config); +int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector, + struct intel_crtc_state *pipe_config, + int bpc); u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, int mode_clock, int mode_hdisplay, bool bigjoiner); @@ -136,7 +147,16 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +bool intel_dp_supports_fec(struct intel_dp *intel_dp, + const struct intel_connector *connector, + const struct intel_crtc_state *pipe_config); u32 intel_dp_mode_to_fec_clock(u32 mode_clock); +int intel_dp_bw_fec_overhead(bool fec_enabled); + +bool intel_dp_supports_fec(struct intel_dp *intel_dp, + const struct intel_connector *connector, + const struct intel_crtc_state *pipe_config); + u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 pipe_bpp); void intel_ddi_update_pipe(struct intel_atomic_state *state, diff --git 
a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 4431b6290c4c..2e2af71bcd5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -74,7 +74,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (index) return 0; @@ -83,12 +83,12 @@ static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * The clock divider is based off the hrawclk, and would like to run at * 2MHz. So, take the hrawclk value and divide by 2000 and use that */ - return DIV_ROUND_CLOSEST(RUNTIME_INFO(dev_priv)->rawclk_freq, 2000); + return DIV_ROUND_CLOSEST(RUNTIME_INFO(i915)->rawclk_freq, 2000); } static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); u32 freq; @@ -101,18 +101,18 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (dig_port->aux_ch == AUX_CH_A) - freq = dev_priv->display.cdclk.hw.cdclk; + freq = i915->display.cdclk.hw.cdclk; else - freq = RUNTIME_INFO(dev_priv)->rawclk_freq; + freq = RUNTIME_INFO(i915)->rawclk_freq; return DIV_ROUND_CLOSEST(freq, 2000); } static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(dev_priv)) { + if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(i915)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -165,12 +165,11 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, u32 aux_clock_divider) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = - to_i915(dig_port->base.base.dev); + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); u32 timeout; /* Max timeout value on G4x-BDW: 1.6ms */ - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(i915)) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -229,8 +228,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, u32 aux_send_ctl_flags) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *i915 = - to_i915(dig_port->base.base.dev); + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); enum phy phy = intel_port_to_phy(i915, dig_port->base.port); bool is_tc_port = intel_phy_is_tc(i915, phy); i915_reg_t ch_ctl, ch_data[5]; @@ -531,9 +529,40 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return ret; } +static i915_reg_t vlv_aux_ctl_reg(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum aux_ch aux_ch = dig_port->aux_ch; + + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return VLV_DP_AUX_CH_CTL(aux_ch); + default: + MISSING_CASE(aux_ch); + return VLV_DP_AUX_CH_CTL(AUX_CH_B); + } +} + +static i915_reg_t vlv_aux_data_reg(struct intel_dp *intel_dp, int index) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum 
aux_ch aux_ch = dig_port->aux_ch; + + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return VLV_DP_AUX_CH_DATA(aux_ch, index); + default: + MISSING_CASE(aux_ch); + return VLV_DP_AUX_CH_DATA(AUX_CH_B, index); + } +} + static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -550,7 +579,6 @@ static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp) static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -567,7 +595,6 @@ static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index) static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -586,7 +613,6 @@ static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp) static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -605,7 +631,6 @@ static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index) static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -625,7 +650,6 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -645,7 +669,6 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) static i915_reg_t tgl_aux_ctl_reg(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -668,7 +691,6 @@ static i915_reg_t tgl_aux_ctl_reg(struct intel_dp *intel_dp) static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -691,7 +713,7 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index) static i915_reg_t xelpdp_aux_ctl_reg(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -702,16 +724,16 @@ static i915_reg_t xelpdp_aux_ctl_reg(struct intel_dp *intel_dp) case AUX_CH_USBC2: case AUX_CH_USBC3: case AUX_CH_USBC4: - return XELPDP_DP_AUX_CH_CTL(dev_priv, aux_ch); + return XELPDP_DP_AUX_CH_CTL(i915, aux_ch); default: MISSING_CASE(aux_ch); - return XELPDP_DP_AUX_CH_CTL(dev_priv, AUX_CH_A); + return XELPDP_DP_AUX_CH_CTL(i915, AUX_CH_A); } } static i915_reg_t xelpdp_aux_data_reg(struct intel_dp *intel_dp, int index) { - struct drm_i915_private 
*dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum aux_ch aux_ch = dig_port->aux_ch; @@ -722,10 +744,10 @@ static i915_reg_t xelpdp_aux_data_reg(struct intel_dp *intel_dp, int index) case AUX_CH_USBC2: case AUX_CH_USBC3: case AUX_CH_USBC4: - return XELPDP_DP_AUX_CH_DATA(dev_priv, aux_ch, index); + return XELPDP_DP_AUX_CH_DATA(i915, aux_ch, index); default: MISSING_CASE(aux_ch); - return XELPDP_DP_AUX_CH_DATA(dev_priv, AUX_CH_A, index); + return XELPDP_DP_AUX_CH_DATA(i915, AUX_CH_A, index); } } @@ -739,49 +761,52 @@ void intel_dp_aux_fini(struct intel_dp *intel_dp) void intel_dp_aux_init(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum aux_ch aux_ch = dig_port->aux_ch; char buf[AUX_CH_NAME_BUFSIZE]; - if (DISPLAY_VER(dev_priv) >= 14) { + if (DISPLAY_VER(i915) >= 14) { intel_dp->aux_ch_ctl_reg = xelpdp_aux_ctl_reg; intel_dp->aux_ch_data_reg = xelpdp_aux_data_reg; - } else if (DISPLAY_VER(dev_priv) >= 12) { + } else if (DISPLAY_VER(i915) >= 12) { intel_dp->aux_ch_ctl_reg = tgl_aux_ctl_reg; intel_dp->aux_ch_data_reg = tgl_aux_data_reg; - } else if (DISPLAY_VER(dev_priv) >= 9) { + } else if (DISPLAY_VER(i915) >= 9) { intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; intel_dp->aux_ch_data_reg = skl_aux_data_reg; - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(i915)) { intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; intel_dp->aux_ch_data_reg = ilk_aux_data_reg; + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + intel_dp->aux_ch_ctl_reg = vlv_aux_ctl_reg; + intel_dp->aux_ch_data_reg = vlv_aux_data_reg; } else { intel_dp->aux_ch_ctl_reg = g4x_aux_ctl_reg; intel_dp->aux_ch_data_reg = g4x_aux_data_reg; } - if (DISPLAY_VER(dev_priv) >= 9) + if (DISPLAY_VER(i915) >= 9) intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; - else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + else if (IS_BROADWELL(i915) || IS_HASWELL(i915)) intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(i915)) intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; else intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; - if (DISPLAY_VER(dev_priv) >= 9) + if (DISPLAY_VER(i915) >= 9) intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl; else intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl; - intel_dp->aux.drm_dev = &dev_priv->drm; + intel_dp->aux.drm_dev = &i915->drm; drm_dp_aux_init(&intel_dp->aux); /* Failure to allocate our preferred name is not critical */ intel_dp->aux.name = kasprintf(GFP_KERNEL, "AUX %s/%s", - aux_ch_name(dev_priv, buf, sizeof(buf), aux_ch), + aux_ch_name(i915, buf, sizeof(buf), aux_ch), encoder->base.name); intel_dp->aux.transfer = intel_dp_aux_transfer; diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 26ea7e9f1b89..4f58efdc688a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -146,7 +146,7 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) * HDR static metadata we need to start maintaining table of * ranges for such panels. 
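intel_dp_aux_init() above now routes VLV/CHV through the dedicated vlv_* register lookups instead of the g4x fallback; the hooks are picked once, newest platform first, so later AUX transfers dispatch through function pointers without re-checking the platform. A reduced, self-contained sketch of that selection pattern (types and names are illustrative, not the driver's):

#include <stdbool.h>

struct aux_hooks_sketch {
	unsigned int (*ctl_reg)(void);
	unsigned int (*data_reg)(int index);
};

/* First matching platform check wins; order newest to oldest. */
static const struct aux_hooks_sketch *
pick_aux_hooks(int display_ver, bool has_pch_split, bool is_vlv_chv,
	       const struct aux_hooks_sketch tbl[6] /* xelpdp,tgl,skl,ilk,vlv,g4x */)
{
	if (display_ver >= 14)
		return &tbl[0];
	if (display_ver >= 12)
		return &tbl[1];
	if (display_ver >= 9)
		return &tbl[2];
	if (has_pch_split)
		return &tbl[3];
	if (is_vlv_chv)
		return &tbl[4]; /* the new dedicated VLV/CHV lookup */
	return &tbl[5];
}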
*/ - if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && + if (i915->display.params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & BIT(HDMI_STATIC_METADATA_TYPE1))) { drm_info(&i915->drm, @@ -489,7 +489,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) /* Check the VBT and user's module parameters to figure out which * interfaces to probe */ - switch (i915->params.enable_dpcd_backlight) { + switch (i915->display.params.enable_dpcd_backlight) { case INTEL_DP_AUX_BACKLIGHT_OFF: return -ENODEV; case INTEL_DP_AUX_BACKLIGHT_AUTO: diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h b/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h index 34f6e0a48ed2..e642445364d2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h @@ -21,13 +21,14 @@ #define __xe2lpd_aux_ch_idx(aux_ch) \ (aux_ch >= AUX_CH_USBC1 ? aux_ch : AUX_CH_USBC4 + 1 + (aux_ch) - AUX_CH_A) -/* TODO: Remove implicit dev_priv */ -#define _DPA_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64010) -#define _DPB_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64110) +#define _DPA_AUX_CH_CTL 0x64010 +#define _DPB_AUX_CH_CTL 0x64110 #define _XELPDP_USBC1_AUX_CH_CTL 0x16f210 #define _XELPDP_USBC2_AUX_CH_CTL 0x16f410 #define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, \ _DPB_AUX_CH_CTL) +#define VLV_DP_AUX_CH_CTL(aux_ch) _MMIO(VLV_DISPLAY_BASE + \ + _PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL)) #define _XELPDP_DP_AUX_CH_CTL(aux_ch) \ _MMIO(_PICK_EVEN_2RANGES(aux_ch, AUX_CH_USBC1, \ _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL, \ @@ -69,13 +70,14 @@ #define DP_AUX_CH_CTL_SYNC_PULSE_SKL_MASK REG_GENMASK(4, 0) /* skl+ */ #define DP_AUX_CH_CTL_SYNC_PULSE_SKL(c) REG_FIELD_PREP(DP_AUX_CH_CTL_SYNC_PULSE_SKL_MASK, (c) - 1) -/* TODO: Remove implicit dev_priv */ -#define _DPA_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64014) -#define _DPB_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64114) +#define _DPA_AUX_CH_DATA1 0x64014 +#define _DPB_AUX_CH_DATA1 0x64114 #define _XELPDP_USBC1_AUX_CH_DATA1 0x16f214 #define _XELPDP_USBC2_AUX_CH_DATA1 0x16f414 #define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, \ _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ +#define VLV_DP_AUX_CH_DATA(aux_ch, i) _MMIO(VLV_DISPLAY_BASE + _PORT(aux_ch, _DPA_AUX_CH_DATA1, \ + _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ #define _XELPDP_DP_AUX_CH_DATA(aux_ch, i) \ _MMIO(_PICK_EVEN_2RANGES(aux_ch, AUX_CH_USBC1, \ _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1, \ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index aa1061262613..8a9432335030 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -26,6 +26,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include <drm/drm_probe_helper.h> #include "i915_drv.h" @@ -43,6 +44,9 @@ #include "intel_dpio_phy.h" #include "intel_hdcp.h" #include "intel_hotplug.h" +#include "intel_link_bw.h" +#include "intel_psr.h" +#include "intel_vdsc.h" #include "skl_scaler.h" static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp, @@ -50,7 +54,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp struct intel_crtc_state *crtc_state, bool dsc) { - if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) <= 13 && 
dsc) { + if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) < 14 && dsc) { int output_bpp = bpp; /* DisplayPort 2 128b/132b, bits per lane is always 32 */ int symbol_clock = crtc_state->port_clock / 32; @@ -66,6 +70,73 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp return 0; } +static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state, + const struct intel_connector *connector, + bool ssc, bool dsc, int bpp_x16) +{ + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + unsigned long flags = DRM_DP_BW_OVERHEAD_MST; + int dsc_slice_count = 0; + int overhead; + + flags |= intel_dp_is_uhbr(crtc_state) ? DRM_DP_BW_OVERHEAD_UHBR : 0; + flags |= ssc ? DRM_DP_BW_OVERHEAD_SSC_REF_CLK : 0; + flags |= crtc_state->fec_enable ? DRM_DP_BW_OVERHEAD_FEC : 0; + + if (dsc) { + flags |= DRM_DP_BW_OVERHEAD_DSC; + /* TODO: add support for bigjoiner */ + dsc_slice_count = intel_dp_dsc_get_slice_count(connector, + adjusted_mode->clock, + adjusted_mode->hdisplay, + false); + } + + overhead = drm_dp_bw_overhead(crtc_state->lane_count, + adjusted_mode->hdisplay, + dsc_slice_count, + bpp_x16, + flags); + + /* + * TODO: clarify whether a minimum required by the fixed FEC overhead + * in the bspec audio programming sequence is required here. + */ + return max(overhead, intel_dp_bw_fec_overhead(crtc_state->fec_enable)); +} + +static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state, + const struct intel_connector *connector, + int overhead, + int bpp_x16, + struct intel_link_m_n *m_n) +{ + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + /* TODO: Check WA 14013163432 to set data M/N for full BW utilization. */ + intel_link_compute_m_n(bpp_x16, crtc_state->lane_count, + adjusted_mode->crtc_clock, + crtc_state->port_clock, + overhead, + m_n); + + m_n->tu = DIV_ROUND_UP_ULL(mul_u32_u32(m_n->data_m, 64), m_n->data_n); +} + +static int intel_dp_mst_calc_pbn(int pixel_clock, int bpp_x16, int bw_overhead) +{ + int effective_data_rate = + intel_dp_effective_data_rate(pixel_clock, bpp_x16, bw_overhead); + + /* + * TODO: Use drm_dp_calc_pbn_mode() instead, once it's converted + * to calculate PBN with the BW overhead passed to it. + */ + return DIV_ROUND_UP(effective_data_rate * 64, 54 * 1000); +} + static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int max_bpp, @@ -94,20 +165,67 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, crtc_state->lane_count = limits->max_lane_count; crtc_state->port_clock = limits->max_rate; + if (dsc) { + if (!intel_dp_supports_fec(intel_dp, connector, crtc_state)) + return -EINVAL; + + crtc_state->fec_enable = !intel_dp_is_uhbr(crtc_state); + } + mst_state->pbn_div = drm_dp_get_vc_payload_bw(&intel_dp->mst_mgr, crtc_state->port_clock, crtc_state->lane_count); + drm_dbg_kms(&i915->drm, "Looking for slots in range min bpp %d max bpp %d\n", + min_bpp, max_bpp); + for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) { + int local_bw_overhead; + int remote_bw_overhead; + int link_bpp_x16; + int remote_tu; + drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp); ret = intel_dp_mst_check_constraints(i915, bpp, adjusted_mode, crtc_state, dsc); if (ret) continue; - crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, - dsc ? bpp << 4 : bpp, - dsc); + link_bpp_x16 = to_bpp_x16(dsc ? 
bpp : + intel_dp_output_bpp(crtc_state->output_format, bpp)); + + local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, + false, dsc, link_bpp_x16); + remote_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, + true, dsc, link_bpp_x16); + + intel_dp_mst_compute_m_n(crtc_state, connector, + local_bw_overhead, + link_bpp_x16, + &crtc_state->dp_m_n); + + /* + * The TU size programmed to the HW determines which slots in + * an MTP frame are used for this stream, which needs to match + * the payload size programmed to the first downstream branch + * device's payload table. + * + * Note that atm the payload's PBN value DRM core sends via + * the ALLOCATE_PAYLOAD side-band message matches the payload + * size (which it calculates from the PBN value) it programs + * to the first branch device's payload table. The allocation + * in the payload table could be reduced though (to + * crtc_state->dp_m_n.tu), provided that the driver doesn't + * enable SSC on the corresponding link. + */ + crtc_state->pbn = intel_dp_mst_calc_pbn(adjusted_mode->crtc_clock, + link_bpp_x16, + remote_bw_overhead); + + remote_tu = DIV_ROUND_UP(dfixed_const(crtc_state->pbn), mst_state->pbn_div.full); + + drm_WARN_ON(&i915->drm, remote_tu < crtc_state->dp_m_n.tu); + crtc_state->dp_m_n.tu = remote_tu; slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr, connector->port, @@ -116,13 +234,9 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, return slots; if (slots >= 0) { - ret = drm_dp_mst_atomic_check(state); - /* - * If we got slots >= 0 and we can fit those based on check - * then we can exit the loop. Otherwise keep trying. - */ - if (!ret) - break; + drm_WARN_ON(&i915->drm, slots != crtc_state->dp_m_n.tu); + + break; } } @@ -137,7 +251,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, if (!dsc) crtc_state->pipe_bpp = bpp; else - crtc_state->dsc.compressed_bpp = bpp; + crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(bpp); drm_dbg_kms(&i915->drm, "Got %d slots for pipe bpp %d dsc %d\n", slots, bpp, dsc); } @@ -149,10 +263,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state, struct link_config_limits *limits) { - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; int slots = -EINVAL; - int link_bpp; /* * FIXME: allocate the BW according to link_bpp, which in the case of @@ -167,16 +278,6 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, if (slots < 0) return slots; - link_bpp = intel_dp_output_bpp(crtc_state->output_format, crtc_state->pipe_bpp); - - intel_link_compute_m_n(link_bpp, - crtc_state->lane_count, - adjusted_mode->crtc_clock, - crtc_state->port_clock, - &crtc_state->dp_m_n, - crtc_state->fec_enable); - crtc_state->dp_m_n.tu = slots; - return 0; } @@ -188,15 +289,12 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *i915 = to_i915(connector->base.dev); - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; int slots = -EINVAL; int i, num_bpc; u8 dsc_bpc[3] = {}; int min_bpp, max_bpp, sink_min_bpp, sink_max_bpp; u8 dsc_max_bpc; - bool need_timeslot_recalc = false; - u32 last_compressed_bpp; + int min_compressed_bpp, max_compressed_bpp; /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ if (DISPLAY_VER(i915) >= 12) @@ -232,45 +330,31 
@@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, if (max_bpp > sink_max_bpp) max_bpp = sink_max_bpp; - min_bpp = max(min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); - max_bpp = min(max_bpp, to_bpp_int(limits->link.max_bpp_x16)); + max_compressed_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, + crtc_state, + max_bpp / 3); + max_compressed_bpp = min(max_compressed_bpp, + to_bpp_int(limits->link.max_bpp_x16)); - slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, max_bpp, - min_bpp, limits, - conn_state, 2 * 3, true); + min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); + min_compressed_bpp = max(min_compressed_bpp, + to_bpp_int_roundup(limits->link.min_bpp_x16)); - if (slots < 0) - return slots; + drm_dbg_kms(&i915->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n", + min_compressed_bpp, max_compressed_bpp); - last_compressed_bpp = crtc_state->dsc.compressed_bpp; + /* Align compressed bpps according to our own constraints */ + max_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, max_compressed_bpp, + crtc_state->pipe_bpp); + min_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, min_compressed_bpp, + crtc_state->pipe_bpp); - crtc_state->dsc.compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, - last_compressed_bpp, - crtc_state->pipe_bpp); + slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, max_compressed_bpp, + min_compressed_bpp, limits, + conn_state, 1, true); - if (crtc_state->dsc.compressed_bpp != last_compressed_bpp) - need_timeslot_recalc = true; - - /* - * Apparently some MST hubs dislike if vcpi slots are not matching precisely - * the actual compressed bpp we use. - */ - if (need_timeslot_recalc) { - slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, - crtc_state->dsc.compressed_bpp, - crtc_state->dsc.compressed_bpp, - limits, conn_state, 2 * 3, true); - if (slots < 0) - return slots; - } - - intel_link_compute_m_n(crtc_state->dsc.compressed_bpp, - crtc_state->lane_count, - adjusted_mode->crtc_clock, - crtc_state->port_clock, - &crtc_state->dp_m_n, - crtc_state->fec_enable); - crtc_state->dp_m_n.tu = slots; + if (slots < 0) + return slots; return 0; } @@ -298,7 +382,102 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder, } static bool +intel_dp_mst_dsc_source_support(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + + /* + * FIXME: Enabling DSC on ICL results in blank screen and FIFO pipe / + * transcoder underruns, re-enable DSC after fixing this issue. 
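For scale, the PBN conversion these MST hunks introduce can be checked standalone. intel_dp_effective_data_rate() is only declared in this series' header hunk, so its internals here are an assumption: the sketch treats it as scaling pixel_clock * bpp by a bw_overhead factor where 1000000 means 1.0 and returning kB/s; intel_dp_mst_calc_pbn() then applies the 64/54 PBN scaling shown above. The overhead value below is a made-up example:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	uint64_t pixel_clock = 594000;  /* kHz, example 4k@60 mode */
	uint64_t bpp_x16 = 24 * 16;     /* 24 bpp in 1/16 bpp units */
	uint64_t bw_overhead = 1030000; /* hypothetical ~3% total overhead */

	/* Effective rate in kB/s, per the assumed data-rate helper. */
	uint64_t rate = DIV_ROUND_UP(pixel_clock * bpp_x16 * bw_overhead,
				     1000000ULL * 16 * 8);

	/* PBN units are 54/64 MB/s, hence the 64/54 scaling. */
	uint64_t pbn = DIV_ROUND_UP(rate * 64, 54 * 1000);

	printf("rate=%llu kB/s pbn=%llu\n",
	       (unsigned long long)rate, (unsigned long long)pbn);
	return 0; /* prints rate=1835460 kB/s pbn=2176 */
}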
+ */ + return DISPLAY_VER(i915) >= 12 && intel_dsc_source_support(crtc_state); +} + +static int mode_hblank_period_ns(const struct drm_display_mode *mode) +{ + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(mode->htotal - mode->hdisplay, + NSEC_PER_SEC / 1000), + mode->crtc_clock); +} + +static bool +hblank_expansion_quirk_needs_dsc(const struct intel_connector *connector, + const struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + if (!connector->dp.dsc_hblank_expansion_quirk) + return false; + + if (mode_hblank_period_ns(adjusted_mode) > 300) + return false; + + return true; +} + +static bool +adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *connector, + const struct intel_crtc_state *crtc_state, + struct link_config_limits *limits, + bool dsc) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + int min_bpp_x16 = limits->link.min_bpp_x16; + + if (!hblank_expansion_quirk_needs_dsc(connector, crtc_state)) + return true; + + if (!dsc) { + if (intel_dp_mst_dsc_source_support(crtc_state)) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s][CONNECTOR:%d:%s] DSC needed by hblank expansion quirk\n", + crtc->base.base.id, crtc->base.name, + connector->base.base.id, connector->base.name); + return false; + } + + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to 24 due to hblank expansion quirk\n", + crtc->base.base.id, crtc->base.name, + connector->base.base.id, connector->base.name); + + if (limits->link.max_bpp_x16 < to_bpp_x16(24)) + return false; + + limits->link.min_bpp_x16 = to_bpp_x16(24); + + return true; + } + + drm_WARN_ON(&i915->drm, limits->min_rate != limits->max_rate); + + if (limits->max_rate < 540000) + min_bpp_x16 = to_bpp_x16(13); + else if (limits->max_rate < 810000) + min_bpp_x16 = to_bpp_x16(10); + + if (limits->link.min_bpp_x16 >= min_bpp_x16) + return true; + + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " BPP_X16_FMT " in DSC mode due to hblank expansion quirk\n", + crtc->base.base.id, crtc->base.name, + connector->base.base.id, connector->base.name, + BPP_X16_ARGS(min_bpp_x16)); + + if (limits->link.max_bpp_x16 < min_bpp_x16) + return false; + + limits->link.min_bpp_x16 = min_bpp_x16; + + return true; +} + +static bool intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp, + const struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) @@ -326,10 +505,16 @@ intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp, intel_dp_adjust_compliance_config(intel_dp, crtc_state, limits); - return intel_dp_compute_config_link_bpp_limits(intel_dp, - crtc_state, - dsc, - limits); + if (!intel_dp_compute_config_link_bpp_limits(intel_dp, + crtc_state, + dsc, + limits)) + return false; + + return adjust_limits_for_dsc_hblank_expansion_quirk(connector, + crtc_state, + limits, + dsc); } static int intel_dp_mst_compute_config(struct intel_encoder *encoder, @@ -339,12 +524,18 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; + const struct intel_connector *connector = + to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = 
&pipe_config->hw.adjusted_mode; struct link_config_limits limits; bool dsc_needed; int ret = 0; + if (pipe_config->fec_enable && + !intel_dp_supports_fec(intel_dp, connector, pipe_config)) + return -EINVAL; + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -354,6 +545,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, dsc_needed = intel_dp->force_dsc_en || !intel_dp_mst_compute_config_limits(intel_dp, + connector, pipe_config, false, &limits); @@ -375,7 +567,11 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, str_yes_no(ret), str_yes_no(intel_dp->force_dsc_en)); + if (!intel_dp_mst_dsc_source_support(pipe_config)) + return -EINVAL; + if (!intel_dp_mst_compute_config_limits(intel_dp, + connector, pipe_config, true, &limits)) @@ -418,7 +614,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_dp_audio_compute_config(encoder, pipe_config, conn_state); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); + + intel_psr_compute_config(intel_dp, pipe_config, conn_state); return 0; } @@ -459,6 +657,130 @@ intel_dp_mst_transcoder_mask(struct intel_atomic_state *state, return transcoders; } +static u8 get_pipes_downstream_of_mst_port(struct intel_atomic_state *state, + struct drm_dp_mst_topology_mgr *mst_mgr, + struct drm_dp_mst_port *parent_port) +{ + const struct intel_digital_connector_state *conn_state; + struct intel_connector *connector; + u8 mask = 0; + int i; + + for_each_new_intel_connector_in_state(state, connector, conn_state, i) { + if (!conn_state->base.crtc) + continue; + + if (&connector->mst_port->mst_mgr != mst_mgr) + continue; + + if (connector->port != parent_port && + !drm_dp_mst_port_downstream_of_parent(mst_mgr, + connector->port, + parent_port)) + continue; + + mask |= BIT(to_intel_crtc(conn_state->base.crtc)->pipe); + } + + return mask; +} + +static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state, + struct drm_dp_mst_topology_mgr *mst_mgr, + struct intel_link_bw_limits *limits) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_crtc *crtc; + u8 mst_pipe_mask; + u8 fec_pipe_mask = 0; + int ret; + + mst_pipe_mask = get_pipes_downstream_of_mst_port(state, mst_mgr, NULL); + + for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, mst_pipe_mask) { + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* Atomic connector check should've added all the MST CRTCs. */ + if (drm_WARN_ON(&i915->drm, !crtc_state)) + return -EINVAL; + + if (crtc_state->fec_enable) + fec_pipe_mask |= BIT(crtc->pipe); + } + + if (!fec_pipe_mask || mst_pipe_mask == fec_pipe_mask) + return 0; + + limits->force_fec_pipes |= mst_pipe_mask; + + ret = intel_modeset_pipes_in_mask_early(state, "MST FEC", + mst_pipe_mask); + + return ret ? : -EAGAIN; +} + +static int intel_dp_mst_check_bw(struct intel_atomic_state *state, + struct drm_dp_mst_topology_mgr *mst_mgr, + struct drm_dp_mst_topology_state *mst_state, + struct intel_link_bw_limits *limits) +{ + struct drm_dp_mst_port *mst_port; + u8 mst_port_pipes; + int ret; + + ret = drm_dp_mst_atomic_check_mgr(&state->base, mst_mgr, mst_state, &mst_port); + if (ret != -ENOSPC) + return ret; + + mst_port_pipes = get_pipes_downstream_of_mst_port(state, mst_mgr, mst_port); + + ret = intel_link_bw_reduce_bpp(state, limits, + mst_port_pipes, "MST link BW"); + + return ret ? 
: -EAGAIN; +} + +/** + * intel_dp_mst_atomic_check_link - check all modeset MST link configuration + * @state: intel atomic state + * @limits: link BW limits + * + * Check the link configuration for all modeset MST outputs. If the + * configuration is invalid, @limits will be updated if possible to + * reduce the total BW, after which the configuration for all CRTCs in + * @state must be recomputed with the updated @limits. + * + * Returns: + * - 0 if the configuration is valid + * - %-EAGAIN, if the configuration is invalid and @limits got updated + * with fallback values with which the configuration of all CRTCs in + * @state must be recomputed + * - Other negative error, if the configuration is invalid without a + * fallback possibility, or the check failed for another reason + */ +int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, + struct intel_link_bw_limits *limits) +{ + struct drm_dp_mst_topology_mgr *mgr; + struct drm_dp_mst_topology_state *mst_state; + int ret; + int i; + + for_each_new_mst_mgr_in_state(&state->base, mgr, mst_state, i) { + ret = intel_dp_mst_check_fec_change(state, mgr, limits); + if (ret) + return ret; + + ret = intel_dp_mst_check_bw(state, mgr, mst_state, + limits); + if (ret) + return ret; + } + + return 0; +} + static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) @@ -479,19 +801,23 @@ static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder, * that shares the same MST stream as mode changed, * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care to do * a fastset when possible. + * + * On TGL+ this is required since each stream goes through a master transcoder, + * so if the master transcoder needs a modeset, all other streams in the + * topology need a modeset. All platforms need to add the atomic state + * for all streams in the topology, since a modeset on one may require + * changing the MST link BW usage of the others, which in turn needs a + * recomputation of the corresponding CRTC states. 
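intel_dp_mst_atomic_check_link() above follows the series' link-BW fallback protocol: a check either succeeds, fails hard, or reduces @limits and returns -EAGAIN so the caller recomputes everything. A schematic caller, with stand-in types and names (the real loop lives in the driver's atomic check path, not in these hunks):

#include <errno.h>

struct state_sketch;  /* stands in for intel_atomic_state */
struct limits_sketch; /* stands in for intel_link_bw_limits */

int compute_all_crtc_states(struct state_sketch *s, struct limits_sketch *l);
int check_link_sketch(struct state_sketch *s, struct limits_sketch *l);

/* Retry until the link check stops asking for a recompute. */
int atomic_check_with_fallback(struct state_sketch *s, struct limits_sketch *l)
{
	for (;;) {
		int ret = compute_all_crtc_states(s, l);

		if (!ret)
			ret = check_link_sketch(s, l);
		if (ret != -EAGAIN)
			return ret; /* 0 on success, hard error otherwise */
		/* -EAGAIN: limits were reduced, recompute all CRTCs */
	}
}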
*/ static int -intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector, - struct intel_atomic_state *state) +intel_dp_mst_atomic_topology_check(struct intel_connector *connector, + struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct drm_connector_list_iter connector_list_iter; struct intel_connector *connector_iter; int ret = 0; - if (DISPLAY_VER(dev_priv) < 12) - return 0; - if (!intel_connector_needs_modeset(state, &connector->base)) return 0; @@ -545,7 +871,7 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, if (ret) return ret; - ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state); + ret = intel_dp_mst_atomic_topology_check(intel_connector, state); if (ret) return ret; @@ -587,10 +913,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, struct intel_dp *intel_dp = &dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_dp_mst_topology_state *new_mst_state = - drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr); - struct drm_dp_mst_atomic_payload *new_payload = - drm_atomic_get_mst_payload_state(new_mst_state, connector->port); struct drm_i915_private *i915 = to_i915(connector->base.dev); drm_dbg_kms(&i915->drm, "active links %d\n", @@ -598,9 +920,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, intel_hdcp_disable(intel_mst->connector); - drm_dp_remove_payload_part1(&intel_dp->mst_mgr, new_mst_state, new_payload); - - intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); + intel_dp_sink_disable_decompression(state, connector, old_crtc_state); } static void intel_mst_post_disable_dp(struct intel_atomic_state *state, @@ -634,6 +954,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, intel_disable_transcoder(old_crtc_state); + drm_dp_remove_payload_part1(&intel_dp->mst_mgr, new_mst_state, new_payload); + clear_act_sent(encoder, old_crtc_state); intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder), @@ -646,6 +968,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, intel_ddi_disable_transcoder_func(old_crtc_state); + intel_dsc_disable(old_crtc_state); + if (DISPLAY_VER(dev_priv) >= 9) skl_scaler_disable(old_crtc_state); else @@ -662,9 +986,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, * BSpec 4287: disable DIP after the transcoder is disabled and before * the transcoder clock select is set to none. 
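Taken together, the disable-path hunks above converge on a fixed teardown order, with payload removal and DSC teardown moved into post-disable. A condensed sketch, using stand-in prototypes for the driver calls (not every register write is shown):

void disable_transcoder(void);
void remove_payload_part1(void);   /* moved here from the disable hook */
void clear_act_sent(void);
void clear_vc_payload_alloc(void); /* TRANS_DDI_FUNC_CTL rmw */
void disable_transcoder_func(void);
void disable_dsc(void);            /* new: DSC torn down with the stream */
void disable_scaler(void);
void set_infoframes_off(void);     /* BSpec 4287: DIP off before clock select */
void clear_transcoder_clock_select(void);

static void mst_post_disable_order(void)
{
	disable_transcoder();
	remove_payload_part1();
	clear_act_sent();
	clear_vc_payload_alloc();
	disable_transcoder_func();
	disable_dsc();
	disable_scaler();
	set_infoframes_off();
	clear_transcoder_clock_select();
}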
*/ - if (last_mst_stream) - intel_dp_set_infoframes(&dig_port->base, false, - old_crtc_state, NULL); + intel_dp_set_infoframes(&dig_port->base, false, + old_crtc_state, NULL); /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" @@ -754,6 +1077,8 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); + intel_dp_sink_enable_decompression(state, connector, pipe_config); + if (first_mst_stream) dig_port->base.pre_enable(state, &dig_port->base, pipe_config, NULL); @@ -776,6 +1101,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, if (DISPLAY_VER(dev_priv) < 12 || !first_mst_stream) intel_ddi_enable_transcoder_clock(encoder, pipe_config); + intel_dsc_dp_pps_write(&dig_port->base, pipe_config); intel_ddi_set_dp_msa(pipe_config, conn_state); } @@ -792,11 +1118,10 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr); enum transcoder trans = pipe_config->cpu_transcoder; + bool first_mst_stream = intel_dp->active_mst_links == 1; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); - clear_act_sent(encoder, pipe_config); - if (intel_dp_is_uhbr(pipe_config)) { const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -810,6 +1135,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); + clear_act_sent(encoder, pipe_config); + intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(trans), 0, TRANS_DDI_DP_VC_PAYLOAD_ALLOC); @@ -818,15 +1145,16 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, wait_for_act_sent(encoder, pipe_config); + if (first_mst_stream) + intel_ddi_wait_for_fec_status(encoder, pipe_config, true); + drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base, drm_atomic_get_mst_payload_state(mst_state, connector->port)); - if (DISPLAY_VER(dev_priv) >= 14 && pipe_config->fec_enable) - intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(trans), 0, - FECSTALL_DIS_DPTSTREAM_DPTTG); - else if (DISPLAY_VER(dev_priv) >= 12 && pipe_config->fec_enable) - intel_de_rmw(dev_priv, CHICKEN_TRANS(trans), 0, - FECSTALL_DIS_DPTSTREAM_DPTTG); + if (DISPLAY_VER(dev_priv) >= 12) + intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, trans), + FECSTALL_DIS_DPTSTREAM_DPTTG, + pipe_config->fec_enable ? FECSTALL_DIS_DPTSTREAM_DPTTG : 0); intel_audio_sdp_split_update(pipe_config); @@ -834,12 +1162,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_crtc_vblank_on(pipe_config); - intel_audio_codec_enable(encoder, pipe_config, conn_state); - - /* Enable hdcp if it's desired */ - if (conn_state->content_protection == - DRM_MODE_CONTENT_PROTECTION_DESIRED) - intel_hdcp_enable(state, encoder, pipe_config, conn_state); + intel_hdcp_enable(state, encoder, pipe_config, conn_state); } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -978,8 +1301,20 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (ret) return ret; + /* + * TODO: + * - Also check if compression would allow for the mode + * - Calculate the overhead using drm_dp_bw_overhead() / + * drm_dp_bw_channel_coding_efficiency(), similarly to the + * compute config code, as drm_dp_calc_pbn_mode() doesn't + * account with all the overheads. 
+ * - Check here and during compute config the BW reported by + * DFP_Link_Available_Payload_Bandwidth_Number (or the + * corresponding link capabilities of the sink) in case the + * stream is uncompressed for it by the last branch device. + */ if (mode_rate > max_rate || mode->clock > max_dotclk || - drm_dp_calc_pbn_mode(mode->clock, min_bpp, false) > port->full_pbn) { + drm_dp_calc_pbn_mode(mode->clock, min_bpp << 4) > port->full_pbn) { *status = MODE_CLOCK_HIGH; return 0; } @@ -1151,6 +1486,36 @@ intel_dp_mst_read_decompression_port_dsc_caps(struct intel_dp *intel_dp, intel_dp_get_dsc_sink_cap(dpcd_caps[DP_DPCD_REV], connector); } +static bool detect_dsc_hblank_expansion_quirk(const struct intel_connector *connector) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct drm_dp_desc desc; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + + if (!connector->dp.dsc_decompression_aux) + return false; + + if (drm_dp_read_desc(connector->dp.dsc_decompression_aux, + &desc, true) < 0) + return false; + + if (!drm_dp_has_quirk(&desc, + DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC)) + return false; + + if (drm_dp_read_dpcd_caps(connector->dp.dsc_decompression_aux, dpcd) < 0) + return false; + + if (!(dpcd[DP_RECEIVE_PORT_0_CAP_0] & DP_HBLANK_EXPANSION_CAPABLE)) + return false; + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] DSC HBLANK expansion quirk detected\n", + connector->base.base.id, connector->base.name); + + return true; +} + static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, const char *pathprop) @@ -1173,13 +1538,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_connector->port = port; drm_dp_mst_get_port_malloc(port); - /* - * TODO: set the AUX for the actual MST port decompressing the stream. - * At the moment the driver only supports enabling this globally in the - * first downstream MST branch, via intel_dp's (root port) AUX. 
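The quirk detected by detect_dsc_hblank_expansion_quirk() above only bites for modes with a very short horizontal blank: adjust_limits_for_dsc_hblank_expansion_quirk() earlier compares mode_hblank_period_ns() against 300 ns. A standalone arithmetic check (the mode numbers are example values; the kernel helper rounds to nearest while this sketch truncates):

#include <stdint.h>
#include <stdio.h>

/* hblank period in ns: (htotal - hdisplay) pixels at crtc_clock kHz */
static uint64_t hblank_period_ns(uint32_t htotal, uint32_t hdisplay,
				 uint32_t crtc_clock_khz)
{
	return (uint64_t)(htotal - hdisplay) * 1000000 / crtc_clock_khz;
}

int main(void)
{
	/* CEA 3840x2160@60: 560 blanking pixels at 594 MHz */
	printf("%llu ns\n",
	       (unsigned long long)hblank_period_ns(4400, 3840, 594000));
	/* ~942 ns: above the 300 ns threshold, so no quirk handling */
	return 0;
}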
- */ - intel_connector->dp.dsc_decompression_aux = &intel_dp->aux; + intel_connector->dp.dsc_decompression_aux = drm_dp_mst_dsc_aux_for_port(port); intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector); + intel_connector->dp.dsc_hblank_expansion_quirk = + detect_dsc_hblank_expansion_quirk(intel_connector); connector = &intel_connector->base; ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, @@ -1272,6 +1634,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; + intel_encoder->audio_enable = intel_audio_codec_enable; + intel_encoder->audio_disable = intel_audio_codec_disable; intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state; intel_encoder->get_config = intel_dp_mst_enc_get_config; intel_encoder->initial_fastset_check = intel_dp_mst_initial_fastset_check; @@ -1419,3 +1783,91 @@ int intel_dp_mst_add_topology_state_for_crtc(struct intel_atomic_state *state, return 0; } + +static struct intel_connector * +get_connector_in_state_for_crtc(struct intel_atomic_state *state, + const struct intel_crtc *crtc) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector_state *new_conn_state; + struct drm_connector *_connector; + int i; + + for_each_oldnew_connector_in_state(&state->base, _connector, + old_conn_state, new_conn_state, i) { + struct intel_connector *connector = + to_intel_connector(_connector); + + if (old_conn_state->crtc == &crtc->base || + new_conn_state->crtc == &crtc->base) + return connector; + } + + return NULL; +} + +/** + * intel_dp_mst_crtc_needs_modeset - check if changes in topology need to modeset the given CRTC + * @state: atomic state + * @crtc: CRTC for which to check the modeset requirement + * + * Check if any change in a MST topology requires a forced modeset on @crtc in + * this topology. One such change is enabling/disabling the DSC decompression + * state in the first branch device's UFP DPCD as required by one CRTC, while + * the other @crtc in the same topology is still active, requiring a full modeset + * on @crtc. 
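How intel_dp_mst_crtc_needs_modeset() is consumed is not part of these hunks; a hypothetical call site in the global atomic check would look roughly like the fragment below (the integration point and the mode_changed escalation are assumptions, not the driver's confirmed wiring):

/* Hypothetical fragment: escalate to a full modeset when the shared
 * DSC decompression state of the topology changes under this CRTC. */
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
	if (intel_dp_mst_crtc_needs_modeset(state, crtc))
		new_crtc_state->uapi.mode_changed = true;
}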
+ */ +bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_connector *crtc_connector; + const struct drm_connector_state *conn_state; + const struct drm_connector *_connector; + int i; + + if (!intel_crtc_has_type(intel_atomic_get_new_crtc_state(state, crtc), + INTEL_OUTPUT_DP_MST)) + return false; + + crtc_connector = get_connector_in_state_for_crtc(state, crtc); + + if (!crtc_connector) + /* None of the connectors in the topology needs modeset */ + return false; + + for_each_new_connector_in_state(&state->base, _connector, conn_state, i) { + const struct intel_connector *connector = + to_intel_connector(_connector); + const struct intel_crtc_state *new_crtc_state; + const struct intel_crtc_state *old_crtc_state; + struct intel_crtc *crtc_iter; + + if (connector->mst_port != crtc_connector->mst_port || + !conn_state->crtc) + continue; + + crtc_iter = to_intel_crtc(conn_state->crtc); + + new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc_iter); + old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc_iter); + + if (!intel_crtc_needs_modeset(new_crtc_state)) + continue; + + if (old_crtc_state->dsc.compression_enable == + new_crtc_state->dsc.compression_enable) + continue; + /* + * Toggling the decompression flag because of this stream in + * the first downstream branch device's UFP DPCD may reset the + * whole branch device. To avoid the reset while other streams + * are also active modeset the whole MST topology in this + * case. + */ + if (connector->dp.dsc_decompression_aux == + &connector->mst_port->aux) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index f1815bb72267..8ca1d599091c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -13,6 +13,7 @@ struct intel_crtc; struct intel_crtc_state; struct intel_digital_port; struct intel_dp; +struct intel_link_bw_limits; int intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_id); void intel_dp_mst_encoder_cleanup(struct intel_digital_port *dig_port); @@ -22,5 +23,9 @@ bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state); bool intel_dp_mst_source_support(struct intel_dp *intel_dp); int intel_dp_mst_add_topology_state_for_crtc(struct intel_atomic_state *state, struct intel_crtc *crtc); +int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, + struct intel_link_bw_limits *limits); +bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, + struct intel_crtc *crtc); #endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 62b93d097e44..4ca910874a4f 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -666,6 +666,20 @@ enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port) } } +enum dpio_phy vlv_pipe_to_phy(enum pipe pipe) +{ + switch (pipe) { + default: + MISSING_CASE(pipe); + fallthrough; + case PIPE_A: + case PIPE_B: + return DPIO_PHY0; + case PIPE_C: + return DPIO_PHY1; + } +} + enum dpio_channel vlv_pipe_to_channel(enum pipe pipe) { switch (pipe) { @@ -689,50 +703,50 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch 
= vlv_dig_port_to_channel(dig_port); - enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); u32 val; int i; vlv_dpio_get(dev_priv); /* Clear calc init */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW10(ch)); val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK); val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW10(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW10(ch)); val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK); val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW10(ch), val); } - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW9(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW9(ch)); val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK); val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW9(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW9(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW9(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW9(ch)); val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK); val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW9(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW9(ch), val); } /* Program swing deemph */ for (i = 0; i < crtc_state->lane_count; i++) { - val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW4(ch, i)); + val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW4(ch, i)); val &= ~DPIO_SWING_DEEMPH9P5_MASK; val |= deemph_reg_value << DPIO_SWING_DEEMPH9P5_SHIFT; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW4(ch, i), val); + vlv_dpio_write(dev_priv, phy, CHV_TX_DW4(ch, i), val); } /* Program swing margin */ for (i = 0; i < crtc_state->lane_count; i++) { - val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); + val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW2(ch, i)); val &= ~DPIO_SWING_MARGIN000_MASK; val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT; @@ -745,7 +759,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT); val |= 0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); + vlv_dpio_write(dev_priv, phy, CHV_TX_DW2(ch, i), val); } /* @@ -755,23 +769,23 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, * 27 for ch0 and ch1. 
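Every conversion in these dpio hunks has the same shape: derive the PHY from the pipe once via vlv_pipe_to_phy(), then read-modify-write DPIO registers through it. The driver open-codes each sequence; a generic helper like the one below is only an illustration of the pattern, with sketch types standing in for the real ones:

#include <stdint.h>

enum dpio_phy_sketch { SKETCH_PHY0, SKETCH_PHY1 };

uint32_t vlv_dpio_read_sketch(enum dpio_phy_sketch phy, uint32_t reg);
void vlv_dpio_write_sketch(enum dpio_phy_sketch phy, uint32_t reg, uint32_t val);

/* Read-modify-write a DPIO register via the PHY derived from the pipe. */
static void dpio_rmw(enum dpio_phy_sketch phy, uint32_t reg,
		     uint32_t clear, uint32_t set)
{
	uint32_t val = vlv_dpio_read_sketch(phy, reg);

	val &= ~clear;
	val |= set;
	vlv_dpio_write_sketch(phy, reg, val);
}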
*/ for (i = 0; i < crtc_state->lane_count; i++) { - val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i)); + val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW3(ch, i)); if (uniq_trans_scale) val |= DPIO_TX_UNIQ_TRANS_SCALE_EN; else val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val); + vlv_dpio_write(dev_priv, phy, CHV_TX_DW3(ch, i), val); } /* Start swing calculation */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW10(ch)); val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW10(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW10(ch)); val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW10(ch), val); } vlv_dpio_put(dev_priv); @@ -782,43 +796,43 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum dpio_channel ch = vlv_dig_port_to_channel(enc_to_dig_port(encoder)); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - enum pipe pipe = crtc->pipe; + enum dpio_channel ch = vlv_dig_port_to_channel(enc_to_dig_port(encoder)); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); u32 val; - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW0(ch)); if (reset) val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); else val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW0(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW0(ch)); if (reset) val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); else val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW0(ch), val); } - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW1(ch)); val |= CHV_PCS_REQ_SOFTRESET_EN; if (reset) val &= ~DPIO_PCS_CLK_SOFT_RESET; else val |= DPIO_PCS_CLK_SOFT_RESET; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW1(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW1(ch)); val |= CHV_PCS_REQ_SOFTRESET_EN; if (reset) val &= ~DPIO_PCS_CLK_SOFT_RESET; else val |= DPIO_PCS_CLK_SOFT_RESET; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW1(ch), val); } } @@ -829,6 +843,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch = vlv_dig_port_to_channel(dig_port); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); enum pipe pipe = crtc->pipe; unsigned int lane_mask = intel_dp_unused_lane_mask(crtc_state->lane_count); @@ -851,40 +866,40 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, /* program 
left/right clock distribution */ if (pipe != PIPE_B) { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); + val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW5_CH0); val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK); if (ch == DPIO_CH0) val |= CHV_BUFLEFTENA1_FORCE; if (ch == DPIO_CH1) val |= CHV_BUFRIGHTENA1_FORCE; - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val); + vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW5_CH0, val); } else { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1); + val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW1_CH1); val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK); if (ch == DPIO_CH0) val |= CHV_BUFLEFTENA2_FORCE; if (ch == DPIO_CH1) val |= CHV_BUFRIGHTENA2_FORCE; - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); + vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW1_CH1, val); } /* program clock channel usage */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW8(ch)); val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; if (pipe != PIPE_B) val &= ~CHV_PCS_USEDCLKCHANNEL; else val |= CHV_PCS_USEDCLKCHANNEL; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW8(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW8(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW8(ch)); val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; if (pipe != PIPE_B) val &= ~CHV_PCS_USEDCLKCHANNEL; else val |= CHV_PCS_USEDCLKCHANNEL; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW8(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW8(ch), val); } /* @@ -892,12 +907,12 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, * matches the pipe, but here we need to * pick the CL based on the port. */ - val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW19(ch)); + val = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW19(ch)); if (pipe != PIPE_B) val &= ~CHV_CMN_USEDCLKCHANNEL; else val |= CHV_CMN_USEDCLKCHANNEL; - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW19(ch), val); vlv_dpio_put(dev_priv); } @@ -910,21 +925,21 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel ch = vlv_dig_port_to_channel(dig_port); - enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); int data, i, stagger; u32 val; vlv_dpio_get(dev_priv); /* allow hardware to manage TX FIFO reset source */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW11(ch)); val &= ~DPIO_LANEDESKEW_STRAP_OVRD; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW11(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW11(ch)); val &= ~DPIO_LANEDESKEW_STRAP_OVRD; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW11(ch), val); } /* Program Tx lane latency optimal setting*/ @@ -934,7 +949,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, data = 0x0; else data = (i == 1) ? 
0x0 : 0x1; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW14(ch, i), + vlv_dpio_write(dev_priv, phy, CHV_TX_DW14(ch, i), data << DPIO_UPAR_SHIFT); } @@ -950,17 +965,17 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, else stagger = 0x2; - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW11(ch)); val |= DPIO_TX2_STAGGER_MASK(0x1f); - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW11(ch), val); if (crtc_state->lane_count > 2) { - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW11(ch)); val |= DPIO_TX2_STAGGER_MASK(0x1f); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW11(ch), val); } - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW12(ch), + vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW12(ch), DPIO_LANESTAGGER_STRAP(stagger) | DPIO_LANESTAGGER_STRAP_OVRD | DPIO_TX1_STAGGER_MASK(0x1f) | @@ -968,7 +983,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, DPIO_TX2_STAGGER_MULT(0)); if (crtc_state->lane_count > 2) { - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch), + vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW12(ch), DPIO_LANESTAGGER_STRAP(stagger) | DPIO_LANESTAGGER_STRAP_OVRD | DPIO_TX1_STAGGER_MASK(0x1f) | @@ -998,19 +1013,20 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum pipe pipe = to_intel_crtc(old_crtc_state->uapi.crtc)->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(pipe); u32 val; vlv_dpio_get(dev_priv); /* disable left/right clock distribution */ if (pipe != PIPE_B) { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); + val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW5_CH0); val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK); - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val); + vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW5_CH0, val); } else { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1); + val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW1_CH1); val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK); - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); + vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW1_CH1, val); } vlv_dpio_put(dev_priv); @@ -1036,22 +1052,22 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dig_port_to_channel(dig_port); - enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); vlv_dpio_get(dev_priv); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port), + vlv_dpio_write(dev_priv, phy, VLV_TX_DW5(port), 0x00000000); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW4(port), demph_reg_value); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW2(port), uniqtranscale_reg_value); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW3(port), 0x0C782040); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW3(port), 0x0C782040); if (tx3_demph) - vlv_dpio_write(dev_priv, pipe, VLV_TX3_DW4(port), tx3_demph); + vlv_dpio_write(dev_priv, phy, VLV_TX3_DW4(port), tx3_demph); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 
DPIO_TX_OCALINIT_EN); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW11(port), 0x00030000); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW9(port), preemph_reg_value); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN); vlv_dpio_put(dev_priv); } @@ -1063,24 +1079,24 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dig_port_to_channel(dig_port); - enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); /* Program Tx lane resets to default */ vlv_dpio_get(dev_priv); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW1(port), DPIO_PCS_CLK_CRI_RXEB_EIOS_EN | DPIO_PCS_CLK_CRI_RXDIGFILTSG_EN | (1<<DPIO_PCS_CLK_DATAWIDTH_SHIFT) | DPIO_PCS_CLK_SOFT_RESET); /* Fix up inter-pair skew failure */ - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500); - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW12(port), 0x00750f00); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW11(port), 0x00001500); + vlv_dpio_write(dev_priv, phy, VLV_TX_DW14(port), 0x40400000); vlv_dpio_put(dev_priv); } @@ -1094,23 +1110,24 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum dpio_channel port = vlv_dig_port_to_channel(dig_port); enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(pipe); u32 val; vlv_dpio_get(dev_priv); /* Enable clock channels for this port */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); + val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW8(port)); val = 0; if (pipe) val |= (1<<21); else val &= ~(1<<21); val |= 0x001000c4; - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW8(port), val); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW8(port), val); /* Program lane clock */ - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW14(port), 0x00760018); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW23(port), 0x00400888); vlv_dpio_put(dev_priv); } @@ -1122,10 +1139,10 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum dpio_channel port = vlv_dig_port_to_channel(dig_port); - enum pipe pipe = crtc->pipe; + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); vlv_dpio_get(dev_priv); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); - vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW0(port), 0x00000000); + vlv_dpio_write(dev_priv, phy, VLV_PCS_DW1(port), 0x00e00060); vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.h b/drivers/gpu/drm/i915/display/intel_dpio_phy.h index 4d43dbbdf81c..9adc4e8c1738 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.h +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.h @@ -44,6 +44,7 @@ u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); enum dpio_channel vlv_dig_port_to_channel(struct 
intel_digital_port *dig_port); enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port); +enum dpio_phy vlv_pipe_to_phy(enum pipe pipe); enum dpio_channel vlv_pipe_to_channel(enum pipe pipe); void chv_set_phy_signal_level(struct intel_encoder *encoder, @@ -116,6 +117,10 @@ static inline enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_p { return DPIO_PHY0; } +static inline enum dpio_phy vlv_pipe_to_phy(enum pipe pipe) +{ + return DPIO_PHY0; +} static inline enum dpio_channel vlv_pipe_to_channel(enum pipe pipe) { return DPIO_CH0; diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index d41c1dc9f66c..3038655377ea 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -16,6 +16,7 @@ #include "intel_dpio_phy.h" #include "intel_dpll.h" #include "intel_lvds.h" +#include "intel_lvds_regs.h" #include "intel_panel.h" #include "intel_pps.h" #include "intel_snps_phy.h" @@ -311,7 +312,7 @@ static const struct intel_limit intel_limits_bxt = { * divided-down version of it. */ /* m1 is reserved as 0 in Pineview, n is a ring counter */ -int pnv_calc_dpll_params(int refclk, struct dpll *clock) +static int pnv_calc_dpll_params(int refclk, struct dpll *clock) { clock->m = clock->m2 + 2; clock->p = clock->p1 * clock->p2; @@ -342,7 +343,7 @@ int i9xx_calc_dpll_params(int refclk, struct dpll *clock) return clock->dot; } -int vlv_calc_dpll_params(int refclk, struct dpll *clock) +static int vlv_calc_dpll_params(int refclk, struct dpll *clock) { clock->m = clock->m1 * clock->m2; clock->p = clock->p1 * clock->p2 * 5; @@ -368,6 +369,176 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) return clock->dot; } +static int i9xx_pll_refclk(struct drm_device *dev, + const struct intel_crtc_state *pipe_config) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dpll = pipe_config->dpll_hw_state.dpll; + + if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) + return dev_priv->display.vbt.lvds_ssc_freq; + else if (HAS_PCH_SPLIT(dev_priv)) + return 120000; + else if (DISPLAY_VER(dev_priv) != 2) + return 96000; + else + return 48000; +} + +/* Returns the clock of the currently programmed mode of the given pipe. */ +void i9xx_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dpll = pipe_config->dpll_hw_state.dpll; + u32 fp; + struct dpll clock; + int port_clock; + int refclk = i9xx_pll_refclk(dev, pipe_config); + + if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0) + fp = pipe_config->dpll_hw_state.fp0; + else + fp = pipe_config->dpll_hw_state.fp1; + + clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT; + if (IS_PINEVIEW(dev_priv)) { + clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1; + clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT; + } else { + clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT; + clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT; + } + + if (DISPLAY_VER(dev_priv) != 2) { + if (IS_PINEVIEW(dev_priv)) + clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >> + DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW); + else + clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >> + DPLL_FPA01_P1_POST_DIV_SHIFT); + + switch (dpll & DPLL_MODE_MASK) { + case DPLLB_MODE_DAC_SERIAL: + clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ? 
+ 5 : 10; + break; + case DPLLB_MODE_LVDS: + clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ? + 7 : 14; + break; + default: + drm_dbg_kms(&dev_priv->drm, + "Unknown DPLL mode %08x in programmed " + "mode\n", (int)(dpll & DPLL_MODE_MASK)); + return; + } + + if (IS_PINEVIEW(dev_priv)) + port_clock = pnv_calc_dpll_params(refclk, &clock); + else + port_clock = i9xx_calc_dpll_params(refclk, &clock); + } else { + enum pipe lvds_pipe; + + if (IS_I85X(dev_priv) && + intel_lvds_port_enabled(dev_priv, LVDS, &lvds_pipe) && + lvds_pipe == crtc->pipe) { + u32 lvds = intel_de_read(dev_priv, LVDS); + + clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >> + DPLL_FPA01_P1_POST_DIV_SHIFT); + + if (lvds & LVDS_CLKB_POWER_UP) + clock.p2 = 7; + else + clock.p2 = 14; + } else { + if (dpll & PLL_P1_DIVIDE_BY_TWO) + clock.p1 = 2; + else { + clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >> + DPLL_FPA01_P1_POST_DIV_SHIFT) + 2; + } + if (dpll & PLL_P2_DIVIDE_BY_4) + clock.p2 = 4; + else + clock.p2 = 2; + } + + port_clock = i9xx_calc_dpll_params(refclk, &clock); + } + + /* + * This value includes pixel_multiplier. We will use + * port_clock to compute adjusted_mode.crtc_clock in the + * encoder's get_config() function. + */ + pipe_config->port_clock = port_clock; +} + +void vlv_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); + struct dpll clock; + u32 mdiv; + int refclk = 100000; + + /* In case of DSI, DPLL will not be used */ + if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) + return; + + vlv_dpio_get(dev_priv); + mdiv = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW3(crtc->pipe)); + vlv_dpio_put(dev_priv); + + clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; + clock.m2 = mdiv & DPIO_M2DIV_MASK; + clock.n = (mdiv >> DPIO_N_SHIFT) & 0xf; + clock.p1 = (mdiv >> DPIO_P1_SHIFT) & 7; + clock.p2 = (mdiv >> DPIO_P2_SHIFT) & 0x1f; + + pipe_config->port_clock = vlv_calc_dpll_params(refclk, &clock); +} + +void chv_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum dpio_channel port = vlv_pipe_to_channel(crtc->pipe); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); + struct dpll clock; + u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; + int refclk = 100000; + + /* In case of DSI, DPLL will not be used */ + if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) + return; + + vlv_dpio_get(dev_priv); + cmn_dw13 = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW13(port)); + pll_dw0 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW0(port)); + pll_dw1 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW1(port)); + pll_dw2 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW2(port)); + pll_dw3 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW3(port)); + vlv_dpio_put(dev_priv); + + clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; + clock.m2 = (pll_dw0 & 0xff) << 22; + if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) + clock.m2 |= pll_dw2 & 0x3fffff; + clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; + clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; + clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; + + pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock); +} + /* * Returns whether the given set of divisors are valid for a given refclk with * the given connectors. 
@@ -1003,12 +1174,10 @@ static int dg2_crtc_compute_clock(struct intel_atomic_state *state, static int mtl_crtc_compute_clock(struct intel_atomic_state *state, struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_encoder *encoder = intel_get_crtc_new_encoder(state, crtc_state); - enum phy phy = intel_port_to_phy(i915, encoder->port); int ret; ret = intel_cx0pll_calc_state(crtc_state, encoder); @@ -1016,10 +1185,7 @@ static int mtl_crtc_compute_clock(struct intel_atomic_state *state, return ret; /* TODO: Do the readback via intel_compute_shared_dplls() */ - if (intel_is_c10phy(i915, phy)) - crtc_state->port_clock = intel_c10pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c10); - else - crtc_state->port_clock = intel_c20pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c20); + crtc_state->port_clock = intel_cx0pll_calc_port_clock(encoder, &crtc_state->cx0pll_state); crtc_state->hw.adjusted_mode.crtc_clock = intel_crtc_dotclock(crtc_state); @@ -1645,7 +1811,7 @@ void i9xx_enable_pll(const struct intel_crtc_state *crtc_state) } static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, - enum pipe pipe) + enum dpio_phy phy) { u32 reg_val; @@ -1653,30 +1819,31 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, * PLLB opamp always calibrates to max value of 0x3f, force enable it * and set it to a reasonable value instead. */ - reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1)); + reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW9(1)); reg_val &= 0xffffff00; reg_val |= 0x00000030; - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9(1), reg_val); - reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13); + reg_val = vlv_dpio_read(dev_priv, phy, VLV_REF_DW13); reg_val &= 0x00ffffff; reg_val |= 0x8c000000; - vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val); + vlv_dpio_write(dev_priv, phy, VLV_REF_DW13, reg_val); - reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1)); + reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW9(1)); reg_val &= 0xffffff00; - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9(1), reg_val); - reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13); + reg_val = vlv_dpio_read(dev_priv, phy, VLV_REF_DW13); reg_val &= 0x00ffffff; reg_val |= 0xb0000000; - vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val); + vlv_dpio_write(dev_priv, phy, VLV_REF_DW13, reg_val); } static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); enum pipe pipe = crtc->pipe; u32 mdiv; u32 bestn, bestm1, bestm2, bestp1, bestp2; @@ -1694,18 +1861,18 @@ static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state) /* PLL B needs special handling */ if (pipe == PIPE_B) - vlv_pllb_recal_opamp(dev_priv, pipe); + vlv_pllb_recal_opamp(dev_priv, phy); /* Set up Tx target for periodic Rcomp update */ - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9_BCAST, 0x0100000f); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9_BCAST, 0x0100000f); /* Disable target IRef on PLL */ - reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW8(pipe)); + reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW8(pipe)); reg_val &= 0x00ffffff; - vlv_dpio_write(dev_priv, pipe, 
VLV_PLL_DW8(pipe), reg_val); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW8(pipe), reg_val); /* Disable fast lock */ - vlv_dpio_write(dev_priv, pipe, VLV_CMN_DW0, 0x610); + vlv_dpio_write(dev_priv, phy, VLV_CMN_DW0, 0x610); /* Set idtafcrecal before PLL is enabled */ mdiv = ((bestm1 << DPIO_M1DIV_SHIFT) | (bestm2 & DPIO_M2DIV_MASK)); @@ -1719,46 +1886,46 @@ static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state) * Note: don't use the DAC post divider as it seems unstable. */ mdiv |= (DPIO_POST_DIV_HDMIDP << DPIO_POST_DIV_SHIFT); - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW3(pipe), mdiv); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW3(pipe), mdiv); mdiv |= DPIO_ENABLE_CALIBRATION; - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW3(pipe), mdiv); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW3(pipe), mdiv); /* Set HBR and RBR LPF coefficients */ if (crtc_state->port_clock == 162000 || intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG) || intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW10(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW10(pipe), 0x009f0003); else - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW10(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW10(pipe), 0x00d0000f); if (intel_crtc_has_dp_encoder(crtc_state)) { /* Use SSC source */ if (pipe == PIPE_A) - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe), 0x0df40000); else - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe), 0x0df70000); } else { /* HDMI or VGA */ /* Use bend source */ if (pipe == PIPE_A) - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe), 0x0df70000); else - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe), + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe), 0x0df40000); } - coreclk = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW7(pipe)); + coreclk = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW7(pipe)); coreclk = (coreclk & 0x0000ff00) | 0x01c00000; if (intel_crtc_has_dp_encoder(crtc_state)) coreclk |= 0x01000000; - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW7(pipe), coreclk); - vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000); + vlv_dpio_write(dev_priv, phy, VLV_PLL_DW11(pipe), 0x87871000); vlv_dpio_put(dev_priv); } @@ -1809,6 +1976,7 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum dpio_channel port = vlv_pipe_to_channel(pipe); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); u32 loopfilter, tribuf_calcntr; u32 bestm2, bestp1, bestp2, bestm2_frac; u32 dpio_val; @@ -1825,39 +1993,39 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state) vlv_dpio_get(dev_priv); /* p1 and p2 divider */ - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port), + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW13(port), 5 << DPIO_CHV_S1_DIV_SHIFT | bestp1 << DPIO_CHV_P1_DIV_SHIFT | bestp2 << DPIO_CHV_P2_DIV_SHIFT | 1 << DPIO_CHV_K_DIV_SHIFT); /* Feedback post-divider - m2 */ - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW0(port), bestm2); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW0(port), bestm2); /* Feedback refclk divider - n and m1 */ - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW1(port), + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW1(port), DPIO_CHV_M1_DIV_BY_2 | 1 << DPIO_CHV_N_DIV_SHIFT); /* M2 fraction division */ - vlv_dpio_write(dev_priv, pipe, 
CHV_PLL_DW2(port), bestm2_frac); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW2(port), bestm2_frac); /* M2 fraction division enable */ - dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); + dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW3(port)); dpio_val &= ~(DPIO_CHV_FEEDFWD_GAIN_MASK | DPIO_CHV_FRAC_DIV_EN); dpio_val |= (2 << DPIO_CHV_FEEDFWD_GAIN_SHIFT); if (bestm2_frac) dpio_val |= DPIO_CHV_FRAC_DIV_EN; - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW3(port), dpio_val); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW3(port), dpio_val); /* Program digital lock detect threshold */ - dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW9(port)); + dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW9(port)); dpio_val &= ~(DPIO_CHV_INT_LOCK_THRESHOLD_MASK | DPIO_CHV_INT_LOCK_THRESHOLD_SEL_COARSE); dpio_val |= (0x5 << DPIO_CHV_INT_LOCK_THRESHOLD_SHIFT); if (!bestm2_frac) dpio_val |= DPIO_CHV_INT_LOCK_THRESHOLD_SEL_COARSE; - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW9(port), dpio_val); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW9(port), dpio_val); /* Loop filter */ if (vco == 5400000) { @@ -1882,16 +2050,16 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state) loopfilter |= (0x3 << DPIO_CHV_GAIN_CTRL_SHIFT); tribuf_calcntr = 0; } - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW6(port), loopfilter); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW6(port), loopfilter); - dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW8(port)); + dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW8(port)); dpio_val &= ~DPIO_CHV_TDC_TARGET_CNT_MASK; dpio_val |= (tribuf_calcntr << DPIO_CHV_TDC_TARGET_CNT_SHIFT); - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW8(port), dpio_val); + vlv_dpio_write(dev_priv, phy, CHV_PLL_DW8(port), dpio_val); /* AFC Recal */ - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), - vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) | + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port), + vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port)) | DPIO_AFC_RECAL); vlv_dpio_put(dev_priv); @@ -1903,14 +2071,15 @@ static void _chv_enable_pll(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum dpio_channel port = vlv_pipe_to_channel(pipe); + enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe); u32 tmp; vlv_dpio_get(dev_priv); /* Enable back the 10bit clock to display controller */ - tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); + tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port)); tmp |= DPIO_DCLKP_EN; - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp); + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port), tmp); vlv_dpio_put(dev_priv); @@ -2031,6 +2200,7 @@ void vlv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) { enum dpio_channel port = vlv_pipe_to_channel(pipe); + enum dpio_phy phy = vlv_pipe_to_phy(pipe); u32 val; /* Make sure the pipe isn't still relying on us */ @@ -2047,9 +2217,9 @@ void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) vlv_dpio_get(dev_priv); /* Disable 10bit clock to display controller */ - val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); + val = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port)); val &= ~DPIO_DCLKP_EN; - vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val); + vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port), val); vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll.h b/drivers/gpu/drm/i915/display/intel_dpll.h index 
bbc30542f29f..ac01bb19cc6c 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.h +++ b/drivers/gpu/drm/i915/display/intel_dpll.h @@ -20,8 +20,6 @@ int intel_dpll_crtc_compute_clock(struct intel_atomic_state *state, struct intel_crtc *crtc); int intel_dpll_crtc_get_shared_dpll(struct intel_atomic_state *state, struct intel_crtc *crtc); -int vlv_calc_dpll_params(int refclk, struct dpll *clock); -int pnv_calc_dpll_params(int refclk, struct dpll *clock); int i9xx_calc_dpll_params(int refclk, struct dpll *clock); u32 i9xx_dpll_compute_fp(const struct dpll *dpll); void vlv_compute_dpll(struct intel_crtc_state *crtc_state); @@ -41,6 +39,13 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, struct dpll *best_clock); int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); +void i9xx_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); +void vlv_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); +void chv_crtc_clock_get(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config); + void assert_pll_enabled(struct drm_i915_private *i915, enum pipe pipe); void assert_pll_disabled(struct drm_i915_private *i915, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 399653a20f98..ef57dad1a9cb 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -219,6 +219,26 @@ intel_tc_pll_enable_reg(struct drm_i915_private *i915, return MG_PLL_ENABLE(tc_port); } +static void _intel_enable_shared_dpll(struct drm_i915_private *i915, + struct intel_shared_dpll *pll) +{ + if (pll->info->power_domain) + pll->wakeref = intel_display_power_get(i915, pll->info->power_domain); + + pll->info->funcs->enable(i915, pll); + pll->on = true; +} + +static void _intel_disable_shared_dpll(struct drm_i915_private *i915, + struct intel_shared_dpll *pll) +{ + pll->info->funcs->disable(i915, pll); + pll->on = false; + + if (pll->info->power_domain) + intel_display_power_put(i915, pll->info->power_domain, pll->wakeref); +} + /** * intel_enable_shared_dpll - enable a CRTC's shared DPLL * @crtc_state: CRTC, and its state, which has a shared DPLL @@ -258,8 +278,8 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state) drm_WARN_ON(&i915->drm, pll->on); drm_dbg_kms(&i915->drm, "enabling %s\n", pll->info->name); - pll->info->funcs->enable(i915, pll); - pll->on = true; + + _intel_enable_shared_dpll(i915, pll); out: mutex_unlock(&i915->display.dpll.lock); @@ -304,8 +324,8 @@ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state) goto out; drm_dbg_kms(&i915->drm, "disabling %s\n", pll->info->name); - pll->info->funcs->disable(i915, pll); - pll->on = false; + + _intel_disable_shared_dpll(i915, pll); out: mutex_unlock(&i915->display.dpll.lock); @@ -631,9 +651,9 @@ static const struct intel_shared_dpll_funcs ibx_pch_dpll_funcs = { }; static const struct dpll_info pch_plls[] = { - { "PCH DPLL A", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_A, 0 }, - { "PCH DPLL B", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_B, 0 }, - { }, + { .name = "PCH DPLL A", .funcs = &ibx_pch_dpll_funcs, .id = DPLL_ID_PCH_PLL_A, }, + { .name = "PCH DPLL B", .funcs = &ibx_pch_dpll_funcs, .id = DPLL_ID_PCH_PLL_B, }, + {} }; static const struct intel_dpll_mgr pch_pll_mgr = { @@ -1239,13 +1259,16 @@ static const struct intel_shared_dpll_funcs hsw_ddi_lcpll_funcs = { }; static const struct dpll_info hsw_plls[] = { - { "WRPLL 1", 
&hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL1, 0 }, - { "WRPLL 2", &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL2, 0 }, - { "SPLL", &hsw_ddi_spll_funcs, DPLL_ID_SPLL, 0 }, - { "LCPLL 810", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_810, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 1350", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_1350, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 2700", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_2700, INTEL_DPLL_ALWAYS_ON }, - { }, + { .name = "WRPLL 1", .funcs = &hsw_ddi_wrpll_funcs, .id = DPLL_ID_WRPLL1, }, + { .name = "WRPLL 2", .funcs = &hsw_ddi_wrpll_funcs, .id = DPLL_ID_WRPLL2, }, + { .name = "SPLL", .funcs = &hsw_ddi_spll_funcs, .id = DPLL_ID_SPLL, }, + { .name = "LCPLL 810", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_810, + .flags = INTEL_DPLL_ALWAYS_ON, }, + { .name = "LCPLL 1350", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_1350, + .flags = INTEL_DPLL_ALWAYS_ON, }, + { .name = "LCPLL 2700", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_2700, + .flags = INTEL_DPLL_ALWAYS_ON, }, + {} }; static const struct intel_dpll_mgr hsw_pll_mgr = { @@ -1921,11 +1944,12 @@ static const struct intel_shared_dpll_funcs skl_ddi_dpll0_funcs = { }; static const struct dpll_info skl_plls[] = { - { "DPLL 0", &skl_ddi_dpll0_funcs, DPLL_ID_SKL_DPLL0, INTEL_DPLL_ALWAYS_ON }, - { "DPLL 1", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, - { "DPLL 2", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, - { "DPLL 3", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL3, 0 }, - { }, + { .name = "DPLL 0", .funcs = &skl_ddi_dpll0_funcs, .id = DPLL_ID_SKL_DPLL0, + .flags = INTEL_DPLL_ALWAYS_ON, }, + { .name = "DPLL 1", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL1, }, + { .name = "DPLL 2", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL2, }, + { .name = "DPLL 3", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL3, }, + {} }; static const struct intel_dpll_mgr skl_pll_mgr = { @@ -2376,10 +2400,10 @@ static const struct intel_shared_dpll_funcs bxt_ddi_pll_funcs = { }; static const struct dpll_info bxt_plls[] = { - { "PORT PLL A", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, - { "PORT PLL B", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, - { "PORT PLL C", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, - { }, + { .name = "PORT PLL A", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL0, }, + { .name = "PORT PLL B", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL1, }, + { .name = "PORT PLL C", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL2, }, + {} }; static const struct intel_dpll_mgr bxt_pll_mgr = { @@ -3834,18 +3858,6 @@ static void combo_pll_enable(struct drm_i915_private *i915, { i915_reg_t enable_reg = intel_combo_pll_enable_reg(i915, pll); - if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) && - pll->info->id == DPLL_ID_EHL_DPLL4) { - - /* - * We need to disable DC states when this DPLL is enabled. - * This can be done by taking a reference on DPLL4 power - * domain. 
- */ - pll->wakeref = intel_display_power_get(i915, - POWER_DOMAIN_DC_OFF); - } - icl_pll_power_enable(i915, pll, enable_reg); icl_dpll_write(i915, pll); @@ -3941,11 +3953,6 @@ static void combo_pll_disable(struct drm_i915_private *i915, i915_reg_t enable_reg = intel_combo_pll_enable_reg(i915, pll); icl_pll_disable(i915, pll, enable_reg); - - if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) && - pll->info->id == DPLL_ID_EHL_DPLL4) - intel_display_power_put(i915, POWER_DOMAIN_DC_OFF, - pll->wakeref); } static void tbt_pll_disable(struct drm_i915_private *i915, @@ -4014,14 +4021,14 @@ static const struct intel_shared_dpll_funcs mg_pll_funcs = { }; static const struct dpll_info icl_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, - { "MG PLL 1", &mg_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, - { "MG PLL 2", &mg_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, - { "MG PLL 3", &mg_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, - { "MG PLL 4", &mg_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, }, + { .name = "MG PLL 1", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, }, + { .name = "MG PLL 2", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, }, + { .name = "MG PLL 3", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, }, + { .name = "MG PLL 4", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, }, + {} }; static const struct intel_dpll_mgr icl_pll_mgr = { @@ -4035,10 +4042,11 @@ static const struct intel_dpll_mgr icl_pll_mgr = { }; static const struct dpll_info ehl_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4, + .power_domain = POWER_DOMAIN_DC_OFF, }, + {} }; static const struct intel_dpll_mgr ehl_pll_mgr = { @@ -4058,16 +4066,16 @@ static const struct intel_shared_dpll_funcs dkl_pll_funcs = { }; static const struct dpll_info tgl_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, - { "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, - { "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, - { "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, - { "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, - { "TC PLL 5", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL5, 0 }, - { "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, }, + { .name = "TC PLL 1", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, }, + { .name = "TC PLL 2", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, }, + { .name = "TC PLL 3", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, }, + { .name = "TC PLL 4", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, }, + { .name = "TC PLL 5", .funcs = 
&dkl_pll_funcs, .id = DPLL_ID_TGL_MGPLL5, }, + { .name = "TC PLL 6", .funcs = &dkl_pll_funcs, .id = DPLL_ID_TGL_MGPLL6, }, + {} }; static const struct intel_dpll_mgr tgl_pll_mgr = { @@ -4081,10 +4089,10 @@ static const struct intel_dpll_mgr tgl_pll_mgr = { }; static const struct dpll_info rkl_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4, }, + {} }; static const struct intel_dpll_mgr rkl_pll_mgr = { @@ -4097,11 +4105,11 @@ static const struct intel_dpll_mgr rkl_pll_mgr = { }; static const struct dpll_info dg1_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_DG1_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_DG1_DPLL1, 0 }, - { "DPLL 2", &combo_pll_funcs, DPLL_ID_DG1_DPLL2, 0 }, - { "DPLL 3", &combo_pll_funcs, DPLL_ID_DG1_DPLL3, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL1, }, + { .name = "DPLL 2", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL2, }, + { .name = "DPLL 3", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL3, }, + {} }; static const struct intel_dpll_mgr dg1_pll_mgr = { @@ -4114,11 +4122,11 @@ static const struct intel_dpll_mgr dg1_pll_mgr = { }; static const struct dpll_info adls_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "DPLL 2", &combo_pll_funcs, DPLL_ID_DG1_DPLL2, 0 }, - { "DPLL 3", &combo_pll_funcs, DPLL_ID_DG1_DPLL3, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "DPLL 2", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL2, }, + { .name = "DPLL 3", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL3, }, + {} }; static const struct intel_dpll_mgr adls_pll_mgr = { @@ -4131,14 +4139,14 @@ static const struct intel_dpll_mgr adls_pll_mgr = { }; static const struct dpll_info adlp_plls[] = { - { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, - { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, - { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, - { "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, - { "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, - { "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, - { "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, - { }, + { .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, }, + { .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, }, + { .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, }, + { .name = "TC PLL 1", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, }, + { .name = "TC PLL 2", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, }, + { .name = "TC PLL 3", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, }, + { .name = "TC PLL 4", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, }, + {} }; static const struct intel_dpll_mgr adlp_pll_mgr = { @@ -4365,12 +4373,8 @@ static void readout_dpll_hw_state(struct drm_i915_private *i915, pll->on = intel_dpll_get_hw_state(i915, pll, &pll->state.hw_state); - if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) 
&& - pll->on && - pll->info->id == DPLL_ID_EHL_DPLL4) { - pll->wakeref = intel_display_power_get(i915, - POWER_DOMAIN_DC_OFF); - } + if (pll->on && pll->info->power_domain) + pll->wakeref = intel_display_power_get(i915, pll->info->power_domain); pll->state.pipe_mask = 0; for_each_intel_crtc(&i915->drm, crtc) { @@ -4417,8 +4421,7 @@ static void sanitize_dpll_state(struct drm_i915_private *i915, "%s enabled but not in use, disabling\n", pll->info->name); - pll->info->funcs->disable(i915, pll); - pll->on = false; + _intel_disable_shared_dpll(i915, pll); } void intel_dpll_sanitize_state(struct drm_i915_private *i915) @@ -4534,7 +4537,7 @@ void intel_shared_dpll_state_verify(struct intel_atomic_state *state, "pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n", pipe_name(crtc->pipe), pll->active_mask); I915_STATE_WARN(i915, pll->state.pipe_mask & pipe_mask, - "pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n", + "pll enabled crtcs mismatch (found pipe %c in enabled mask (0x%x))\n", pipe_name(crtc->pipe), pll->state.pipe_mask); } } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index dd4796a61751..2e7ea0d8d3ff 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -27,6 +27,7 @@ #include <linux/types.h> +#include "intel_display_power.h" #include "intel_wakeref.h" #define for_each_shared_dpll(__i915, __pll, __i) \ @@ -270,6 +271,11 @@ struct dpll_info { */ enum intel_dpll_id id; + /** + * @power_domain: extra power domain required by the DPLL + */ + enum intel_display_power_domain power_domain; + #define INTEL_DPLL_ALWAYS_ON (1 << 0) /** * @flags: diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 48582b31b7f7..b29bceff73f2 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -9,8 +9,6 @@ #include "gt/gen8_ppgtt.h" #include "i915_drv.h" -#include "i915_reg.h" -#include "intel_de.h" #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" @@ -318,25 +316,3 @@ void intel_dpt_destroy(struct i915_address_space *vm) i915_vm_put(&dpt->vm); } -void intel_dpt_configure(struct intel_crtc *crtc) -{ - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - - if (DISPLAY_VER(i915) == 14) { - enum pipe pipe = crtc->pipe; - enum plane_id plane_id; - - for_each_plane_id_on_crtc(crtc, plane_id) { - if (plane_id == PLANE_CURSOR) - continue; - - intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id), - PLANE_CHICKEN_DISABLE_DPT, - i915->params.enable_dpt ? 0 : PLANE_CHICKEN_DISABLE_DPT); - } - } else if (DISPLAY_VER(i915) == 13) { - intel_de_rmw(i915, CHICKEN_MISC_2, - CHICKEN_MISC_DISABLE_DPT, - i915->params.enable_dpt ? 
0 : CHICKEN_MISC_DISABLE_DPT); - } -} diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h index d9a166550185..e18a9f767b11 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.h +++ b/drivers/gpu/drm/i915/display/intel_dpt.h @@ -10,7 +10,6 @@ struct drm_i915_private; struct i915_address_space; struct i915_vma; -struct intel_crtc; struct intel_framebuffer; void intel_dpt_destroy(struct i915_address_space *vm); @@ -20,6 +19,5 @@ void intel_dpt_suspend(struct drm_i915_private *i915); void intel_dpt_resume(struct drm_i915_private *i915); struct i915_address_space * intel_dpt_create(struct intel_framebuffer *fb); -void intel_dpt_configure(struct intel_crtc *crtc); #endif /* __INTEL_DPT_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.c b/drivers/gpu/drm/i915/display/intel_dpt_common.c new file mode 100644 index 000000000000..cdba47165c04 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dpt_common.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "i915_reg.h" +#include "intel_de.h" +#include "intel_display_types.h" +#include "intel_dpt_common.h" + +void intel_dpt_configure(struct intel_crtc *crtc) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + if (DISPLAY_VER(i915) == 14) { + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + if (plane_id == PLANE_CURSOR) + continue; + + intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id), + PLANE_CHICKEN_DISABLE_DPT, + i915->display.params.enable_dpt ? 0 : + PLANE_CHICKEN_DISABLE_DPT); + } + } else if (DISPLAY_VER(i915) == 13) { + intel_de_rmw(i915, CHICKEN_MISC_2, + CHICKEN_MISC_DISABLE_DPT, + i915->display.params.enable_dpt ? 
0 : + CHICKEN_MISC_DISABLE_DPT); + } +} diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.h b/drivers/gpu/drm/i915/display/intel_dpt_common.h new file mode 100644 index 000000000000..6d7de405126a --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dpt_common.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_DPT_COMMON_H__ +#define __INTEL_DPT_COMMON_H__ + +struct intel_crtc; + +void intel_dpt_configure(struct intel_crtc *crtc); + +#endif /* __INTEL_DPT_COMMON_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 7fd6280c54a7..482c28b5c2de 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -4,9 +4,6 @@ * */ -#include "gem/i915_gem_internal.h" -#include "gem/i915_gem_lmem.h" - #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -14,12 +11,13 @@ #include "intel_de.h" #include "intel_display_types.h" #include "intel_dsb.h" +#include "intel_dsb_buffer.h" #include "intel_dsb_regs.h" #include "intel_vblank.h" #include "intel_vrr.h" #include "skl_watermark.h" -struct i915_vma; +#define CACHELINE_BYTES 64 enum dsb_id { INVALID_DSB = -1, @@ -32,8 +30,7 @@ enum dsb_id { struct intel_dsb { enum dsb_id id; - u32 *cmd_buf; - struct i915_vma *vma; + struct intel_dsb_buffer dsb_buf; struct intel_crtc *crtc; /* @@ -109,15 +106,17 @@ static void intel_dsb_dump(struct intel_dsb *dsb) { struct intel_crtc *crtc = dsb->crtc; struct drm_i915_private *i915 = to_i915(crtc->base.dev); - const u32 *buf = dsb->cmd_buf; int i; drm_dbg_kms(&i915->drm, "[CRTC:%d:%s] DSB %d commands {\n", crtc->base.base.id, crtc->base.name, dsb->id); for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4) drm_dbg_kms(&i915->drm, - " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i * 4, buf[i], buf[i+1], buf[i+2], buf[i+3]); + " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4, + intel_dsb_buffer_read(&dsb->dsb_buf, i), + intel_dsb_buffer_read(&dsb->dsb_buf, i + 1), + intel_dsb_buffer_read(&dsb->dsb_buf, i + 2), + intel_dsb_buffer_read(&dsb->dsb_buf, i + 3)); drm_dbg_kms(&i915->drm, "}\n"); } @@ -129,8 +128,6 @@ static bool is_dsb_busy(struct drm_i915_private *i915, enum pipe pipe, static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw) { - u32 *buf = dsb->cmd_buf; - if (!assert_dsb_has_room(dsb)) return; @@ -139,14 +136,13 @@ static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw) dsb->ins_start_offset = dsb->free_pos; - buf[dsb->free_pos++] = ldw; - buf[dsb->free_pos++] = udw; + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, ldw); + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, udw); } static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb, u32 opcode, i915_reg_t reg) { - const u32 *buf = dsb->cmd_buf; u32 prev_opcode, prev_reg; /* @@ -157,8 +153,10 @@ static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb, if (dsb->free_pos == 0) return false; - prev_opcode = buf[dsb->ins_start_offset + 1] & ~DSB_REG_VALUE_MASK; - prev_reg = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK; + prev_opcode = intel_dsb_buffer_read(&dsb->dsb_buf, + dsb->ins_start_offset + 1) & ~DSB_REG_VALUE_MASK; + prev_reg = intel_dsb_buffer_read(&dsb->dsb_buf, + dsb->ins_start_offset + 1) & DSB_REG_VALUE_MASK; return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg); } @@ -191,6 +189,8 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_ void intel_dsb_reg_write(struct 
intel_dsb *dsb, i915_reg_t reg, u32 val) { + u32 old_val; + /* * For example the buffer will look like below for 3 dwords for auto * increment register: @@ -214,31 +214,32 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | i915_mmio_reg_offset(reg)); } else { - u32 *buf = dsb->cmd_buf; - if (!assert_dsb_has_room(dsb)) return; /* convert to indexed write? */ if (intel_dsb_prev_ins_is_mmio_write(dsb, reg)) { - u32 prev_val = buf[dsb->ins_start_offset + 0]; + u32 prev_val = intel_dsb_buffer_read(&dsb->dsb_buf, + dsb->ins_start_offset + 0); - buf[dsb->ins_start_offset + 0] = 1; /* count */ - buf[dsb->ins_start_offset + 1] = - (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) | - i915_mmio_reg_offset(reg); - buf[dsb->ins_start_offset + 2] = prev_val; + intel_dsb_buffer_write(&dsb->dsb_buf, + dsb->ins_start_offset + 0, 1); /* count */ + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 1, + (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) | + i915_mmio_reg_offset(reg)); + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 2, prev_val); dsb->free_pos++; } - buf[dsb->free_pos++] = val; + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val); /* Update the count */ - buf[dsb->ins_start_offset]++; + old_val = intel_dsb_buffer_read(&dsb->dsb_buf, dsb->ins_start_offset); + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset, old_val + 1); /* if number of data words is odd, then the last dword should be 0.*/ if (dsb->free_pos & 0x1) - buf[dsb->free_pos] = 0; + intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0); } } @@ -297,8 +298,8 @@ static void intel_dsb_align_tail(struct intel_dsb *dsb) aligned_tail = ALIGN(tail, CACHELINE_BYTES); if (aligned_tail > tail) - memset(&dsb->cmd_buf[dsb->free_pos], 0, - aligned_tail - tail); + intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0, + aligned_tail - tail); dsb->free_pos = aligned_tail / 4; } @@ -317,7 +318,7 @@ void intel_dsb_finish(struct intel_dsb *dsb) intel_dsb_align_tail(dsb); - i915_gem_object_flush_map(dsb->vma->obj); + intel_dsb_buffer_flush_map(&dsb->dsb_buf); } static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) @@ -361,7 +362,7 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, ctrl | DSB_ENABLE); intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id), - i915_ggtt_offset(dsb->vma)); + intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); if (dewake_scanline >= 0) { int diff, hw_dewake_scanline; @@ -383,7 +384,7 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, } intel_de_write_fw(dev_priv, DSB_TAIL(pipe, dsb->id), - i915_ggtt_offset(dsb->vma) + tail); + intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf) + tail); } /** @@ -408,7 +409,7 @@ void intel_dsb_wait(struct intel_dsb *dsb) enum pipe pipe = crtc->pipe; if (wait_for(!is_dsb_busy(dev_priv, pipe, dsb->id), 1)) { - u32 offset = i915_ggtt_offset(dsb->vma); + u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf); intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id), DSB_ENABLE | DSB_HALT); @@ -445,12 +446,9 @@ struct intel_dsb *intel_dsb_prepare(const struct intel_crtc_state *crtc_state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct drm_i915_gem_object *obj; intel_wakeref_t wakeref; struct intel_dsb *dsb; - struct i915_vma *vma; unsigned int size; - u32 *buf; if (!HAS_DSB(i915)) return NULL; @@ -464,37 +462,13 @@ struct intel_dsb *intel_dsb_prepare(const struct intel_crtc_state *crtc_state, /* 
~1 qword per instruction, full cachelines */ size = ALIGN(max_cmds * 8, CACHELINE_BYTES); - if (HAS_LMEM(i915)) { - obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size), - I915_BO_ALLOC_CONTIGUOUS); - if (IS_ERR(obj)) - goto out_put_rpm; - } else { - obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size)); - if (IS_ERR(obj)) - goto out_put_rpm; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - } - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - goto out_put_rpm; - } - - buf = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC); - if (IS_ERR(buf)) { - i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); + if (!intel_dsb_buffer_create(crtc, &dsb->dsb_buf, size)) goto out_put_rpm; - } intel_runtime_pm_put(&i915->runtime_pm, wakeref); dsb->id = DSB1; - dsb->vma = vma; dsb->crtc = crtc; - dsb->cmd_buf = buf; dsb->size = size / 4; /* in dwords */ dsb->free_pos = 0; dsb->ins_start_offset = 0; @@ -522,6 +496,6 @@ out: */ void intel_dsb_cleanup(struct intel_dsb *dsb) { - i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); + intel_dsb_buffer_cleanup(&dsb->dsb_buf); kfree(dsb); } diff --git a/drivers/gpu/drm/i915/display/intel_dsb_buffer.c b/drivers/gpu/drm/i915/display/intel_dsb_buffer.c new file mode 100644 index 000000000000..c77d48bda26a --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb_buffer.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023, Intel Corporation. + */ + +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_display_types.h" +#include "intel_dsb_buffer.h" + +u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) +{ + return i915_ggtt_offset(dsb_buf->vma); +} + +void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) +{ + dsb_buf->cmd_buf[idx] = val; +} + +u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) +{ + return dsb_buf->cmd_buf[idx]; +} + +void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) +{ + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); + + memset(&dsb_buf->cmd_buf[idx], val, size); +} + +bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *buf; + + if (HAS_LMEM(i915)) { + obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size), + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return false; + } else { + obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size)); + if (IS_ERR(obj)) + return false; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + } + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return false; + } + + buf = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC); + if (IS_ERR(buf)) { + i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); + return false; + } + + dsb_buf->vma = vma; + dsb_buf->cmd_buf = buf; + dsb_buf->buf_size = size; + + return true; +} + +void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) +{ + i915_vma_unpin_and_release(&dsb_buf->vma, I915_VMA_RELEASE_MAP); +} + +void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) +{ + i915_gem_object_flush_map(dsb_buf->vma->obj); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb_buffer.h 
b/drivers/gpu/drm/i915/display/intel_dsb_buffer.h new file mode 100644 index 000000000000..425acd393905 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb_buffer.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2023 Intel Corporation + */ + +#ifndef _INTEL_DSB_BUFFER_H +#define _INTEL_DSB_BUFFER_H + +#include <linux/types.h> + +struct intel_crtc; +struct i915_vma; + +struct intel_dsb_buffer { + u32 *cmd_buf; + struct i915_vma *vma; + size_t buf_size; +}; + +u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf); +void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val); +u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx); +void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size); +bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, + size_t size); +void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf); +void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 24b2cbcfc1ef..a5d7fc8418c9 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -55,43 +55,6 @@ #define MIPI_VIRTUAL_CHANNEL_SHIFT 1 #define MIPI_PORT_SHIFT 3 -/* base offsets for gpio pads */ -#define VLV_GPIO_NC_0_HV_DDI0_HPD 0x4130 -#define VLV_GPIO_NC_1_HV_DDI0_DDC_SDA 0x4120 -#define VLV_GPIO_NC_2_HV_DDI0_DDC_SCL 0x4110 -#define VLV_GPIO_NC_3_PANEL0_VDDEN 0x4140 -#define VLV_GPIO_NC_4_PANEL0_BKLTEN 0x4150 -#define VLV_GPIO_NC_5_PANEL0_BKLTCTL 0x4160 -#define VLV_GPIO_NC_6_HV_DDI1_HPD 0x4180 -#define VLV_GPIO_NC_7_HV_DDI1_DDC_SDA 0x4190 -#define VLV_GPIO_NC_8_HV_DDI1_DDC_SCL 0x4170 -#define VLV_GPIO_NC_9_PANEL1_VDDEN 0x4100 -#define VLV_GPIO_NC_10_PANEL1_BKLTEN 0x40E0 -#define VLV_GPIO_NC_11_PANEL1_BKLTCTL 0x40F0 - -#define VLV_GPIO_PCONF0(base_offset) (base_offset) -#define VLV_GPIO_PAD_VAL(base_offset) ((base_offset) + 8) - -struct gpio_map { - u16 base_offset; - bool init; -}; - -static struct gpio_map vlv_gpio_table[] = { - { VLV_GPIO_NC_0_HV_DDI0_HPD }, - { VLV_GPIO_NC_1_HV_DDI0_DDC_SDA }, - { VLV_GPIO_NC_2_HV_DDI0_DDC_SCL }, - { VLV_GPIO_NC_3_PANEL0_VDDEN }, - { VLV_GPIO_NC_4_PANEL0_BKLTEN }, - { VLV_GPIO_NC_5_PANEL0_BKLTCTL }, - { VLV_GPIO_NC_6_HV_DDI1_HPD }, - { VLV_GPIO_NC_7_HV_DDI1_DDC_SDA }, - { VLV_GPIO_NC_8_HV_DDI1_DDC_SCL }, - { VLV_GPIO_NC_9_PANEL1_VDDEN }, - { VLV_GPIO_NC_10_PANEL1_BKLTEN }, - { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, -}; - struct i2c_adapter_lookup { u16 slave_addr; struct intel_dsi *intel_dsi; @@ -103,19 +66,6 @@ struct i2c_adapter_lookup { #define CHV_GPIO_IDX_START_SW 100 #define CHV_GPIO_IDX_START_SE 198 -#define CHV_VBT_MAX_PINS_PER_FMLY 15 - -#define CHV_GPIO_PAD_CFG0(f, i) (0x4400 + (f) * 0x400 + (i) * 8) -#define CHV_GPIO_GPIOEN (1 << 15) -#define CHV_GPIO_GPIOCFG_GPIO (0 << 8) -#define CHV_GPIO_GPIOCFG_GPO (1 << 8) -#define CHV_GPIO_GPIOCFG_GPI (2 << 8) -#define CHV_GPIO_GPIOCFG_HIZ (3 << 8) -#define CHV_GPIO_GPIOTXSTATE(state) ((!!(state)) << 1) - -#define CHV_GPIO_PAD_CFG1(f, i) (0x4400 + (f) * 0x400 + (i) * 8 + 4) -#define CHV_GPIO_CFGLOCK (1 << 31) - /* ICL DSI Display GPIO Pins */ #define ICL_GPIO_DDSP_HPD_A 0 #define ICL_GPIO_L_VDDEN_1 1 @@ -142,7 +92,7 @@ static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi, if (seq_port) { if (intel_dsi->ports & BIT(PORT_B)) return PORT_B; - else if (intel_dsi->ports & BIT(PORT_C)) + 
if (intel_dsi->ports & BIT(PORT_C)) return PORT_C; } @@ -243,75 +193,93 @@ static const u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, const u8 *data) return data; } -static void vlv_exec_gpio(struct intel_connector *connector, - u8 gpio_source, u8 gpio_index, bool value) +static void soc_gpio_set_value(struct intel_connector *connector, u8 gpio_index, + const char *con_id, u8 idx, bool value) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct gpio_map *map; - u16 pconf0, padval; - u32 tmp; - u8 port; - - if (gpio_index >= ARRAY_SIZE(vlv_gpio_table)) { - drm_dbg_kms(&dev_priv->drm, "unknown gpio index %u\n", - gpio_index); - return; + /* XXX: this table is a quick ugly hack. */ + static struct gpio_desc *soc_gpio_table[U8_MAX + 1]; + struct gpio_desc *gpio_desc = soc_gpio_table[gpio_index]; + + if (gpio_desc) { + gpiod_set_value(gpio_desc, value); + } else { + gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev, con_id, idx, + value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW); + if (IS_ERR(gpio_desc)) { + drm_err(&dev_priv->drm, + "GPIO index %u request failed (%pe)\n", + gpio_index, gpio_desc); + return; + } + + soc_gpio_table[gpio_index] = gpio_desc; } +} - map = &vlv_gpio_table[gpio_index]; +static void soc_opaque_gpio_set_value(struct intel_connector *connector, + u8 gpio_index, const char *chip, + const char *con_id, u8 idx, bool value) +{ + struct gpiod_lookup_table *lookup; - if (connector->panel.vbt.dsi.seq_version >= 3) { - /* XXX: this assumes vlv_gpio_table only has NC GPIOs. */ - port = IOSF_PORT_GPIO_NC; - } else { - if (gpio_source == 0) { - port = IOSF_PORT_GPIO_NC; - } else if (gpio_source == 1) { + lookup = kzalloc(struct_size(lookup, table, 2), GFP_KERNEL); + if (!lookup) + return; + + lookup->dev_id = "0000:00:02.0"; + lookup->table[0] = + GPIO_LOOKUP_IDX(chip, idx, con_id, idx, GPIO_ACTIVE_HIGH); + + gpiod_add_lookup_table(lookup); + + soc_gpio_set_value(connector, gpio_index, con_id, idx, value); + + gpiod_remove_lookup_table(lookup); + kfree(lookup); +} + +static void vlv_gpio_set_value(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + + /* XXX: this assumes vlv_gpio_table only has NC GPIOs. */ + if (connector->panel.vbt.dsi.seq_version < 3) { + if (gpio_source == 1) { drm_dbg_kms(&dev_priv->drm, "SC gpio not supported\n"); return; - } else { + } + if (gpio_source > 1) { drm_dbg_kms(&dev_priv->drm, "unknown gpio source %u\n", gpio_source); return; } } - pconf0 = VLV_GPIO_PCONF0(map->base_offset); - padval = VLV_GPIO_PAD_VAL(map->base_offset); - - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); - if (!map->init) { - /* FIXME: remove constant below */ - vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00); - map->init = true; - } - - tmp = 0x4 | value; - vlv_iosf_sb_write(dev_priv, port, padval, tmp); - vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); + soc_opaque_gpio_set_value(connector, gpio_index, + "INT33FC:01", "Panel N", gpio_index, value); } -static void chv_exec_gpio(struct intel_connector *connector, - u8 gpio_source, u8 gpio_index, bool value) +static void chv_gpio_set_value(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - u16 cfg0, cfg1; - u16 family_num; - u8 port; if (connector->panel.vbt.dsi.seq_version >= 3) { if (gpio_index >= CHV_GPIO_IDX_START_SE) { /* XXX: it's unclear whether 255->57 is part of SE. 
*/ - gpio_index -= CHV_GPIO_IDX_START_SE; - port = CHV_IOSF_PORT_GPIO_SE; + soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:03", "Panel SE", + gpio_index - CHV_GPIO_IDX_START_SE, value); } else if (gpio_index >= CHV_GPIO_IDX_START_SW) { - gpio_index -= CHV_GPIO_IDX_START_SW; - port = CHV_IOSF_PORT_GPIO_SW; + soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:00", "Panel SW", + gpio_index - CHV_GPIO_IDX_START_SW, value); } else if (gpio_index >= CHV_GPIO_IDX_START_E) { - gpio_index -= CHV_GPIO_IDX_START_E; - port = CHV_IOSF_PORT_GPIO_E; + soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:02", "Panel E", + gpio_index - CHV_GPIO_IDX_START_E, value); } else { - port = CHV_IOSF_PORT_GPIO_N; + soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:01", "Panel N", + gpio_index - CHV_GPIO_IDX_START_N, value); } } else { /* XXX: The spec is unclear about CHV GPIO on seq v2 */ @@ -328,56 +296,15 @@ static void chv_exec_gpio(struct intel_connector *connector, return; } - port = CHV_IOSF_PORT_GPIO_N; - } - - family_num = gpio_index / CHV_VBT_MAX_PINS_PER_FMLY; - gpio_index = gpio_index % CHV_VBT_MAX_PINS_PER_FMLY; - - cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index); - cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index); - - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); - vlv_iosf_sb_write(dev_priv, port, cfg1, 0); - vlv_iosf_sb_write(dev_priv, port, cfg0, - CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO | - CHV_GPIO_GPIOTXSTATE(value)); - vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); -} - -static void bxt_exec_gpio(struct intel_connector *connector, - u8 gpio_source, u8 gpio_index, bool value) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - /* XXX: this table is a quick ugly hack. */ - static struct gpio_desc *bxt_gpio_table[U8_MAX + 1]; - struct gpio_desc *gpio_desc = bxt_gpio_table[gpio_index]; - - if (!gpio_desc) { - gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev, - NULL, gpio_index, - value ? 
GPIOD_OUT_LOW : - GPIOD_OUT_HIGH); - - if (IS_ERR_OR_NULL(gpio_desc)) { - drm_err(&dev_priv->drm, - "GPIO index %u request failed (%ld)\n", - gpio_index, PTR_ERR(gpio_desc)); - return; - } - - bxt_gpio_table[gpio_index] = gpio_desc; + soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:01", "Panel N", + gpio_index - CHV_GPIO_IDX_START_N, value); } - - gpiod_set_value(gpio_desc, value); } -static void icl_exec_gpio(struct intel_connector *connector, - u8 gpio_source, u8 gpio_index, bool value) +static void bxt_gpio_set_value(struct intel_connector *connector, + u8 gpio_index, bool value) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - - drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n"); + soc_gpio_set_value(connector, gpio_index, NULL, gpio_index, value); } enum { @@ -462,44 +389,45 @@ static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) { struct drm_device *dev = intel_dsi->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct intel_connector *connector = intel_dsi->attached_connector; - u8 gpio_source, gpio_index = 0, gpio_number; + u8 gpio_source = 0, gpio_index = 0, gpio_number; bool value; - bool native = DISPLAY_VER(dev_priv) >= 11; + int size; + bool native = DISPLAY_VER(i915) >= 11; - if (connector->panel.vbt.dsi.seq_version >= 3) - gpio_index = *data++; + if (connector->panel.vbt.dsi.seq_version >= 3) { + size = 3; - gpio_number = *data++; + gpio_index = data[0]; + gpio_number = data[1]; + value = data[2] & BIT(0); - /* gpio source in sequence v2 only */ - if (connector->panel.vbt.dsi.seq_version == 2) - gpio_source = (*data >> 1) & 3; - else - gpio_source = 0; + if (connector->panel.vbt.dsi.seq_version >= 4 && data[2] & BIT(1)) + native = false; + } else { + size = 2; - if (connector->panel.vbt.dsi.seq_version >= 4 && *data & BIT(1)) - native = false; + gpio_number = data[0]; + value = data[1] & BIT(0); - /* pull up/down */ - value = *data++ & 1; + if (connector->panel.vbt.dsi.seq_version == 2) + gpio_source = (data[1] >> 1) & 3; + } - drm_dbg_kms(&dev_priv->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n", + drm_dbg_kms(&i915->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n", gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value)); if (native) - icl_native_gpio_set_value(dev_priv, gpio_number, value); - else if (DISPLAY_VER(dev_priv) >= 11) - icl_exec_gpio(connector, gpio_source, gpio_index, value); - else if (IS_VALLEYVIEW(dev_priv)) - vlv_exec_gpio(connector, gpio_source, gpio_number, value); - else if (IS_CHERRYVIEW(dev_priv)) - chv_exec_gpio(connector, gpio_source, gpio_number, value); - else - bxt_exec_gpio(connector, gpio_source, gpio_index, value); - - return data; + icl_native_gpio_set_value(i915, gpio_number, value); + else if (DISPLAY_VER(i915) >= 9) + bxt_gpio_set_value(connector, gpio_index, value); + else if (IS_VALLEYVIEW(i915)) + vlv_gpio_set_value(connector, gpio_source, gpio_number, value); + else if (IS_CHERRYVIEW(i915)) + chv_gpio_set_value(connector, gpio_source, gpio_number, value); + + return data + size; } #ifdef CONFIG_ACPI @@ -658,6 +586,7 @@ static const fn_mipi_elem_exec exec_elem[] = { */ static const char * const seq_name[] = { + [MIPI_SEQ_END] = "MIPI_SEQ_END", [MIPI_SEQ_DEASSERT_RESET] = "MIPI_SEQ_DEASSERT_RESET", [MIPI_SEQ_INIT_OTP] = "MIPI_SEQ_INIT_OTP", [MIPI_SEQ_DISPLAY_ON] = 
"MIPI_SEQ_DISPLAY_ON", @@ -673,10 +602,10 @@ static const char * const seq_name[] = { static const char *sequence_name(enum mipi_seq seq_id) { - if (seq_id < ARRAY_SIZE(seq_name) && seq_name[seq_id]) + if (seq_id < ARRAY_SIZE(seq_name)) return seq_name[seq_id]; - else - return "(unknown)"; + + return "(unknown)"; } static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, @@ -707,13 +636,10 @@ static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, if (connector->panel.vbt.dsi.seq_version >= 3) data += 4; - while (1) { + while (*data != MIPI_SEQ_ELEM_END) { u8 operation_byte = *data++; u8 operation_size = 0; - if (operation_byte == MIPI_SEQ_ELEM_END) - break; - if (operation_byte < ARRAY_SIZE(exec_elem)) mipi_elem_exec = exec_elem[operation_byte]; else @@ -873,36 +799,34 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) * multiply by 100 to preserve remainder */ if (intel_dsi->video_mode == BURST_MODE) { - if (mipi_config->target_burst_mode_freq) { - u32 bitrate = intel_dsi_bitrate(intel_dsi); - - /* - * Sometimes the VBT contains a slightly lower clock, - * then the bitrate we have calculated, in this case - * just replace it with the calculated bitrate. - */ - if (mipi_config->target_burst_mode_freq < bitrate && - intel_fuzzy_clock_check( - mipi_config->target_burst_mode_freq, - bitrate)) - mipi_config->target_burst_mode_freq = bitrate; - - if (mipi_config->target_burst_mode_freq < bitrate) { - drm_err(&dev_priv->drm, - "Burst mode freq is less than computed\n"); - return false; - } + u32 bitrate; - burst_mode_ratio = DIV_ROUND_UP( - mipi_config->target_burst_mode_freq * 100, - bitrate); + if (mipi_config->target_burst_mode_freq == 0) { + drm_err(&dev_priv->drm, "Burst mode target is not set\n"); + return false; + } - intel_dsi->pclk = DIV_ROUND_UP(intel_dsi->pclk * burst_mode_ratio, 100); - } else { - drm_err(&dev_priv->drm, - "Burst mode target is not set\n"); + bitrate = intel_dsi_bitrate(intel_dsi); + + /* + * Sometimes the VBT contains a slightly lower clock, then + * the bitrate we have calculated, in this case just replace it + * with the calculated bitrate. + */ + if (mipi_config->target_burst_mode_freq < bitrate && + intel_fuzzy_clock_check(mipi_config->target_burst_mode_freq, + bitrate)) + mipi_config->target_burst_mode_freq = bitrate; + + if (mipi_config->target_burst_mode_freq < bitrate) { + drm_err(&dev_priv->drm, "Burst mode freq is less than computed\n"); return false; } + + burst_mode_ratio = + DIV_ROUND_UP(mipi_config->target_burst_mode_freq * 100, bitrate); + + intel_dsi->pclk = DIV_ROUND_UP(intel_dsi->pclk * burst_mode_ratio, 100); } else burst_mode_ratio = 100; @@ -964,6 +888,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) struct intel_connector *connector = intel_dsi->attached_connector; struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; enum gpiod_flags flags = panel_is_on ? 
GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + struct gpiod_lookup_table *gpiod_lookup_table = NULL; bool want_backlight_gpio = false; bool want_panel_gpio = false; struct pinctrl *pinctrl; @@ -971,12 +896,12 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && mipi_config->pwm_blc == PPS_BLC_PMIC) { - gpiod_add_lookup_table(&pmic_panel_gpio_table); + gpiod_lookup_table = &pmic_panel_gpio_table; want_panel_gpio = true; } if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { - gpiod_add_lookup_table(&soc_panel_gpio_table); + gpiod_lookup_table = &soc_panel_gpio_table; want_panel_gpio = true; want_backlight_gpio = true; @@ -993,8 +918,11 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) "Failed to set pinmux to PWM\n"); } + if (gpiod_lookup_table) + gpiod_add_lookup_table(gpiod_lookup_table); + if (want_panel_gpio) { - intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags); + intel_dsi->gpio_panel = devm_gpiod_get(dev->dev, "panel", flags); if (IS_ERR(intel_dsi->gpio_panel)) { drm_err(&dev_priv->drm, "Failed to own gpio for panel control\n"); @@ -1004,38 +932,14 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) if (want_backlight_gpio) { intel_dsi->gpio_backlight = - gpiod_get(dev->dev, "backlight", flags); + devm_gpiod_get(dev->dev, "backlight", flags); if (IS_ERR(intel_dsi->gpio_backlight)) { drm_err(&dev_priv->drm, "Failed to own gpio for backlight control\n"); intel_dsi->gpio_backlight = NULL; } } -} -void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi) -{ - struct drm_device *dev = intel_dsi->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_connector *connector = intel_dsi->attached_connector; - struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; - - if (intel_dsi->gpio_panel) { - gpiod_put(intel_dsi->gpio_panel); - intel_dsi->gpio_panel = NULL; - } - - if (intel_dsi->gpio_backlight) { - gpiod_put(intel_dsi->gpio_backlight); - intel_dsi->gpio_backlight = NULL; - } - - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && - mipi_config->pwm_blc == PPS_BLC_PMIC) - gpiod_remove_lookup_table(&pmic_panel_gpio_table); - - if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) { - pinctrl_unregister_mappings(soc_pwm_pinctrl_map); - gpiod_remove_lookup_table(&soc_panel_gpio_table); - } + if (gpiod_lookup_table) + gpiod_remove_lookup_table(gpiod_lookup_table); } diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h index 468d873fab1a..3462fcc760e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h @@ -13,7 +13,6 @@ struct intel_dsi; bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on); -void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi); void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, enum mipi_seq seq_id); void intel_dsi_log_params(struct intel_dsi *intel_dsi); diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 646f367a13f5..0c0144eaa8fa 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -4,7 +4,6 @@ */ #include <drm/drm_blend.h> -#include <drm/drm_framebuffer.h> #include <drm/drm_modeset_helper.h> #include <linux/dma-fence.h> @@ -15,6 +14,7 @@ #include 
"intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" +#include "intel_fb_bo.h" #include "intel_frontbuffer.h" #define check_array_bounds(i915, a, i) drm_WARN_ON(&(i915)->drm, (i) >= ARRAY_SIZE(a)) @@ -301,6 +301,33 @@ lookup_format_info(const struct drm_format_info formats[], return NULL; } +unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) +{ + const struct intel_modifier_desc *md; + u8 tiling_caps; + + md = lookup_modifier_or_null(fb_modifier); + if (!md) + return I915_TILING_NONE; + + tiling_caps = lookup_modifier_or_null(fb_modifier)->plane_caps & + INTEL_PLANE_CAP_TILING_MASK; + + switch (tiling_caps) { + case INTEL_PLANE_CAP_TILING_Y: + return I915_TILING_Y; + case INTEL_PLANE_CAP_TILING_X: + return I915_TILING_X; + case INTEL_PLANE_CAP_TILING_4: + case INTEL_PLANE_CAP_TILING_Yf: + case INTEL_PLANE_CAP_TILING_NONE: + return I915_TILING_NONE; + default: + MISSING_CASE(tiling_caps); + return I915_TILING_NONE; + } +} + /** * intel_fb_get_format_info: Get a modifier specific format information * @cmd: FB add command structure @@ -737,26 +764,6 @@ intel_fb_align_height(const struct drm_framebuffer *fb, return ALIGN(height, tile_height); } -static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) -{ - u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps & - INTEL_PLANE_CAP_TILING_MASK; - - switch (tiling_caps) { - case INTEL_PLANE_CAP_TILING_Y: - return I915_TILING_Y; - case INTEL_PLANE_CAP_TILING_X: - return I915_TILING_X; - case INTEL_PLANE_CAP_TILING_4: - case INTEL_PLANE_CAP_TILING_Yf: - case INTEL_PLANE_CAP_TILING_NONE: - return I915_TILING_NONE; - default: - MISSING_CASE(tiling_caps); - return I915_TILING_NONE; - } -} - bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) { return HAS_DPT(i915) && modifier != DRM_FORMAT_MOD_LINEAR; @@ -764,7 +771,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) { - return fb && to_i915(fb->dev)->params.enable_dpt && + return to_i915(fb->dev)->display.params.enable_dpt && intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier); } @@ -1670,10 +1677,10 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * max_size = max(max_size, offset + size); } - if (mul_u32_u32(max_size, tile_size) > obj->base.size) { + if (mul_u32_u32(max_size, tile_size) > intel_bo_to_drm_bo(obj)->size) { drm_dbg_kms(&i915->drm, "fb too big for bo (need %llu bytes, have %zu bytes)\n", - mul_u32_u32(max_size, tile_size), obj->base.size); + mul_u32_u32(max_size, tile_size), intel_bo_to_drm_bo(obj)->size); return -EINVAL; } @@ -1894,6 +1901,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) intel_frontbuffer_put(intel_fb->frontbuffer); + intel_fb_bo_framebuffer_fini(intel_fb_obj(fb)); + kfree(intel_fb); } @@ -1902,7 +1911,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, unsigned int *handle) { struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(intel_bo_to_drm_bo(obj)->dev); if (i915_gem_object_is_userptr(obj)) { drm_dbg(&i915->drm, @@ -1910,7 +1919,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, return -EINVAL; } - return drm_gem_handle_create(file, &obj->base, handle); + return drm_gem_handle_create(file, intel_bo_to_drm_bo(obj), handle); } struct frontbuffer_fence_cb { @@ -1943,10 +1952,10 @@ static int 
intel_user_framebuffer_dirty(struct drm_framebuffer *fb, if (!atomic_read(&front->bits)) return 0; - if (dma_resv_test_signaled(obj->base.resv, dma_resv_usage_rw(false))) + if (dma_resv_test_signaled(intel_bo_to_drm_bo(obj)->resv, dma_resv_usage_rw(false))) goto flush; - ret = dma_resv_get_singleton(obj->base.resv, dma_resv_usage_rw(false), + ret = dma_resv_get_singleton(intel_bo_to_drm_bo(obj)->resv, dma_resv_usage_rw(false), &fence); if (ret || !fence) goto flush; @@ -1988,61 +1997,30 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - unsigned int tiling, stride; int ret = -EINVAL; int i; - intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) - return -ENOMEM; - - i915_gem_object_lock(obj, NULL); - tiling = i915_gem_object_get_tiling(obj); - stride = i915_gem_object_get_stride(obj); - i915_gem_object_unlock(obj); + ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd); + if (ret) + return ret; - if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { - /* - * If there's a fence, enforce that - * the fb modifier and tiling mode match. - */ - if (tiling != I915_TILING_NONE && - tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - drm_dbg_kms(&dev_priv->drm, - "tiling_mode doesn't match fb modifier\n"); - goto err; - } - } else { - if (tiling == I915_TILING_X) { - mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - } else if (tiling == I915_TILING_Y) { - drm_dbg_kms(&dev_priv->drm, - "No Y tiling for legacy addfb\n"); - goto err; - } + intel_fb->frontbuffer = intel_frontbuffer_get(obj); + if (!intel_fb->frontbuffer) { + ret = -ENOMEM; + goto err; } + ret = -EINVAL; if (!drm_any_plane_has_format(&dev_priv->drm, mode_cmd->pixel_format, mode_cmd->modifier[0])) { drm_dbg_kms(&dev_priv->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err; - } - - /* - * gen2/3 display engine uses the fence if present, - * so the tiling mode must match the fb modifier exactly. - */ - if (DISPLAY_VER(dev_priv) < 4 && - tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - drm_dbg_kms(&dev_priv->drm, - "tiling_mode must match fb modifier exactly on gen2/3\n"); - goto err; + goto err_frontbuffer_put; } max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format, @@ -2053,18 +2031,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err; - } - - /* - * If there's a fence, enforce that - * the fb pitch and fence stride match. - */ - if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { - drm_dbg_kms(&dev_priv->drm, - "pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], stride); - goto err; + goto err_frontbuffer_put; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
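
Editor's note: the relabeling from a catch-all "err:" to "err_frontbuffer_put"/"err_free_dpt" follows the usual kernel unwind idiom — each label undoes exactly the setup steps that succeeded, in reverse order, while the new first step (intel_fb_bo_framebuffer_init()) gets its own terminal "err:" cleanup. Schematically, with hypothetical helpers:

	int setup(struct ctx *c)
	{
		int ret;

		ret = step_a(c);	/* e.g. bo framebuffer init */
		if (ret)
			return ret;	/* nothing to unwind yet */

		ret = step_b(c);	/* e.g. frontbuffer get */
		if (ret)
			goto err_undo_a;

		return 0;

	err_undo_a:
		undo_a(c);		/* e.g. bo framebuffer fini */
		return ret;
	}
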
*/ @@ -2072,7 +2039,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(&dev_priv->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err; + goto err_frontbuffer_put; } drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); @@ -2083,7 +2050,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(&dev_priv->drm, "bad plane %d handle\n", i); - goto err; + goto err_frontbuffer_put; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2091,7 +2058,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(&dev_priv->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err; + goto err_frontbuffer_put; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2102,16 +2069,16 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err; + goto err_frontbuffer_put; } } - fb->obj[i] = &obj->base; + fb->obj[i] = intel_bo_to_drm_bo(obj); } ret = intel_fill_fb_info(dev_priv, intel_fb); if (ret) - goto err; + goto err_frontbuffer_put; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2120,7 +2087,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (IS_ERR(vm)) { drm_dbg_kms(&dev_priv->drm, "failed to create DPT\n"); ret = PTR_ERR(vm); - goto err; + goto err_frontbuffer_put; } intel_fb->dpt_vm = vm; @@ -2137,8 +2104,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); -err: +err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); +err: + intel_fb_bo_framebuffer_fini(obj); return ret; } @@ -2150,23 +2119,14 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_framebuffer *fb; struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - struct drm_i915_private *i915; - - obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); - if (!obj) - return ERR_PTR(-ENOENT); - - /* object is backed with LMEM for discrete */ - i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) { - /* object is "remote", not in local memory */ - i915_gem_object_put(obj); - drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n"); - return ERR_PTR(-EREMOTE); - } + struct drm_i915_private *i915 = to_i915(dev); + + obj = intel_fb_bo_lookup_valid_bo(i915, filp, &mode_cmd); + if (IS_ERR(obj)) + return ERR_CAST(obj); fb = intel_framebuffer_create(obj, &mode_cmd); - i915_gem_object_put(obj); + drm_gem_object_put(intel_bo_to_drm_bo(obj)); return fb; } diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index e85167d6bc34..23db6628f53e 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -95,4 +95,6 @@ intel_user_framebuffer_create(struct drm_device *dev, bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier); bool intel_fb_uses_dpt(const struct drm_framebuffer *fb); +unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier); + #endif /* __INTEL_FB_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c new file mode 100644 index 000000000000..4be09541e509 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c @@ -0,0 +1,97 
@@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/drm_framebuffer.h> + +#include "gem/i915_gem_object.h" + +#include "i915_drv.h" +#include "intel_fb.h" +#include "intel_fb_bo.h" + +void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj) +{ + /* Nothing to do for i915 */ +} + +int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int tiling, stride; + + i915_gem_object_lock(obj, NULL); + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); + + if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { + /* + * If there's a fence, enforce that + * the fb modifier and tiling mode match. + */ + if (tiling != I915_TILING_NONE && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + drm_dbg_kms(&i915->drm, + "tiling_mode doesn't match fb modifier\n"); + return -EINVAL; + } + } else { + if (tiling == I915_TILING_X) { + mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; + } else if (tiling == I915_TILING_Y) { + drm_dbg_kms(&i915->drm, + "No Y tiling for legacy addfb\n"); + return -EINVAL; + } + } + + /* + * gen2/3 display engine uses the fence if present, + * so the tiling mode must match the fb modifier exactly. + */ + if (DISPLAY_VER(i915) < 4 && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + drm_dbg_kms(&i915->drm, + "tiling_mode must match fb modifier exactly on gen2/3\n"); + return -EINVAL; + } + + /* + * If there's a fence, enforce that + * the fb pitch and fence stride match. + */ + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { + drm_dbg_kms(&i915->drm, + "pitch (%d) must match tiling stride (%d)\n", + mode_cmd->pitches[0], stride); + return -EINVAL; + } + + return 0; +} + +struct drm_i915_gem_object * +intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915, + struct drm_file *filp, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_lookup(filp, mode_cmd->handles[0]); + if (!obj) + return ERR_PTR(-ENOENT); + + /* object is backed with LMEM for discrete */ + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) { + /* object is "remote", not in local memory */ + i915_gem_object_put(obj); + drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n"); + return ERR_PTR(-EREMOTE); + } + + return obj; +} diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h new file mode 100644 index 000000000000..232bf898b013 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __INTEL_FB_BO_H__ +#define __INTEL_FB_BO_H__ + +struct drm_file; +struct drm_mode_fb_cmd2; +struct drm_i915_gem_object; +struct drm_i915_private; +struct intel_framebuffer; + +void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj); + +int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); + +struct drm_i915_gem_object * +intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915, + struct drm_file *filp, + const struct drm_mode_fb_cmd2 *user_mode_cmd); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c 
index 4820d21cc942..f17a1afb4929 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -608,6 +608,7 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) static void ivb_fbc_activate(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u32 dpfc_ctl; if (DISPLAY_VER(i915) >= 10) glk_fbc_program_cfb_stride(fbc); @@ -617,8 +618,13 @@ static void ivb_fbc_activate(struct intel_fbc *fbc) if (intel_gt_support_legacy_fencing(to_gt(i915))) snb_fbc_program_fence(fbc); + /* wa_14019417088 Alternative WA*/ + dpfc_ctl = ivb_dpfc_ctl(fbc); + if (DISPLAY_VER(i915) >= 20) + intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); + intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), - DPFC_CTL_EN | ivb_dpfc_ctl(fbc)); + DPFC_CTL_EN | dpfc_ctl); } static bool ivb_fbc_is_compressing(struct intel_fbc *fbc) @@ -1022,10 +1028,13 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state * struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); unsigned int effective_w, effective_h, max_w, max_h; - if (DISPLAY_VER(i915) >= 10) { + if (DISPLAY_VER(i915) >= 11) { + max_w = 8192; + max_h = 4096; + } else if (DISPLAY_VER(i915) >= 10) { max_w = 5120; max_h = 4096; - } else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) { + } else if (DISPLAY_VER(i915) >= 7) { max_w = 4096; max_h = 4096; } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { @@ -1044,6 +1053,31 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state * return effective_w <= max_w && effective_h <= max_h; } +static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + unsigned int w, h, max_w, max_h; + + if (DISPLAY_VER(i915) >= 10) { + max_w = 5120; + max_h = 4096; + } else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) { + max_w = 4096; + max_h = 4096; + } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { + max_w = 4096; + max_h = 2048; + } else { + max_w = 2048; + max_h = 1536; + } + + w = drm_rect_width(&plane_state->uapi.src) >> 16; + h = drm_rect_height(&plane_state->uapi.src) >> 16; + + return w <= max_w && h <= max_h; +} + static bool i8xx_fbc_tiling_valid(const struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1174,7 +1208,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!i915->params.enable_fbc) { + if (!i915->display.params.enable_fbc) { plane_state->no_fbc_reason = "disabled per module param or by default"; return 0; } @@ -1201,7 +1235,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) { + if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_psr2) { plane_state->no_fbc_reason = "PSR2 enabled"; return 0; } @@ -1241,11 +1275,16 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!intel_fbc_hw_tracking_covers_screen(plane_state)) { + if (!intel_fbc_plane_size_valid(plane_state)) { plane_state->no_fbc_reason = "plane size too big"; return 0; } + if (!intel_fbc_hw_tracking_covers_screen(plane_state)) { + plane_state->no_fbc_reason = "surface size too big"; + return 0; + } + /* * Work around a problem on GEN9+ HW, where enabling FBC on a plane * having a Y offset that isn't divisible by 4 causes FIFO underrun @@ -1751,8 +1790,8 @@ void 
intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) */ static int intel_sanitize_fbc_option(struct drm_i915_private *i915) { - if (i915->params.enable_fbc >= 0) - return !!i915->params.enable_fbc; + if (i915->display.params.enable_fbc >= 0) + return !!i915->display.params.enable_fbc; if (!HAS_FBC(i915)) return 0; @@ -1824,9 +1863,9 @@ void intel_fbc_init(struct drm_i915_private *i915) if (need_fbc_vtd_wa(i915)) DISPLAY_RUNTIME_INFO(i915)->fbc_mask = 0; - i915->params.enable_fbc = intel_sanitize_fbc_option(i915); + i915->display.params.enable_fbc = intel_sanitize_fbc_option(i915); drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n", - i915->params.enable_fbc); + i915->display.params.enable_fbc); for_each_fbc_id(i915, fbc_id) i915->display.fbc[fbc_id] = intel_fbc_create(i915, fbc_id); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 31d0d695d567..99894a855ef0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -43,7 +43,6 @@ #include <drm/drm_fourcc.h> #include <drm/drm_gem_framebuffer_helper.h> -#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_mman.h" #include "i915_drv.h" @@ -51,6 +50,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_fbdev.h" +#include "intel_fbdev_fb.h" #include "intel_frontbuffer.h" struct intel_fbdev { @@ -146,65 +146,6 @@ static const struct fb_ops intelfb_ops = { .fb_mmap = intel_fbdev_mmap, }; -static int intelfb_alloc(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct intel_fbdev *ifbdev = to_intel_fbdev(helper); - struct drm_framebuffer *fb; - struct drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj; - int size; - - /* we don't do packed 24bpp */ - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), 64); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - size = mode_cmd.pitches[0] * mode_cmd.height; - size = PAGE_ALIGN(size); - - obj = ERR_PTR(-ENODEV); - if (HAS_LMEM(dev_priv)) { - obj = i915_gem_object_create_lmem(dev_priv, size, - I915_BO_ALLOC_CONTIGUOUS | - I915_BO_ALLOC_USER); - } else { - /* - * If the FB is too big, just don't use it since fbdev is not very - * important and we should probably use that space with FBC or other - * features. - * - * Also skip stolen on MTL as Wa_22018444074 mitigation. 
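
Editor's note: the allocator being removed here (re-added essentially verbatim as intel_fbdev_fb_alloc() further down) sizes the console buffer from the surface parameters — pitch rounded up to 64 bytes, total size rounded up to a page. A worked example for a 1920x1080, 32 bpp console:

	u32 pitch   = ALIGN(1920 * DIV_ROUND_UP(32, 8), 64);	/* 7680, already 64-aligned */
	size_t size = PAGE_ALIGN((size_t)pitch * 1080);		/* 8294400 bytes = 2025 pages */
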
- */ - if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) - obj = i915_gem_object_create_stolen(dev_priv, size); - if (IS_ERR(obj)) - obj = i915_gem_object_create_shmem(dev_priv, size); - } - - if (IS_ERR(obj)) { - drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); - return PTR_ERR(obj); - } - - fb = intel_framebuffer_create(obj, &mode_cmd); - i915_gem_object_put(obj); - if (IS_ERR(fb)) - return PTR_ERR(fb); - - ifbdev->fb = to_intel_framebuffer(fb); - return 0; -} - static int intelfb_create(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { @@ -213,7 +154,6 @@ static int intelfb_create(struct drm_fb_helper *helper, struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt; const struct i915_gtt_view view = { .type = I915_GTT_VIEW_NORMAL, }; @@ -222,9 +162,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct i915_vma *vma; unsigned long flags = 0; bool prealloc = false; - void __iomem *vaddr; struct drm_i915_gem_object *obj; - struct i915_gem_ww_ctx ww; int ret; mutex_lock(&ifbdev->hpd_lock); @@ -245,12 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper, intel_fb = ifbdev->fb = NULL; } if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) { + struct drm_framebuffer *fb; drm_dbg_kms(&dev_priv->drm, "no BIOS fb, allocating a new one\n"); - ret = intelfb_alloc(helper, sizes); - if (ret) - return ret; - intel_fb = ifbdev->fb; + fb = intel_fbdev_fb_alloc(helper, sizes); + if (IS_ERR(fb)) + return PTR_ERR(fb); + intel_fb = ifbdev->fb = to_intel_framebuffer(fb); } else { drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n"); prealloc = true; @@ -283,49 +222,18 @@ static int intelfb_create(struct drm_fb_helper *helper, info->fbops = &intelfb_ops; obj = intel_fb_obj(&intel_fb->base); - if (i915_gem_object_is_lmem(obj)) { - struct intel_memory_region *mem = obj->mm.region; - - /* Use fbdev's framebuffer from lmem for discrete */ - info->fix.smem_start = - (unsigned long)(mem->io_start + - i915_gem_object_get_dma_address(obj, 0)); - info->fix.smem_len = obj->base.size; - } else { - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = - (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma)); - info->fix.smem_len = vma->size; - } - - for_i915_gem_ww(&ww, ret, false) { - ret = i915_gem_object_lock(vma->obj, &ww); - - if (ret) - continue; - - vaddr = i915_vma_pin_iomap(vma); - if (IS_ERR(vaddr)) { - drm_err(&dev_priv->drm, - "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); - ret = PTR_ERR(vaddr); - continue; - } - } + ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma); if (ret) goto out_unpin; - info->screen_base = vaddr; - info->screen_size = vma->size; - drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. * If the object is stolen however, it will be full of whatever * garbage was left in there. 
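
Editor's note: the mapping code removed above (and re-added below in intel_fbdev_fb_fill_info()) relies on the i915 ww-mutex retry loop. In general form, a sketch of that idiom:

	struct i915_gem_ww_ctx ww;
	int err;

	for_i915_gem_ww(&ww, err, false) {	/* re-runs the body after -EDEADLK backoff */
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;		/* 'continue' handles retry or terminates */
		/* ... pin/map while the object lock is held ... */
	}
	/* 'err' holds the final status once the loop exits */
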
*/ - if (!i915_gem_object_is_shmem(vma->obj) && !prealloc) + if (!i915_gem_object_is_shmem(obj) && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ @@ -424,12 +332,12 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, continue; } - if (obj->base.size > max_size) { + if (intel_bo_to_drm_bo(obj)->size > max_size) { drm_dbg_kms(&i915->drm, "found possible fb from [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fb = to_intel_framebuffer(plane_state->uapi.fb); - max_size = obj->base.size; + max_size = intel_bo_to_drm_bo(obj)->size; } } diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c new file mode 100644 index 000000000000..717c3a3237c4 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_fb_helper.h> + +#include "gem/i915_gem_lmem.h" + +#include "i915_drv.h" +#include "intel_display_types.h" +#include "intel_fbdev_fb.h" + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_framebuffer *fb; + struct drm_device *dev = helper->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_mode_fb_cmd2 mode_cmd = {}; + struct drm_i915_gem_object *obj; + int size; + + /* we don't do packed 24bpp */ + if (sizes->surface_bpp == 24) + sizes->surface_bpp = 32; + + mode_cmd.width = sizes->surface_width; + mode_cmd.height = sizes->surface_height; + + mode_cmd.pitches[0] = ALIGN(mode_cmd.width * + DIV_ROUND_UP(sizes->surface_bpp, 8), 64); + mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); + + size = mode_cmd.pitches[0] * mode_cmd.height; + size = PAGE_ALIGN(size); + + obj = ERR_PTR(-ENODEV); + if (HAS_LMEM(dev_priv)) { + obj = i915_gem_object_create_lmem(dev_priv, size, + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_USER); + } else { + /* + * If the FB is too big, just don't use it since fbdev is not very + * important and we should probably use that space with FBC or other + * features. + * + * Also skip stolen on MTL as Wa_22018444074 mitigation. 
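
	/*
	 * Editor's note: the "size * 2 < dsm.usable_size" test above keeps the
	 * console buffer below half of usable stolen memory — e.g. the ~8 MiB
	 * buffer of a 1080p/32bpp console is placed in stolen only when at
	 * least ~16 MiB of stolen space is usable; otherwise it falls back to
	 * shmem.
	 */
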
+ */ + if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) + obj = i915_gem_object_create_stolen(dev_priv, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_shmem(dev_priv, size); + } + + if (IS_ERR(obj)) { + drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + return ERR_PTR(-ENOMEM); + } + + fb = intel_framebuffer_create(obj, &mode_cmd); + i915_gem_object_put(obj); + + return fb; +} + +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma) +{ + struct i915_gem_ww_ctx ww; + void __iomem *vaddr; + int ret; + + if (i915_gem_object_is_lmem(obj)) { + struct intel_memory_region *mem = obj->mm.region; + + /* Use fbdev's framebuffer from lmem for discrete */ + info->fix.smem_start = + (unsigned long)(mem->io_start + + i915_gem_object_get_dma_address(obj, 0)); + info->fix.smem_len = obj->base.size; + } else { + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma)); + info->fix.smem_len = vma->size; + } + + for_i915_gem_ww(&ww, ret, false) { + ret = i915_gem_object_lock(vma->obj, &ww); + + if (ret) + continue; + + vaddr = i915_vma_pin_iomap(vma); + if (IS_ERR(vaddr)) { + drm_err(&i915->drm, + "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); + ret = PTR_ERR(vaddr); + continue; + } + } + + if (ret) + return ret; + + info->screen_base = vaddr; + info->screen_size = intel_bo_to_drm_bo(obj)->size; + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h new file mode 100644 index 000000000000..a395b2c65d33 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_FBDEV_FB_H__ +#define __INTEL_FBDEV_FB_H__ + +struct drm_fb_helper; +struct drm_fb_helper_surface_size; +struct drm_i915_gem_object; +struct drm_i915_private; +struct fb_info; +struct i915_vma; + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes); +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index e6429dfebe15..295a0f24ebbf 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -10,6 +10,7 @@ #include "intel_crtc.h" #include "intel_ddi.h" #include "intel_de.h" +#include "intel_dp.h" #include "intel_display_types.h" #include "intel_fdi.h" #include "intel_fdi_regs.h" @@ -338,8 +339,11 @@ int ilk_fdi_compute_config(struct intel_crtc *crtc, pipe_config->fdi_lanes = lane; - intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + intel_link_compute_m_n(to_bpp_x16(pipe_config->pipe_bpp), + lane, fdi_dotclock, + link_bw, + intel_dp_bw_fec_overhead(false), + &pipe_config->fdi_m_n); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index ec46716b2f49..2ea37c0414a9 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -265,8 +265,6 @@ static void 
frontbuffer_release(struct kref *ref) spin_unlock(&intel_bo_to_i915(obj)->display.fb_tracking.lock); i915_active_fini(&front->write); - - i915_gem_object_put(obj); kfree_rcu(front, rcu); } diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index c89da3568ebd..39b3f7c0c77c 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -923,7 +923,7 @@ static int _intel_hdcp_disable(struct intel_connector *connector) return 0; } -static int _intel_hdcp_enable(struct intel_connector *connector) +static int intel_hdcp1_enable(struct intel_connector *connector) { struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; @@ -1058,7 +1058,7 @@ static int intel_hdcp_check_link(struct intel_connector *connector) goto out; } - ret = _intel_hdcp_enable(connector); + ret = intel_hdcp1_enable(connector); if (ret) { drm_err(&i915->drm, "Failed to enable hdcp (%d)\n", ret); intel_hdcp_update_value(connector, @@ -2324,10 +2324,10 @@ intel_hdcp_set_streams(struct intel_digital_port *dig_port, return 0; } -int intel_hdcp_enable(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static int _intel_hdcp_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_connector *connector = @@ -2388,7 +2388,7 @@ int intel_hdcp_enable(struct intel_atomic_state *state, */ if (ret && intel_hdcp_capable(connector) && hdcp->content_type != DRM_MODE_HDCP_CONTENT_TYPE1) { - ret = _intel_hdcp_enable(connector); + ret = intel_hdcp1_enable(connector); } if (!ret) { @@ -2404,6 +2404,27 @@ int intel_hdcp_enable(struct intel_atomic_state *state, return ret; } +void intel_hdcp_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_connector *connector = + to_intel_connector(conn_state->connector); + struct intel_hdcp *hdcp = &connector->hdcp; + + /* + * Enable hdcp if it's desired or if userspace is enabled and + * driver set its state to undesired + */ + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED || + (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_ENABLED && hdcp->value == + DRM_MODE_CONTENT_PROTECTION_UNDESIRED)) + _intel_hdcp_enable(state, encoder, crtc_state, conn_state); +} + int intel_hdcp_disable(struct intel_connector *connector) { struct intel_digital_port *dig_port = intel_attached_dig_port(connector); @@ -2491,7 +2512,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, } if (desired_and_not_enabled || content_protection_type_changed) - intel_hdcp_enable(state, encoder, crtc_state, conn_state); + _intel_hdcp_enable(state, encoder, crtc_state, conn_state); } void intel_hdcp_component_fini(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 5997c52a0958..a9c784fd9ba5 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -28,10 +28,10 @@ void intel_hdcp_atomic_check(struct drm_connector *connector, int intel_hdcp_init(struct intel_connector *connector, struct 
intel_digital_port *dig_port, const struct intel_hdcp_shim *hdcp_shim); -int intel_hdcp_enable(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state); +void intel_hdcp_enable(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state); int intel_hdcp_disable(struct intel_connector *connector); void intel_hdcp_update_pipe(struct intel_atomic_state *state, struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index bfa456fa7d25..39e4f5f7c817 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3034,16 +3034,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, "HDCP init failed, skipping.\n"); } - /* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written - * 0xd. Failure to do so will result in spurious interrupts being - * generated on the port when a cable is not attached. - */ - if (IS_G45(dev_priv)) { - u32 temp = intel_de_read(dev_priv, PEG_BAND_GAP_DATA); - intel_de_write(dev_priv, PEG_BAND_GAP_DATA, - (temp & ~0xf) | 0xd); - } - cec_fill_conn_info_from_drm(&conn_info, connector); intel_hdmi->cec_notifier = diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index f07047e9cb30..04f62f27ad74 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -1361,11 +1361,24 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) bxt_hpd_detection_setup(dev_priv); } +static void g45_hpd_peg_band_gap_wa(struct drm_i915_private *i915) +{ + /* + * For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written + * 0xd. Failure to do so will result in spurious interrupts being + * generated on the port when a cable is not attached. 
+ */ + intel_de_rmw(i915, PEG_BAND_GAP_DATA, 0xf, 0xd); +} + static void i915_hpd_enable_detection(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); u32 hotplug_en = hpd_mask_i915[encoder->hpd_pin]; + if (IS_G45(i915)) + g45_hpd_peg_band_gap_wa(i915); + /* HPD sense and interrupt enable are one and the same */ i915_hotplug_interrupt_update(i915, hotplug_en, hotplug_en); } @@ -1389,6 +1402,9 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) hotplug_en |= CRT_HOTPLUG_ACTIVATION_PERIOD_64; hotplug_en |= CRT_HOTPLUG_VOLTAGE_COMPARE_50; + if (IS_G45(dev_priv)) + g45_hpd_peg_band_gap_wa(dev_priv); + /* Ignore TV since it's buggy */ i915_hotplug_interrupt_update_locked(dev_priv, HOTPLUG_INT_EN_MASK | diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index c5eb5f242536..9c6d35a405a1 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -7,6 +7,7 @@ #include "intel_atomic.h" #include "intel_display_types.h" +#include "intel_dp_mst.h" #include "intel_fdi.h" #include "intel_link_bw.h" @@ -21,6 +22,7 @@ void intel_link_bw_init_limits(struct drm_i915_private *i915, struct intel_link_ { enum pipe pipe; + limits->force_fec_pipes = 0; limits->bpp_limit_reached_pipes = 0; for_each_pipe(i915, pipe) limits->max_bpp_x16[pipe] = INT_MAX; @@ -53,11 +55,11 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, struct drm_i915_private *i915 = to_i915(state->base.dev); enum pipe max_bpp_pipe = INVALID_PIPE; struct intel_crtc *crtc; - int max_bpp = 0; + int max_bpp_x16 = 0; for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, pipe_mask) { struct intel_crtc_state *crtc_state; - int link_bpp; + int link_bpp_x16; if (limits->bpp_limit_reached_pipes & BIT(crtc->pipe)) continue; @@ -68,7 +70,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, return PTR_ERR(crtc_state); if (crtc_state->dsc.compression_enable) - link_bpp = crtc_state->dsc.compressed_bpp; + link_bpp_x16 = crtc_state->dsc.compressed_bpp_x16; else /* * TODO: for YUV420 the actual link bpp is only half @@ -76,10 +78,10 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, * is based on the pipe bpp value, set the actual link bpp * limit here once the MST BW allocation is fixed. */ - link_bpp = crtc_state->pipe_bpp; + link_bpp_x16 = to_bpp_x16(crtc_state->pipe_bpp); - if (link_bpp > max_bpp) { - max_bpp = link_bpp; + if (link_bpp_x16 > max_bpp_x16) { + max_bpp_x16 = link_bpp_x16; max_bpp_pipe = crtc->pipe; } } @@ -87,7 +89,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, if (max_bpp_pipe == INVALID_PIPE) return -ENOSPC; - limits->max_bpp_x16[max_bpp_pipe] = to_bpp_x16(max_bpp) - 1; + limits->max_bpp_x16[max_bpp_pipe] = max_bpp_x16 - 1; return intel_modeset_pipes_in_mask_early(state, reason, BIT(max_bpp_pipe)); @@ -143,6 +145,10 @@ static int check_all_link_config(struct intel_atomic_state *state, /* TODO: Check additional shared display link configurations like MST */ int ret; + ret = intel_dp_mst_atomic_check_link(state, limits); + if (ret) + return ret; + ret = intel_fdi_atomic_check_link(state, limits); if (ret) return ret; @@ -158,6 +164,12 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, bool bpps_changed = false; enum pipe pipe; + /* FEC can't be forced off after it was forced on. 
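
Editor's note: intel_de_rmw(i915, reg, clear, set) is the display read-modify-write helper, so the relocated G45 workaround above is equivalent to the open-coded sequence deleted from intel_hdmi.c:

	u32 tmp = intel_de_read(i915, PEG_BAND_GAP_DATA);
	intel_de_write(i915, PEG_BAND_GAP_DATA, (tmp & ~0xf) | 0xd);
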
*/ + if (drm_WARN_ON(&i915->drm, + (old_limits->force_fec_pipes & new_limits->force_fec_pipes) != + old_limits->force_fec_pipes)) + return false; + for_each_pipe(i915, pipe) { /* The bpp limit can only decrease. */ if (drm_WARN_ON(&i915->drm, @@ -172,7 +184,9 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, /* At least one limit must change. */ if (drm_WARN_ON(&i915->drm, - !bpps_changed)) + !bpps_changed && + new_limits->force_fec_pipes == + old_limits->force_fec_pipes)) return false; return true; diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index e07df22a779a..2cf57307cc24 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -16,6 +16,7 @@ struct intel_atomic_state; struct intel_crtc_state; struct intel_link_bw_limits { + u8 force_fec_pipes; u8 bpp_limit_reached_pipes; /* in 1/16 bpp units */ int max_bpp_x16[I915_MAX_PIPES]; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index bcbdd1984fd9..221f5c6c871b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -185,7 +185,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, /* Convert from 100ms to 100us units */ pps->t4 = val * 1000; - if (DISPLAY_VER(dev_priv) <= 4 && + if (DISPLAY_VER(dev_priv) < 5 && pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { drm_dbg_kms(&dev_priv->drm, "Panel power timings uninitialized, " @@ -799,8 +799,8 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) unsigned int val; /* use the module option value if specified */ - if (i915->params.lvds_channel_mode > 0) - return i915->params.lvds_channel_mode == 2; + if (i915->display.params.lvds_channel_mode > 0) + return i915->display.params.lvds_channel_mode == 2; /* single channel LVDS is limited to 112 MHz */ if (fixed_mode->clock > 112999) diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index b8f43efb0ab5..94eece7f63be 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -318,6 +318,12 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); + if (crtc_state->dsc.compression_enable) { + drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); + connector->dp.dsc_decompression_enabled = true; + } else { + connector->dp.dsc_decompression_enabled = false; + } conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3; } } diff --git a/drivers/gpu/drm/i915/display/intel_modeset_verify.c b/drivers/gpu/drm/i915/display/intel_modeset_verify.c index 5e1c2c780412..076298a8d405 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_verify.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_verify.c @@ -244,7 +244,7 @@ void intel_modeset_verify_crtc(struct intel_atomic_state *state, verify_crtc_state(state, crtc); intel_shared_dpll_state_verify(state, crtc); intel_mpllb_state_verify(state, crtc); - intel_c10pll_state_verify(state, crtc); + intel_cx0pll_state_verify(state, crtc); } void intel_modeset_verify_disabled(struct intel_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 84078fb82b2f..1ce785db6a5e 100644 --- 
a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -841,7 +841,7 @@ static int intel_load_vbt_firmware(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->display.opregion; const struct firmware *fw = NULL; - const char *name = dev_priv->params.vbt_firmware; + const char *name = dev_priv->display.params.vbt_firmware; int ret; if (!name || !*name) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 483beedac5b8..0d8e5320a4f8 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -46,8 +46,8 @@ bool intel_panel_use_ssc(struct drm_i915_private *i915) { - if (i915->params.panel_use_ssc >= 0) - return i915->params.panel_use_ssc != 0; + if (i915->display.params.panel_use_ssc >= 0) + return i915->display.params.panel_use_ssc != 0; return i915->display.vbt.lvds_use_ssc && !intel_has_quirk(i915, QUIRK_LVDS_SSC_DISABLE); } diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index 866786e6b32f..baf679759e00 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -8,6 +8,7 @@ #include "intel_crt.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_dpll.h" #include "intel_fdi.h" #include "intel_fdi_regs.h" #include "intel_lvds.h" diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 73f0f1714b37..a8fa3a20990e 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -90,7 +90,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); enum pipe pipe = intel_dp->pps.pps_pipe; bool pll_enabled, release_cl_override = false; - enum dpio_phy phy = DPIO_PHY(pipe); + enum dpio_phy phy = vlv_pipe_to_phy(pipe); enum dpio_channel ch = vlv_pipe_to_channel(pipe); u32 DP; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 4f1f31fc9529..b6e2e70e1290 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -29,6 +29,7 @@ #include "i915_reg.h" #include "intel_atomic.h" #include "intel_crtc.h" +#include "intel_ddi.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" @@ -172,6 +173,15 @@ * irrelevant for normal operation. 
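
Editor's note: a recurring mechanical change across these display files is the migration of display-only module parameters from the driver-wide i915->params block to i915->display.params; every access site changes the same way, for example:

	-	if (i915->params.enable_psr == -1)
	+	if (i915->display.params.enable_psr == -1)
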
*/ +bool intel_encoder_can_psr(struct intel_encoder *encoder) +{ + if (intel_encoder_is_dp(encoder) || encoder->type == INTEL_OUTPUT_DP_MST) + return CAN_PSR(enc_to_intel_dp(encoder)) || + CAN_PANEL_REPLAY(enc_to_intel_dp(encoder)); + else + return false; +} + static bool psr_global_enabled(struct intel_dp *intel_dp) { struct intel_connector *connector = intel_dp->attached_connector; @@ -179,9 +189,9 @@ static bool psr_global_enabled(struct intel_dp *intel_dp) switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DEFAULT: - if (i915->params.enable_psr == -1) + if (i915->display.params.enable_psr == -1) return connector->panel.vbt.psr.enable; - return i915->params.enable_psr; + return i915->display.params.enable_psr; case I915_PSR_DEBUG_DISABLE: return false; default: @@ -198,7 +208,7 @@ static bool psr2_global_enabled(struct intel_dp *intel_dp) case I915_PSR_DEBUG_FORCE_PSR1: return false; default: - if (i915->params.enable_psr == 1) + if (i915->display.params.enable_psr == 1) return false; return true; } @@ -474,27 +484,41 @@ exit: intel_dp->psr.su_y_granularity = y; } -void intel_psr_init_dpcd(struct intel_dp *intel_dp) +static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = - to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u8 pr_dpcd = 0; - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); + intel_dp->psr.sink_panel_replay_support = false; + drm_dp_dpcd_readb(&intel_dp->aux, DP_PANEL_REPLAY_CAP, &pr_dpcd); - if (!intel_dp->psr_dpcd[0]) + if (!(pr_dpcd & DP_PANEL_REPLAY_SUPPORT)) { + drm_dbg_kms(&i915->drm, + "Panel replay is not supported by panel\n"); return; - drm_dbg_kms(&dev_priv->drm, "eDP panel supports PSR version %x\n", + } + + drm_dbg_kms(&i915->drm, + "Panel replay is supported by panel\n"); + intel_dp->psr.sink_panel_replay_support = true; +} + +static void _psr_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + + drm_dbg_kms(&i915->drm, "eDP panel supports PSR version %x\n", intel_dp->psr_dpcd[0]); if (drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_NO_PSR)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(&i915->drm, "PSR support not currently available for this panel\n"); return; } if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(&i915->drm, "Panel lacks power state control, PSR cannot be enabled\n"); return; } @@ -503,8 +527,8 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) intel_dp->psr.sink_sync_latency = intel_dp_get_sink_sync_latency(intel_dp); - if (DISPLAY_VER(dev_priv) >= 9 && - (intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED)) { + if (DISPLAY_VER(i915) >= 9 && + intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED) { bool y_req = intel_dp->psr_dpcd[1] & DP_PSR2_SU_Y_COORDINATE_REQUIRED; bool alpm = intel_dp_get_alpm_status(intel_dp); @@ -521,14 +545,25 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) * GTC first. */ intel_dp->psr.sink_psr2_support = y_req && alpm; - drm_dbg_kms(&dev_priv->drm, "PSR2 %ssupported\n", + drm_dbg_kms(&i915->drm, "PSR2 %ssupported\n", intel_dp->psr.sink_psr2_support ? 
"" : "not "); + } +} - if (intel_dp->psr.sink_psr2_support) { - intel_dp->psr.colorimetry_support = - intel_dp_get_colorimetry_status(intel_dp); - intel_dp_get_su_granularity(intel_dp); - } +void intel_psr_init_dpcd(struct intel_dp *intel_dp) +{ + _panel_replay_init_dpcd(intel_dp); + + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + + if (intel_dp->psr_dpcd[0]) + _psr_init_dpcd(intel_dp); + + if (intel_dp->psr.sink_psr2_support) { + intel_dp->psr.colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + intel_dp_get_su_granularity(intel_dp); } } @@ -574,8 +609,11 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u8 dpcd_val = DP_PSR_ENABLE; - /* Enable ALPM at sink for psr2 */ + if (intel_dp->psr.panel_replay_enabled) + return; + if (intel_dp->psr.psr2_enabled) { + /* Enable ALPM at sink for psr2 */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, DP_ALPM_ENABLE | DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE); @@ -592,6 +630,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) if (intel_dp->psr.req_psr2_sdp_prior_scanline) dpcd_val |= DP_PSR_SU_REGION_SCANLINE_CAPTURE; + if (intel_dp->psr.entry_setup_frames > 0) + dpcd_val |= DP_PSR_FRAME_CAPTURE; + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); @@ -606,7 +647,7 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) if (DISPLAY_VER(dev_priv) >= 11) val |= EDP_PSR_TP4_TIME_0us; - if (dev_priv->params.psr_safest_params) { + if (dev_priv->display.params.psr_safest_params) { val |= EDP_PSR_TP1_TIME_2500us; val |= EDP_PSR_TP2_TP3_TIME_2500us; goto check_tp3_sel; @@ -690,6 +731,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) if (DISPLAY_VER(dev_priv) >= 8) val |= EDP_PSR_CRC_ENABLE; + if (DISPLAY_VER(dev_priv) >= 20) + val |= LNL_EDP_PSR_ENTRY_SETUP_FRAMES(intel_dp->psr.entry_setup_frames); + intel_de_rmw(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder), ~EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK, val); } @@ -700,7 +744,7 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 val = 0; - if (dev_priv->params.psr_safest_params) + if (dev_priv->display.params.psr_safest_params) return EDP_PSR2_TP2_TIME_2500us; if (connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && @@ -727,21 +771,49 @@ static int psr2_block_count(struct intel_dp *intel_dp) return psr2_block_count_lines(intel_dp) / 4; } +static u8 frames_before_su_entry(struct intel_dp *intel_dp) +{ + u8 frames_before_su_entry; + + frames_before_su_entry = max_t(u8, + intel_dp->psr.sink_sync_latency + 1, + 2); + + /* Entry setup frames must be at least 1 less than frames before SU entry */ + if (intel_dp->psr.entry_setup_frames >= frames_before_su_entry) + frames_before_su_entry = intel_dp->psr.entry_setup_frames + 1; + + return frames_before_su_entry; +} + +static void dg2_activate_panel_replay(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + intel_de_rmw(dev_priv, PSR2_MAN_TRK_CTL(intel_dp->psr.transcoder), + 0, ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME); + + intel_de_rmw(dev_priv, TRANS_DP2_CTL(intel_dp->psr.transcoder), 0, + TRANS_DP2_PANEL_REPLAY_ENABLE); +} + static void hsw_activate_psr2(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); enum transcoder cpu_transcoder = 
intel_dp->psr.transcoder; u32 val = EDP_PSR2_ENABLE; + u32 psr_val = 0; val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp)); - if (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv)) + if (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv)) val |= EDP_SU_TRACK_ENABLE; - if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12) + if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) < 13) val |= EDP_Y_COORDINATE_ENABLE; - val |= EDP_PSR2_FRAME_BEFORE_SU(max_t(u8, intel_dp->psr.sink_sync_latency + 1, 2)); + val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp)); + val |= intel_psr2_get_tp_time(intel_dp); if (DISPLAY_VER(dev_priv) >= 12) { @@ -785,6 +857,9 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) if (intel_dp->psr.req_psr2_sdp_prior_scanline) val |= EDP_PSR2_SU_SDP_SCANLINE; + if (DISPLAY_VER(dev_priv) >= 20) + psr_val |= LNL_EDP_PSR_ENTRY_SETUP_FRAMES(intel_dp->psr.entry_setup_frames); + if (intel_dp->psr.psr2_sel_fetch_enabled) { u32 tmp; @@ -798,7 +873,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. */ - intel_de_write(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder), 0); + intel_de_write(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder), psr_val); intel_de_write(dev_priv, EDP_PSR2_CTL(cpu_transcoder), val); } @@ -816,13 +891,13 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans return false; } -static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +static u32 intel_get_frame_time_us(const struct intel_crtc_state *crtc_state) { - if (!cstate || !cstate->hw.active) + if (!crtc_state->hw.active) return 0; return DIV_ROUND_UP(1000 * 1000, - drm_mode_vrefresh(&cstate->hw.adjusted_mode)); + drm_mode_vrefresh(&crtc_state->hw.adjusted_mode)); } static void psr2_program_idle_frames(struct intel_dp *intel_dp, @@ -943,7 +1018,7 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!dev_priv->params.enable_psr2_sel_fetch && + if (!dev_priv->display.params.enable_psr2_sel_fetch && intel_dp->psr.debug != I915_PSR_DEBUG_ENABLE_SEL_FETCH) { drm_dbg_kms(&dev_priv->drm, "PSR2 sel fetch not enabled, disabled by parameter\n"); @@ -1019,7 +1094,7 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d return true; /* Not supported <13 / Wa_22012279113:adl-p */ - if (DISPLAY_VER(dev_priv) <= 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b) + if (DISPLAY_VER(dev_priv) < 14 || intel_dp->edp_dpcd[0] < DP_EDP_14b) return false; crtc_state->req_psr2_sdp_prior_scanline = true; @@ -1056,7 +1131,7 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp, fast_wake_lines > max_wake_lines) return false; - if (i915->params.psr_safest_params) + if (i915->display.params.psr_safest_params) io_wake_lines = fast_wake_lines = max_wake_lines; /* According to Bspec lower limit should be set as 7 lines. 
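The new frames_before_su_entry() helper feeds EDP_PSR2_FRAME_BEFORE_SU in the hunk above and has to stay consistent with the LNL entry-setup-frames support. A worked example of its arithmetic, using nothing beyond what the helper itself shows:

    /*
     * sink_sync_latency = 0, entry_setup_frames = 0:
     *   max(0 + 1, 2) -> 2 frames before SU entry
     *
     * sink_sync_latency = 0, entry_setup_frames = 2:
     *   entry setup frames must stay strictly below the frames-before-SU
     *   value, so the baseline of 2 is bumped to 2 + 1 -> 3
     */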
*/ @@ -1066,6 +1141,39 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp, return true; } +static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp, + const struct drm_display_mode *adjusted_mode) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + int psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); + int entry_setup_frames = 0; + + if (psr_setup_time < 0) { + drm_dbg_kms(&i915->drm, + "PSR condition failed: Invalid PSR setup time (0x%02x)\n", + intel_dp->psr_dpcd[1]); + return -ETIME; + } + + if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > + adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { + if (DISPLAY_VER(i915) >= 20) { + /* setup entry frames can be up to 3 frames */ + entry_setup_frames = 1; + drm_dbg_kms(&i915->drm, + "PSR setup entry frames %d\n", + entry_setup_frames); + } else { + drm_dbg_kms(&i915->drm, + "PSR condition failed: PSR setup time (%d us) too long\n", + psr_setup_time); + return -ETIME; + } + } + + return entry_setup_frames; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -1113,7 +1221,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * over PSR2. */ if (crtc_state->dsc.compression_enable && - (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))) { + (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))) { drm_dbg_kms(&dev_priv->drm, "PSR2 cannot be enabled since DSC is enabled\n"); return false; @@ -1206,24 +1314,42 @@ unsupported: return false; } -void intel_psr_compute_config(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state, - struct drm_connector_state *conn_state) +static bool _psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; - int psr_setup_time; + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + int entry_setup_frames; /* * Current PSR panels don't work reliably with VRR enabled * So if VRR is enabled, do not enable PSR. 
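intel_psr_entry_setup_frames(), added above, splits the old inline setup-time check out of intel_psr_compute_config(): the sink's PSR setup time must fit inside the vblank, and from display version 20 on the hardware can instead borrow whole frames. A sketch of the underlying check; intel_usecs_to_scanlines() is the real helper the function calls (assumed here to round up), while the wrapper itself is illustrative:

    static bool psr_setup_fits_in_vblank(const struct drm_display_mode *m,
                                         int setup_time_us)
    {
            /* lines between active video end and frame end, minus one */
            int usable_vblank_lines = m->crtc_vtotal - m->crtc_vdisplay - 1;

            return intel_usecs_to_scanlines(m, setup_time_us) <=
                    usable_vblank_lines;
    }
    /* on failure: pre-LNL rejects PSR; LNL+ sets entry_setup_frames = 1 */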
*/ if (crtc_state->vrr.enable) - return; + return false; if (!CAN_PSR(intel_dp)) - return; + return false; + + entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, adjusted_mode); + + if (entry_setup_frames >= 0) { + intel_dp->psr.entry_setup_frames = entry_setup_frames; + } else { + drm_dbg_kms(&dev_priv->drm, + "PSR condition failed: PSR setup timing not met\n"); + return false; + } + + return true; +} + +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; if (!psr_global_enabled(intel_dp)) { drm_dbg_kms(&dev_priv->drm, "PSR disabled by flag\n"); @@ -1242,23 +1368,14 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } - psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); - if (psr_setup_time < 0) { - drm_dbg_kms(&dev_priv->drm, - "PSR condition failed: Invalid PSR setup time (0x%02x)\n", - intel_dp->psr_dpcd[1]); - return; - } + if (CAN_PANEL_REPLAY(intel_dp)) + crtc_state->has_panel_replay = true; + else + crtc_state->has_psr = _psr_compute_config(intel_dp, crtc_state); - if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > - adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { - drm_dbg_kms(&dev_priv->drm, - "PSR condition failed: PSR setup time (%d us) too long\n", - psr_setup_time); + if (!(crtc_state->has_panel_replay || crtc_state->has_psr)) return; - } - crtc_state->has_psr = true; crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state); crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC); @@ -1279,18 +1396,23 @@ void intel_psr_get_config(struct intel_encoder *encoder, return; intel_dp = &dig_port->dp; - if (!CAN_PSR(intel_dp)) + if (!(CAN_PSR(intel_dp) || CAN_PANEL_REPLAY(intel_dp))) return; mutex_lock(&intel_dp->psr.lock); if (!intel_dp->psr.enabled) goto unlock; - /* - * Not possible to read EDP_PSR/PSR2_CTL registers as it is - * enabled/disabled because of frontbuffer tracking and others. - */ - pipe_config->has_psr = true; + if (intel_dp->psr.panel_replay_enabled) { + pipe_config->has_panel_replay = true; + } else { + /* + * Not possible to read EDP_PSR/PSR2_CTL registers as it is + * enabled/disabled because of frontbuffer tracking and others. + */ + pipe_config->has_psr = true; + } + pipe_config->has_psr2 = intel_dp->psr.psr2_enabled; pipe_config->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC); @@ -1327,8 +1449,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp) lockdep_assert_held(&intel_dp->psr.lock); - /* psr1 and psr2 are mutually exclusive.*/ - if (intel_dp->psr.psr2_enabled) + /* psr1, psr2 and panel-replay are mutually exclusive.*/ + if (intel_dp->psr.panel_replay_enabled) + dg2_activate_panel_replay(intel_dp); + else if (intel_dp->psr.psr2_enabled) hsw_activate_psr2(intel_dp); else hsw_activate_psr1(intel_dp); @@ -1452,12 +1576,10 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * All supported adlp panels have 1-based X granularity, this may * cause issues if non-supported panels are used. 
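Taken together, the compute-config and activate hunks above make panel replay, PSR2 and PSR1 mutually exclusive, preferring the most capable mode the sink and source both support. The selection order, restated from the code:

    /* at compute time */
    if (CAN_PANEL_REPLAY(intel_dp))
            crtc_state->has_panel_replay = true;
    else
            crtc_state->has_psr = _psr_compute_config(intel_dp, crtc_state);

    /* at activation time */
    if (intel_dp->psr.panel_replay_enabled)
            dg2_activate_panel_replay(intel_dp);
    else if (intel_dp->psr.psr2_enabled)
            hsw_activate_psr2(intel_dp);
    else
            hsw_activate_psr1(intel_dp);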
*/ - if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) - intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(cpu_transcoder), 0, - ADLP_1_BASED_X_GRANULARITY); - else if (IS_ALDERLAKE_P(dev_priv)) - intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0, - ADLP_1_BASED_X_GRANULARITY); + if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || + IS_ALDERLAKE_P(dev_priv)) + intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), + 0, ADLP_1_BASED_X_GRANULARITY); /* Wa_16012604467:adlp,mtl[a0,b0] */ if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) @@ -1508,6 +1630,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, drm_WARN_ON(&dev_priv->drm, intel_dp->psr.enabled); intel_dp->psr.psr2_enabled = crtc_state->has_psr2; + intel_dp->psr.panel_replay_enabled = crtc_state->has_panel_replay; intel_dp->psr.busy_frontbuffer_bits = 0; intel_dp->psr.pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; intel_dp->psr.transcoder = crtc_state->cpu_transcoder; @@ -1523,8 +1646,12 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, if (!psr_interrupt_error_check(intel_dp)) return; - drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n", - intel_dp->psr.psr2_enabled ? "2" : "1"); + if (intel_dp->psr.panel_replay_enabled) + drm_dbg_kms(&dev_priv->drm, "Enabling Panel Replay\n"); + else + drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n", + intel_dp->psr.psr2_enabled ? "2" : "1"); + intel_write_dp_vsc_sdp(encoder, crtc_state, &crtc_state->psr_vsc); intel_snps_phy_update_psr_power_state(dev_priv, phy, true); intel_psr_enable_sink(intel_dp); @@ -1553,7 +1680,10 @@ static void intel_psr_exit(struct intel_dp *intel_dp) return; } - if (intel_dp->psr.psr2_enabled) { + if (intel_dp->psr.panel_replay_enabled) { + intel_de_rmw(dev_priv, TRANS_DP2_CTL(intel_dp->psr.transcoder), + TRANS_DP2_PANEL_REPLAY_ENABLE, 0); + } else if (intel_dp->psr.psr2_enabled) { tgl_disallow_dc3co_on_psr2_exit(intel_dp); val = intel_de_rmw(dev_priv, EDP_PSR2_CTL(cpu_transcoder), @@ -1602,8 +1732,11 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) if (!intel_dp->psr.enabled) return; - drm_dbg_kms(&dev_priv->drm, "Disabling PSR%s\n", - intel_dp->psr.psr2_enabled ? "2" : "1"); + if (intel_dp->psr.panel_replay_enabled) + drm_dbg_kms(&dev_priv->drm, "Disabling Panel Replay\n"); + else + drm_dbg_kms(&dev_priv->drm, "Disabling PSR%s\n", + intel_dp->psr.psr2_enabled ? 
"2" : "1"); intel_psr_exit(intel_dp); intel_psr_wait_exit_locked(intel_dp); @@ -1636,6 +1769,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, 0); intel_dp->psr.enabled = false; + intel_dp->psr.panel_replay_enabled = false; intel_dp->psr.psr2_enabled = false; intel_dp->psr.psr2_sel_fetch_enabled = false; intel_dp->psr.psr2_sel_fetch_cff_enabled = false; @@ -1783,81 +1917,6 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0); } -void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); -} - -void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *i915 = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - if (plane->id == PLANE_CURSOR) - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - plane_state->ctl); - else - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - PLANE_SEL_FETCH_CTL_ENABLE); -} - -void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int color_plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - const struct drm_rect *clip; - u32 val; - int x, y; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - if (plane->id == PLANE_CURSOR) - return; - - clip = &plane_state->psr2_sel_fetch_area; - - val = (clip->y1 + plane_state->uapi.dst.y1) << 16; - val |= plane_state->uapi.dst.x1; - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_POS(pipe, plane->id), val); - - x = plane_state->view.color_plane[color_plane].x; - - /* - * From Bspec: UV surface Start Y Position = half of Y plane Y - * start position. 
- */ - if (!color_plane) - y = plane_state->view.color_plane[color_plane].y + clip->y1; - else - y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2; - - val = y << 16 | x; - - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_OFFSET(pipe, plane->id), - val); - - /* Sizes are 0 based */ - val = (drm_rect_height(clip) - 1) << 16; - val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1; - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val); -} - void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); @@ -2117,8 +2176,19 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, continue; inter = pipe_clip; - if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) + sel_fetch_area = &new_plane_state->psr2_sel_fetch_area; + if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) { + sel_fetch_area->y1 = -1; + sel_fetch_area->y2 = -1; + /* + * if plane sel fetch was previously enabled -> + * disable it + */ + if (drm_rect_height(&old_plane_state->psr2_sel_fetch_area) > 0) + crtc_state->update_planes |= BIT(plane->id); + continue; + } if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) { full_update = true; @@ -2207,7 +2277,7 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); struct intel_encoder *encoder; - if (!crtc_state->has_psr) + if (!(crtc_state->has_psr || crtc_state->has_panel_replay)) return; for_each_intel_encoder_mask_with_psr(state->base.dev, encoder, @@ -2693,9 +2763,12 @@ void intel_psr_init(struct intel_dp *intel_dp) struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!HAS_PSR(dev_priv)) + if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv))) return; + if (!intel_dp_is_edp(intel_dp)) + intel_psr_init_dpcd(intel_dp); + /* * HSW spec explicitly says PSR is tied to port A. * BDW+ platforms have a instance of PSR registers per transcoder but @@ -2711,7 +2784,10 @@ void intel_psr_init(struct intel_dp *intel_dp) return; } - intel_dp->psr.source_support = true; + if (HAS_DP20(dev_priv) && !intel_dp_is_edp(intel_dp)) + intel_dp->psr.source_panel_replay_support = true; + else + intel_dp->psr.source_support = true; /* Set link_standby x link_off defaults */ if (DISPLAY_VER(dev_priv) < 12) @@ -2728,12 +2804,19 @@ static int psr_get_status_and_error_status(struct intel_dp *intel_dp, { struct drm_dp_aux *aux = &intel_dp->aux; int ret; + unsigned int offset; + + offset = intel_dp->psr.panel_replay_enabled ? + DP_SINK_DEVICE_PR_AND_FRAME_LOCK_STATUS : DP_PSR_STATUS; - ret = drm_dp_dpcd_readb(aux, DP_PSR_STATUS, status); + ret = drm_dp_dpcd_readb(aux, offset, status); if (ret != 1) return ret; - ret = drm_dp_dpcd_readb(aux, DP_PSR_ERROR_STATUS, error_status); + offset = intel_dp->psr.panel_replay_enabled ? 
+ DP_PANEL_REPLAY_ERROR_STATUS : DP_PSR_ERROR_STATUS; + + ret = drm_dp_dpcd_readb(aux, offset, error_status); if (ret != 1) return ret; @@ -2954,7 +3037,7 @@ psr_source_status(struct intel_dp *intel_dp, struct seq_file *m) status = live_status[status_val]; } - seq_printf(m, "Source PSR status: %s [0x%08x]\n", status, val); + seq_printf(m, "Source PSR/PanelReplay status: %s [0x%08x]\n", status, val); } static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp) @@ -2967,18 +3050,22 @@ static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp) bool enabled; u32 val; - seq_printf(m, "Sink support: %s", str_yes_no(psr->sink_support)); + seq_printf(m, "Sink support: PSR = %s", + str_yes_no(psr->sink_support)); + if (psr->sink_support) seq_printf(m, " [0x%02x]", intel_dp->psr_dpcd[0]); - seq_puts(m, "\n"); + seq_printf(m, ", Panel Replay = %s\n", str_yes_no(psr->sink_panel_replay_support)); - if (!psr->sink_support) + if (!(psr->sink_support || psr->sink_panel_replay_support)) return 0; wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&psr->lock); - if (psr->enabled) + if (psr->panel_replay_enabled) + status = "Panel Replay Enabled"; + else if (psr->enabled) status = psr->psr2_enabled ? "PSR2 enabled" : "PSR1 enabled"; else status = "disabled"; @@ -2991,14 +3078,17 @@ static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp) goto unlock; } - if (psr->psr2_enabled) { + if (psr->panel_replay_enabled) { + val = intel_de_read(dev_priv, TRANS_DP2_CTL(cpu_transcoder)); + enabled = val & TRANS_DP2_PANEL_REPLAY_ENABLE; + } else if (psr->psr2_enabled) { val = intel_de_read(dev_priv, EDP_PSR2_CTL(cpu_transcoder)); enabled = val & EDP_PSR2_ENABLE; } else { val = intel_de_read(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder)); enabled = val & EDP_PSR_ENABLE; } - seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", + seq_printf(m, "Source PSR/PanelReplay ctl: %s [0x%08x]\n", str_enabled_disabled(enabled), val); psr_source_status(intel_dp, m); seq_printf(m, "Busy frontbuffer bits: 0x%08x\n", @@ -3136,6 +3226,16 @@ void intel_psr_debugfs_register(struct drm_i915_private *i915) i915, &i915_edp_psr_status_fops); } +static const char *psr_mode_str(struct intel_dp *intel_dp) +{ + if (intel_dp->psr.panel_replay_enabled) + return "PANEL-REPLAY"; + else if (intel_dp->psr.enabled) + return "PSR"; + + return "unknown"; +} + static int i915_psr_sink_status_show(struct seq_file *m, void *data) { struct intel_connector *connector = m->private; @@ -3150,12 +3250,19 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data) "reserved", "sink internal error", }; + static const char * const panel_replay_status[] = { + "Sink device frame is locked to the Source device", + "Sink device is coasting, using the VTotal target", + "Sink device is governing the frame rate (frame rate unlock is granted)", + "Sink device in the process of re-locking with the Source device", + }; const char *str; int ret; u8 status, error_status; + u32 idx; - if (!CAN_PSR(intel_dp)) { - seq_puts(m, "PSR Unsupported\n"); + if (!(CAN_PSR(intel_dp) || CAN_PANEL_REPLAY(intel_dp))) { + seq_puts(m, "PSR/Panel-Replay Unsupported\n"); return -ENODEV; } @@ -3166,15 +3273,20 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data) if (ret) return ret; - status &= DP_PSR_SINK_STATE_MASK; - if (status < ARRAY_SIZE(sink_status)) - str = sink_status[status]; - else - str = "unknown"; + str = "unknown"; + if (intel_dp->psr.panel_replay_enabled) { + idx = (status & 
DP_SINK_FRAME_LOCKED_MASK) >> DP_SINK_FRAME_LOCKED_SHIFT; + if (idx < ARRAY_SIZE(panel_replay_status)) + str = panel_replay_status[idx]; + } else if (intel_dp->psr.enabled) { + idx = status & DP_PSR_SINK_STATE_MASK; + if (idx < ARRAY_SIZE(sink_status)) + str = sink_status[idx]; + } - seq_printf(m, "Sink PSR status: 0x%x [%s]\n", status, str); + seq_printf(m, "Sink %s status: 0x%x [%s]\n", psr_mode_str(intel_dp), status, str); - seq_printf(m, "Sink PSR error status: 0x%x", error_status); + seq_printf(m, "Sink %s error status: 0x%x", psr_mode_str(intel_dp), error_status); if (error_status & (DP_PSR_RFB_STORAGE_ERROR | DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR | @@ -3183,11 +3295,11 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data) else seq_puts(m, "\n"); if (error_status & DP_PSR_RFB_STORAGE_ERROR) - seq_puts(m, "\tPSR RFB storage error\n"); + seq_printf(m, "\t%s RFB storage error\n", psr_mode_str(intel_dp)); if (error_status & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) - seq_puts(m, "\tPSR VSC SDP uncorrectable error\n"); + seq_printf(m, "\t%s VSC SDP uncorrectable error\n", psr_mode_str(intel_dp)); if (error_status & DP_PSR_LINK_CRC_ERROR) - seq_puts(m, "\tPSR Link CRC error\n"); + seq_printf(m, "\t%s Link CRC error\n", psr_mode_str(intel_dp)); return ret; } @@ -3207,13 +3319,16 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector) struct drm_i915_private *i915 = to_i915(connector->base.dev); struct dentry *root = connector->base.debugfs_entry; - if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) - return; + if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) { + if (!(HAS_DP20(i915) && + connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort)) + return; + } debugfs_create_file("i915_psr_sink_status", 0444, root, connector, &i915_psr_sink_status_fops); - if (HAS_PSR(i915)) + if (HAS_PSR(i915) || HAS_DP20(i915)) debugfs_create_file("i915_psr_status", 0444, root, connector, &i915_psr_status_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index bf35f42df6bc..143e0595c097 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -21,6 +21,13 @@ struct intel_encoder; struct intel_plane; struct intel_plane_state; +#define CAN_PSR(intel_dp) ((intel_dp)->psr.sink_support && \ + (intel_dp)->psr.source_support) + +#define CAN_PANEL_REPLAY(intel_dp) ((intel_dp)->psr.sink_panel_replay_support && \ + (intel_dp)->psr.source_panel_replay_support) + +bool intel_encoder_can_psr(struct intel_encoder *encoder); void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_pre_plane_update(struct intel_atomic_state *state, struct intel_crtc *crtc); @@ -48,16 +55,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp); int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state); -void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int color_plane); -void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); - -void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state); void intel_psr_pause(struct intel_dp *intel_dp); void intel_psr_resume(struct intel_dp 
*intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h index d39951383c92..efe4306b37e0 100644 --- a/drivers/gpu/drm/i915/display/intel_psr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h @@ -35,6 +35,8 @@ #define EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES REG_FIELD_PREP(EDP_PSR_MIN_LINK_ENTRY_TIME_MASK, 3) #define EDP_PSR_MAX_SLEEP_TIME_MASK REG_GENMASK(24, 20) #define EDP_PSR_MAX_SLEEP_TIME(x) REG_FIELD_PREP(EDP_PSR_MAX_SLEEP_TIME_MASK, (x)) +#define LNL_EDP_PSR_ENTRY_SETUP_FRAMES_MASK REG_GENMASK(17, 16) +#define LNL_EDP_PSR_ENTRY_SETUP_FRAMES(x) REG_FIELD_PREP(LNL_EDP_PSR_ENTRY_SETUP_FRAMES_MASK, (x)) #define EDP_PSR_SKIP_AUX_EXIT REG_BIT(12) #define EDP_PSR_TP_MASK REG_BIT(11) #define EDP_PSR_TP_TP1_TP2 REG_FIELD_PREP(EDP_PSR_TP_MASK, 0) diff --git a/drivers/gpu/drm/i915/display/intel_qp_tables.c b/drivers/gpu/drm/i915/display/intel_qp_tables.c index 543cdc46aa1d..600c815e37e4 100644 --- a/drivers/gpu/drm/i915/display/intel_qp_tables.c +++ b/drivers/gpu/drm/i915/display/intel_qp_tables.c @@ -34,9 +34,6 @@ * These qp tables are as per the C model * and it has the rows pointing to bpps which increment * in steps of 0.5 - * We do not support fractional bpps as of today, - * hence we would skip the fractional bpps during - * our references for qp calclulations. */ static const u8 rc_range_minqp444_8bpc[DSC_NUM_BUF_RANGES][RC_RANGE_QP444_8BPC_MAX_NUM_BPP] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index a9ac7d45d1f3..9218047495fb 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -35,6 +35,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include "i915_drv.h" #include "i915_reg.h" @@ -1787,17 +1788,28 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, intel_sdvo_get_eld(intel_sdvo, pipe_config); } -static void intel_sdvo_disable_audio(struct intel_sdvo *intel_sdvo) +static void intel_sdvo_disable_audio(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { + struct intel_sdvo *intel_sdvo = to_sdvo(encoder); + + if (!old_crtc_state->has_audio) + return; + intel_sdvo_set_audio_state(intel_sdvo, 0); } -static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo, +static void intel_sdvo_enable_audio(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_sdvo *intel_sdvo = to_sdvo(encoder); const u8 *eld = crtc_state->eld; + if (!crtc_state->has_audio) + return; + intel_sdvo_set_audio_state(intel_sdvo, 0); intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_ELD, @@ -1818,8 +1830,7 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; - if (old_crtc_state->has_audio) - intel_sdvo_disable_audio(intel_sdvo); + encoder->audio_disable(encoder, old_crtc_state, conn_state); intel_sdvo_set_active_outputs(intel_sdvo, 0); if (0) @@ -1913,8 +1924,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, DRM_MODE_DPMS_ON); intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag); - if (pipe_config->has_audio) - intel_sdvo_enable_audio(intel_sdvo, pipe_config, conn_state); + 
encoder->audio_enable(encoder, pipe_config, conn_state); } static enum drm_mode_status @@ -3396,6 +3406,8 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, } intel_encoder->pre_enable = intel_sdvo_pre_enable; intel_encoder->enable = intel_enable_sdvo; + intel_encoder->audio_enable = intel_sdvo_enable_audio; + intel_encoder->audio_disable = intel_sdvo_disable_audio; intel_encoder->get_hw_state = intel_sdvo_get_hw_state; intel_encoder->get_config = intel_sdvo_get_config; diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index ce5a73a4cc89..bc61e736f9b3 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -3,7 +3,7 @@ * Copyright © 2019 Intel Corporation */ -#include <linux/util_macros.h> +#include <linux/math.h> #include "i915_reg.h" #include "intel_ddi.h" diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 1fb16510f750..d7b440c8caef 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -48,6 +48,11 @@ #include "intel_frontbuffer.h" #include "intel_sprite.h" +static char sprite_name(struct drm_i915_private *i915, enum pipe pipe, int sprite) +{ + return pipe * DISPLAY_RUNTIME_INFO(i915)->num_sprites[pipe] + sprite + 'A'; +} + static void i9xx_plane_linear_gamma(u16 gamma[8]) { /* The points are not evenly spaced. */ @@ -1636,7 +1641,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, 0, plane_funcs, formats, num_formats, modifiers, DRM_PLANE_TYPE_OVERLAY, - "sprite %c", sprite_name(pipe, sprite)); + "sprite %c", sprite_name(dev_priv, pipe, sprite)); kfree(modifiers); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index f64d348a969e..dcf05e00e505 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1030,18 +1030,25 @@ static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enabl __xelpdp_tc_phy_enable_tcss_power(tc, enable); - if ((!tc_phy_wait_for_ready(tc) || - !xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) && - !drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) { - if (enable) { - __xelpdp_tc_phy_enable_tcss_power(tc, false); - xelpdp_tc_phy_wait_for_tcss_power(tc, false); - } + if (enable && !tc_phy_wait_for_ready(tc)) + goto out_disable; - return false; - } + if (!xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) + goto out_disable; return true; + +out_disable: + if (drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) + return false; + + if (!enable) + return false; + + __xelpdp_tc_phy_enable_tcss_power(tc, false); + xelpdp_tc_phy_wait_for_tcss_power(tc, false); + + return false; } static void xelpdp_tc_phy_take_ownership(struct intel_tc_port *tc, bool take) diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 2ee4f0d95851..d4386cb3569e 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1417,9 +1417,6 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv, static void set_color_conversion(struct drm_i915_private *dev_priv, const struct color_conversion *color_conversion) { - if (!color_conversion) - return; - intel_de_write(dev_priv, TV_CSC_Y, (color_conversion->ry << 16) | color_conversion->gy); intel_de_write(dev_priv, TV_CSC_Y2, @@ -1454,9 +1451,6 @@ static void intel_tv_pre_enable(struct intel_atomic_state 
*state, int xpos, ypos; unsigned int xsize, ysize; - if (!tv_mode) - return; /* can't happen (mode_prepare prevents this) */ - tv_ctl = intel_de_read(dev_priv, TV_CTL); tv_ctl &= TV_CTL_SAVE; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 2cec2abf9746..fe256bf7b485 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -265,6 +265,32 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) return (scanline + vtotal - crtc->scanline_offset) % vtotal; } +/* + * The uncore version of the spin lock functions is used to decide + * whether we need to lock the uncore lock or not. This is only + * needed in i915, not in Xe. + * + * This lock in i915 is needed because some old platforms (at least + * IVB and possibly HSW as well), which are not supported in Xe, need + * all register accesses to the same cacheline to be serialized, + * otherwise they may hang. + */ +static void intel_vblank_section_enter(struct drm_i915_private *i915) + __acquires(i915->uncore.lock) +{ +#ifdef I915 + spin_lock(&i915->uncore.lock); +#endif +} + +static void intel_vblank_section_exit(struct drm_i915_private *i915) + __releases(i915->uncore.lock) +{ +#ifdef I915 + spin_unlock(&i915->uncore.lock); +#endif +} + static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, bool in_vblank_irq, int *vpos, int *hpos, @@ -302,11 +328,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, } /* - * Lock uncore.lock, as we will do multiple timing critical raw - * register reads, potentially with preemption disabled, so the - * following code must not block on uncore.lock. + * Enter vblank critical section, as we will do multiple + * timing critical raw register reads, potentially with + * preemption disabled, so the following code must not block. */ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + local_irq_save(irqflags); + intel_vblank_section_enter(dev_priv); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -374,7 +401,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + intel_vblank_section_exit(dev_priv); + local_irq_restore(irqflags); /* * While in vblank, position will be negative @@ -412,9 +440,13 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) unsigned long irqflags; int position; - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + local_irq_save(irqflags); + intel_vblank_section_enter(dev_priv); + position = __intel_get_crtc_scanline(crtc); - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + + intel_vblank_section_exit(dev_priv); + local_irq_restore(irqflags); return position; } @@ -537,7 +569,7 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, * Need to audit everything to make sure it's safe. 
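The intel_vblank_section_enter()/exit() pair introduced above exists so this file can be built for both i915 and Xe: interrupts are always disabled around the timing-critical register reads, while the uncore spinlock (needed only on the old platforms that i915 alone supports) compiles away in the Xe build. The resulting pattern at every converted call site:

    local_irq_save(irqflags);
    intel_vblank_section_enter(dev_priv);   /* uncore lock, i915 build only */

    /* timing-critical raw register reads; must not block */

    intel_vblank_section_exit(dev_priv);
    local_irq_restore(irqflags);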
*/ spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags); - spin_lock(&i915->uncore.lock); + intel_vblank_section_enter(i915); drm_calc_timestamping_constants(&crtc->base, &adjusted_mode); @@ -546,7 +578,6 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, crtc->mode_flags = mode_flags; crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state); - - spin_unlock(&i915->uncore.lock); + intel_vblank_section_exit(i915); spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags); } diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 6757dbae9ee5..17d6572f9d0a 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -77,8 +77,8 @@ intel_vdsc_set_min_max_qp(struct drm_dsc_config *vdsc_cfg, int buf, static void calculate_rc_params(struct drm_dsc_config *vdsc_cfg) { + int bpp = to_bpp_int(vdsc_cfg->bits_per_pixel); int bpc = vdsc_cfg->bits_per_component; - int bpp = vdsc_cfg->bits_per_pixel >> 4; int qp_bpc_modifier = (bpc - 8) * 2; int uncompressed_bpg_rate; int first_line_bpg_offset; @@ -148,7 +148,13 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg) static const s8 ofs_und8[] = { 10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12 }; - + /* + * For 420 format since bits_per_pixel (bpp) is set to target bpp * 2, + * QP table values for target bpp 4.0 to 4.4375 (rounded to 4.0) are + * actually for bpp 8 to 8.875 (rounded to 4.0 * 2 i.e 8). + * Similarly values for target bpp 4.5 to 4.8375 (rounded to 4.5) + * are for bpp 9 to 9.875 (rounded to 4.5 * 2 i.e 9), and so on. + */ bpp_i = bpp - 8; for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) { u8 range_bpg_offset; @@ -178,6 +184,9 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg) range_bpg_offset & DSC_RANGE_BPG_OFFSET_MASK; } } else { + /* fractional bpp part * 10000 (for precision up to 4 decimal places) */ + int fractional_bits = to_bpp_frac(vdsc_cfg->bits_per_pixel); + static const s8 ofs_und6[] = { 0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12 }; @@ -191,7 +200,14 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg) 10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12 }; - bpp_i = (2 * (bpp - 6)); + /* + * QP table rows have values in increment of 0.5. + * So 6.0 bpp to 6.4375 will have index 0, 6.5 to 6.9375 will have index 1, + * and so on. + * 0.5 fractional part with 4 decimal precision becomes 5000 + */ + bpp_i = ((bpp - 6) + (fractional_bits < 5000 ? 
0 : 1)); + for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) { u8 range_bpg_offset; @@ -248,7 +264,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config; - u16 compressed_bpp = pipe_config->dsc.compressed_bpp; + u16 compressed_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); int err; int ret; @@ -279,8 +295,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) /* Gen 11 does not support VBR */ vdsc_cfg->vbr_enable = false; - /* Gen 11 only supports integral values of bpp */ - vdsc_cfg->bits_per_pixel = compressed_bpp << 4; + vdsc_cfg->bits_per_pixel = pipe_config->dsc.compressed_bpp_x16; /* * According to DSC 1.2 specs in Section 4.1 if native_420 is set @@ -797,13 +812,13 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) } static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps, - bool *check_equal) + bool *all_equal) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); i915_reg_t dsc_reg[2]; int i, vdsc_per_pipe, dsc_reg_num; - u32 val = 0; + u32 val; vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state); dsc_reg_num = min_t(int, ARRAY_SIZE(dsc_reg), vdsc_per_pipe); @@ -812,20 +827,13 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps, intel_dsc_get_pps_reg(crtc_state, pps, dsc_reg, dsc_reg_num); - if (check_equal) - *check_equal = true; - - for (i = 0; i < dsc_reg_num; i++) { - u32 tmp; + *all_equal = true; - tmp = intel_de_read(i915, dsc_reg[i]); + val = intel_de_read(i915, dsc_reg[0]); - if (i == 0) { - val = tmp; - } else if (check_equal && tmp != val) { - *check_equal = false; - break; - } else if (!check_equal) { + for (i = 1; i < dsc_reg_num; i++) { + if (intel_de_read(i915, dsc_reg[i]) != val) { + *all_equal = false; break; } } @@ -874,7 +882,7 @@ static void intel_dsc_get_pps_config(struct intel_crtc_state *crtc_state) if (vdsc_cfg->native_420) vdsc_cfg->bits_per_pixel >>= 1; - crtc_state->dsc.compressed_bpp = vdsc_cfg->bits_per_pixel >> 4; + crtc_state->dsc.compressed_bpp_x16 = vdsc_cfg->bits_per_pixel; /* PPS 2 */ pps_temp = intel_dsc_pps_read_and_verify(crtc_state, 2); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 245a64332cc7..511dc1544854 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -18,10 +18,10 @@ #include "intel_fbc.h" #include "intel_frontbuffer.h" #include "intel_psr.h" +#include "intel_psr_regs.h" #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" -#include "gt/intel_gt.h" #include "pxp/intel_pxp.h" static const u32 skl_plane_formats[] = { @@ -630,6 +630,18 @@ skl_plane_disable_arm(struct intel_plane *plane, intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); } +static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); +} + static void icl_plane_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) @@ -643,7 
+655,7 @@ icl_plane_disable_arm(struct intel_plane *plane, skl_write_plane_wm(plane, crtc_state); - intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state); + icl_plane_disable_sel_fetch_arm(plane, crtc_state); intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0); intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); } @@ -1007,7 +1019,8 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, * The DPT object contains only one vma, so the VMA's offset * within the DPT is always 0. */ - drm_WARN_ON(&i915->drm, plane_state->dpt_vma->node.start); + drm_WARN_ON(&i915->drm, plane_state->dpt_vma && + plane_state->dpt_vma->node.start); drm_WARN_ON(&i915->drm, offset & 0x1fffff); return offset >> 9; } else { @@ -1197,6 +1210,48 @@ skl_plane_update_arm(struct intel_plane *plane, skl_plane_surf(plane_state, 0)); } +static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int color_plane) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + const struct drm_rect *clip; + u32 val; + int x, y; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + clip = &plane_state->psr2_sel_fetch_area; + + val = (clip->y1 + plane_state->uapi.dst.y1) << 16; + val |= plane_state->uapi.dst.x1; + intel_de_write_fw(i915, PLANE_SEL_FETCH_POS(pipe, plane->id), val); + + x = plane_state->view.color_plane[color_plane].x; + + /* + * From Bspec: UV surface Start Y Position = half of Y plane Y + * start position. + */ + if (!color_plane) + y = plane_state->view.color_plane[color_plane].y + clip->y1; + else + y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2; + + val = y << 16 | x; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_OFFSET(pipe, plane->id), + val); + + /* Sizes are 0 based */ + val = (drm_rect_height(clip) - 1) << 16; + val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1; + intel_de_write_fw(i915, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val); +} + static void icl_plane_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -1269,7 +1324,24 @@ icl_plane_update_noarm(struct intel_plane *plane, if (plane_state->force_black) icl_plane_csc_load_black(plane); - intel_psr2_program_plane_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane); + icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane); +} + +static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0) + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + PLANE_SEL_FETCH_CTL_ENABLE); + else + icl_plane_disable_sel_fetch_arm(plane, crtc_state); } static void @@ -1296,7 +1368,7 @@ icl_plane_update_arm(struct intel_plane *plane, if (plane_state->scaler_id >= 0) skl_program_plane_scaler(plane, crtc_state, plane_state); - intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, plane_state); + icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state); /* * The control register self-arms if the plane was previously @@ -1855,16 +1927,19 @@ static bool skl_fb_scalable(const struct drm_framebuffer *fb) } } -static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj) +static 
void check_protection(struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct drm_i915_private *i915 = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - return intel_pxp_key_check(i915->pxp, obj, false) == 0; -} + if (DISPLAY_VER(i915) < 11) + return; -static bool pxp_is_borked(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_is_protected(obj) && !bo_has_valid_encryption(obj); + plane_state->decrypt = intel_pxp_key_check(i915->pxp, obj, false) == 0; + plane_state->force_black = i915_gem_object_is_protected(obj) && + !plane_state->decrypt; } static int skl_plane_check(struct intel_crtc_state *crtc_state, @@ -1911,10 +1986,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (DISPLAY_VER(dev_priv) >= 11) { - plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb)); - plane_state->force_black = pxp_is_borked(intel_fb_obj(fb)); - } + check_protection(plane_state); /* HW only has 8 bits pixel precision, disable plane if invisible */ if (!(plane_state->hw.alpha >> 8)) @@ -2489,7 +2561,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, goto error; } - if (!dev_priv->params.enable_dpt && + if (!dev_priv->display.params.enable_dpt && intel_fb_modifier_uses_dpt(dev_priv, fb->modifier)) { drm_dbg_kms(&dev_priv->drm, "DPT disabled, skipping initial FB\n"); goto error; diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 99b8ccdc3dfa..56588d6e24ae 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -412,7 +412,7 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); - if (!i915->params.enable_sagv) + if (!i915->display.params.enable_sagv) return false; if (DISPLAY_VER(i915) >= 12) @@ -3702,7 +3702,8 @@ static int intel_sagv_status_show(struct seq_file *m, void *unused) }; seq_printf(m, "SAGV available: %s\n", str_yes_no(intel_has_sagv(i915))); - seq_printf(m, "SAGV modparam: %s\n", str_enabled_disabled(i915->params.enable_sagv)); + seq_printf(m, "SAGV modparam: %s\n", + str_enabled_disabled(i915->display.params.enable_sagv)); seq_printf(m, "SAGV status: %s\n", sagv_status[i915->display.sagv.status]); seq_printf(m, "SAGV block time: %d usec\n", i915->display.sagv.block_time_us); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index f488394d3108..9b33b8a74d64 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -561,6 +561,12 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) glk_dsi_disable_mipi_io(encoder); } +static i915_reg_t port_ctrl_reg(struct drm_i915_private *i915, enum port port) +{ + return IS_GEMINILAKE(i915) || IS_BROXTON(i915) ? 
+ BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); +} + static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -570,7 +576,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) drm_dbg_kms(&dev_priv->drm, "\n"); for_each_dsi_port(port, intel_dsi->ports) { /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? + i915_reg_t port_ctrl = IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); intel_de_write(dev_priv, MIPI_DEVICE_READY(port), @@ -589,7 +595,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI * Port A only. MIPI Port C has no similar bit for checking. */ - if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || port == PORT_A) && + if ((IS_BROXTON(dev_priv) || port == PORT_A) && intel_de_wait_for_clear(dev_priv, port_ctrl, AFE_LATCHOUT, 30)) drm_err(&dev_priv->drm, "DSI LP not going Low\n"); @@ -627,8 +633,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, } for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? - BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); + i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port); u32 temp; temp = intel_de_read(dev_priv, port_ctrl); @@ -664,8 +669,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) enum port port; for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? - BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); + i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port); /* de-assert ip_tg_enable signal */ intel_de_rmw(dev_priv, port_ctrl, DPI_ENABLE, 0); @@ -955,9 +959,8 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, /* XXX: this only works for one DSI output */ for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t ctrl_reg = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? 
- BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); - bool enabled = intel_de_read(dev_priv, ctrl_reg) & DPI_ENABLE; + i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port); + bool enabled = intel_de_read(dev_priv, port_ctrl) & DPI_ENABLE; /* * Due to some hardware limitations on VLV/CHV, the DPI enable @@ -1529,16 +1532,8 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) } } -static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) -{ - struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); - - intel_dsi_vbt_gpio_cleanup(intel_dsi); - intel_encoder_destroy(encoder); -} - static const struct drm_encoder_funcs intel_dsi_funcs = { - .destroy = intel_dsi_encoder_destroy, + .destroy = intel_encoder_destroy, }; static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e38f06a6e56e..dcbfe32fd30c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915, } static struct i915_gem_proto_context * -proto_context_create(struct drm_i915_private *i915, unsigned int flags) +proto_context_create(struct drm_i915_file_private *fpriv, + struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_proto_context *pc, *err; @@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) if (!pc) return ERR_PTR(-ENOMEM); + pc->fpriv = fpriv; pc->num_user_engines = -1; pc->user_engines = NULL; pc->user_flags = BIT(UCONTEXT_BANNABLE) | @@ -1622,6 +1624,7 @@ i915_gem_create_context(struct drm_i915_private *i915, err = PTR_ERR(ppgtt); goto err_ctx; } + ppgtt->vm.fpriv = pc->fpriv; vm = &ppgtt->vm; } if (vm) @@ -1741,7 +1744,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - pc = proto_context_create(i915, 0); + pc = proto_context_create(file_priv, i915, 0); if (IS_ERR(pc)) { err = PTR_ERR(pc); goto err; @@ -1823,6 +1826,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ args->vm_id = id; + ppgtt->vm.fpriv = file_priv; return 0; err_put: @@ -2285,7 +2289,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.pc = proto_context_create(i915, args->flags); + ext_data.pc = proto_context_create(file->driver_priv, i915, + args->flags); if (IS_ERR(ext_data.pc)) return PTR_ERR(ext_data.pc); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cb78214a7dcd..c573c067779f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -188,6 +188,9 @@ struct i915_gem_proto_engine { * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. 
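The fpriv argument threaded through proto_context_create() above records which DRM client created a context, and the same owner is copied onto any ppgtt the context instantiates (ppgtt->vm.fpriv). Judging by the fdinfo-related object-tracking changes later in this series, this is presumably plumbing for per-client accounting; the flow, restated from the hunks:

    /* ioctl path: the file's private data identifies the client */
    pc = proto_context_create(file->driver_priv, i915, args->flags);

    /* context creation: the owner follows any newly created VM */
    ppgtt->vm.fpriv = pc->fpriv;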
*/ struct i915_gem_proto_context { + /** @fpriv: Client which creates the context */ + struct drm_i915_file_private *fpriv; + /** @vm: See &i915_gem_context.vm */ struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 683fd8d3151c..555022c0652c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -9,6 +9,7 @@ #include <linux/sync_file.h> #include <linux/uaccess.h> +#include <drm/drm_auth.h> #include <drm/drm_syncobj.h> #include "display/intel_frontbuffer.h" @@ -253,6 +254,8 @@ struct i915_execbuffer { struct intel_gt *gt; /* gt for the execbuf */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ + intel_wakeref_t wakeref; + intel_wakeref_t wakeref_gt0; /** our requests to build */ struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; @@ -1156,7 +1159,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache) vaddr = unmask_page(cache->vaddr); if (cache->vaddr & KMAP) - kunmap_atomic(vaddr); + kunmap_local(vaddr); else io_mapping_unmap_atomic((void __iomem *)vaddr); } @@ -1172,7 +1175,7 @@ static void reloc_cache_remap(struct reloc_cache *cache, if (cache->vaddr & KMAP) { struct page *page = i915_gem_object_get_page(obj, cache->page); - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; } else { @@ -1202,7 +1205,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer if (cache->vaddr & CLFLUSH_AFTER) mb(); - kunmap_atomic(vaddr); + kunmap_local(vaddr); i915_gem_object_finish_access(obj); } else { struct i915_ggtt *ggtt = cache_to_ggtt(cache); @@ -1234,7 +1237,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, struct page *page; if (cache->vaddr) { - kunmap_atomic(unmask_page(cache->vaddr)); + kunmap_local(unmask_page(cache->vaddr)); } else { unsigned int flushes; int err; @@ -1256,7 +1259,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, if (!obj->mm.dirty) set_page_dirty(page); - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; cache->page = pageno; @@ -1678,7 +1681,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); size = nreloc * sizeof(*relocs); - relocs = kvmalloc_array(size, 1, GFP_KERNEL); + relocs = kvmalloc_array(1, size, GFP_KERNEL); if (!relocs) { err = -ENOMEM; goto err; @@ -2719,13 +2722,13 @@ eb_select_engine(struct i915_execbuffer *eb) for_each_child(ce, child) intel_context_get(child); - intel_gt_pm_get(gt); + eb->wakeref = intel_gt_pm_get(ce->engine->gt); /* * Keep GT0 active on MTL so that i915_vma_parked() doesn't * free VMAs while execbuf ioctl is validating VMAs. 
*/ if (gt->info.id) - intel_gt_pm_get(to_gt(gt->i915)); + eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915)); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { err = intel_context_alloc_state(ce); @@ -2765,9 +2768,9 @@ eb_select_engine(struct i915_execbuffer *eb) err: if (gt->info.id) - intel_gt_pm_put(to_gt(gt->i915)); + intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0); - intel_gt_pm_put(gt); + intel_gt_pm_put(ce->engine->gt, eb->wakeref); for_each_child(ce, child) intel_context_put(child); intel_context_put(ce); @@ -2785,8 +2788,8 @@ eb_put_engine(struct i915_execbuffer *eb) * i915_vma_parked() from interfering while execbuf validates vmas. */ if (eb->gt->info.id) - intel_gt_pm_put(to_gt(eb->gt->i915)); - intel_gt_pm_put(eb->gt); + intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0); + intel_gt_pm_put(eb->context->engine->gt, eb->wakeref); for_each_child(eb->context, child) intel_context_put(child); intel_context_put(eb->context); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index c26d87555825..58e6c680fe0d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -106,6 +106,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->mm.link); +#ifdef CONFIG_PROC_FS + INIT_LIST_HEAD(&obj->client_link); +#endif + INIT_LIST_HEAD(&obj->lut_list); spin_lock_init(&obj->lut_lock); @@ -293,6 +297,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); + /* We need to keep this alive for RCU read access from fdinfo. */ + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); @@ -389,9 +397,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj) if (obj->ops->release) obj->ops->release(obj); - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); - if (obj->shares_resv_from) i915_vm_resv_put(obj->shares_resv_from); @@ -442,6 +447,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); + i915_drm_client_remove_object(obj); + /* * Before we free the object, make sure any pure RCU-only * read-side critical sections are complete, e.g. 
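The kmap_atomic()/kunmap_atomic() conversions in the hunks above and below follow the standard highmem migration pattern: kmap_local_page() returns a CPU-local, nestable mapping that no longer disables preemption or pagefaults, so code that depended on those side effects has to restore them explicitly, which is exactly what the shmem_pwrite() hunk further down does with pagefault_disable()/pagefault_enable(). A minimal before/after sketch (these helper functions are illustrative, not part of the patch):

#include <linux/highmem.h>
#include <linux/string.h>

/* Before: kmap_atomic() implicitly disabled preemption and pagefaults. */
static void copy_out_atomic(struct page *page, void *dst)
{
	void *vaddr = kmap_atomic(page);

	memcpy(dst, vaddr, PAGE_SIZE);
	kunmap_atomic(vaddr);
}

/* After: the mapping is still CPU-local, but the section stays
 * preemptible; kunmap_local() is passed the mapped address.
 */
static void copy_out_local(struct page *page, void *dst)
{
	void *vaddr = kmap_local_page(page);

	memcpy(dst, vaddr, PAGE_SIZE);
	kunmap_local(vaddr);
}

/* Whole-page copies can use the one-shot helpers that the
 * i915_gem_phys.c hunks below switch to.
 */
static void copy_out_page(struct page *page, void *dst)
{
	memcpy_from_page(dst, page, 0, PAGE_SIZE);
}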
@@ -493,17 +500,15 @@ static void i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) { pgoff_t idx = offset >> PAGE_SHIFT; - void *src_map; void *src_ptr; - src_map = kmap_atomic(i915_gem_object_get_page(obj, idx)); - - src_ptr = src_map + offset_in_page(offset); + src_ptr = kmap_local_page(i915_gem_object_get_page(obj, idx)) + + offset_in_page(offset); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) drm_clflush_virt_range(src_ptr, size); memcpy(dst, src_ptr, size); - kunmap_atomic(src_map); + kunmap_local(src_ptr); } static void diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index e5e870b6f186..9fbf14867a2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,6 +89,7 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); + drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2292404007c8..0c5cdab278b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -302,6 +302,18 @@ struct drm_i915_gem_object { */ struct i915_address_space *shares_resv_from; +#ifdef CONFIG_PROC_FS + /** + * @client: @i915_drm_client which created the object + */ + struct i915_drm_client *client; + + /** + * @client_link: Link into @i915_drm_client.objects_list + */ + struct list_head client_link; +#endif + union { struct rcu_head rcu; struct llist_node freed; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 5df128e2f4dc..ef85c6dc9fd5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -65,16 +65,13 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) dst = vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - void *src; page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) goto err_st; - src = kmap_atomic(page); - memcpy(dst, src, PAGE_SIZE); + memcpy_from_page(dst, page, 0, PAGE_SIZE); drm_clflush_virt_range(dst, PAGE_SIZE); - kunmap_atomic(src); put_page(page); dst += PAGE_SIZE; @@ -113,16 +110,13 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - char *dst; page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) continue; - dst = kmap_atomic(page); drm_clflush_virt_range(src, PAGE_SIZE); - memcpy(dst, src, PAGE_SIZE); - kunmap_atomic(dst); + memcpy_to_page(page, 0, src, PAGE_SIZE); set_page_dirty(page); if (obj->mm.madv == I915_MADV_WILLNEED) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 73a4a4eb29e0..38b72d86560f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -485,11 +485,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj, if (err < 0) return err; - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); + pagefault_disable(); unwritten = __copy_from_user_inatomic(vaddr + pg, user_data, len); - kunmap_atomic(vaddr); + pagefault_enable(); + 
kunmap_local(vaddr); err = aops->write_end(obj->base.filp, mapping, offset, len, len - unwritten, page, data); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 1a766d8e7cce..8c88075eeab2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -386,6 +386,27 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); + /* Wa_14019821291 */ + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) { + /* + * This workaround is primarily implemented by the BIOS. We + * just need to figure out whether the BIOS has applied the + * workaround (meaning the programmed address falls within + * the DSM) and, if so, reserve that part of the DSM to + * prevent accidental reuse. The DSM location should be just + * below the WOPCM. + */ + u64 gscpsmi_base = intel_uncore_read64_2x32(uncore, + MTL_GSCPSMI_BASEADDR_LSB, + MTL_GSCPSMI_BASEADDR_MSB); + if (gscpsmi_base >= i915->dsm.stolen.start && + gscpsmi_base < i915->dsm.stolen.end) { + *base = gscpsmi_base; + *size = i915->dsm.stolen.end - gscpsmi_base; + return; + } + } + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { case GEN8_STOLEN_RESERVED_1M: *size = 1024 * 1024; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 84c50c4c4af7..3ff3d8889c6c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1082,7 +1082,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) goto err_unlock; for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n)); if (needs_flush & CLFLUSH_BEFORE) drm_clflush_virt_range(ptr, PAGE_SIZE); @@ -1090,12 +1090,12 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (ptr[dword] != val) { pr_err("n=%lu ptr[%u]=%u, val=%u\n", n, dword, ptr[dword], val); - kunmap_atomic(ptr); + kunmap_local(ptr); err = -EINVAL; break; } - kunmap_atomic(ptr); + kunmap_local(ptr); } i915_gem_object_finish_access(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 3bef1beec7cb..2a0c0634d446 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -24,7 +24,6 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; - void *map; u32 *cpu; int err; @@ -34,8 +33,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); + cpu = kmap_local_page(page) + offset_in_page(offset); if (needs_clflush & CLFLUSH_BEFORE) drm_clflush_virt_range(cpu, sizeof(*cpu)); @@ -45,7 +43,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) if (needs_clflush & CLFLUSH_AFTER) drm_clflush_virt_range(cpu, sizeof(*cpu)); - kunmap_atomic(map); + kunmap_local(cpu); i915_gem_object_finish_access(ctx->obj); out: @@ -57,7 +55,6 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) { unsigned int needs_clflush; struct page *page; - void *map; u32 *cpu; int err; @@ -67,15 +64,14 @@ static int cpu_get(struct context 
*ctx, unsigned long offset, u32 *v) goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); + cpu = kmap_local_page(page) + offset_in_page(offset); if (needs_clflush & CLFLUSH_BEFORE) drm_clflush_virt_range(cpu, sizeof(*cpu)); *v = *cpu; - kunmap_atomic(map); + kunmap_local(cpu); i915_gem_object_finish_access(ctx->obj); out: @@ -85,6 +81,7 @@ out: static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { + intel_wakeref_t wakeref; struct i915_vma *vma; u32 __iomem *map; int err = 0; @@ -99,7 +96,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -112,12 +109,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) i915_vma_unpin_iomap(vma); out_rpm: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { + intel_wakeref_t wakeref; struct i915_vma *vma; u32 __iomem *map; int err = 0; @@ -132,7 +130,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -145,7 +143,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) i915_vma_unpin_iomap(vma); out_rpm: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7021b6e9b219..89d4dc8b60c6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -489,12 +489,12 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) for (n = 0; n < real_page_count(obj); n++) { u32 *map; - map = kmap_atomic(i915_gem_object_get_page(obj, n)); + map = kmap_local_page(i915_gem_object_get_page(obj, n)); for (m = 0; m < DW_PER_PAGE; m++) map[m] = value; if (!has_llc) drm_clflush_virt_range(map, PAGE_SIZE); - kunmap_atomic(map); + kunmap_local(map); } i915_gem_object_finish_access(obj); @@ -520,7 +520,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, for (n = 0; n < real_page_count(obj); n++) { u32 *map, m; - map = kmap_atomic(i915_gem_object_get_page(obj, n)); + map = kmap_local_page(i915_gem_object_get_page(obj, n)); if (needs_flush & CLFLUSH_BEFORE) drm_clflush_virt_range(map, PAGE_SIZE); @@ -546,7 +546,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, } out_unmap: - kunmap_atomic(map); + kunmap_local(map); if (err) break; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index e57f9390076c..d684a70f2c04 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -504,7 +504,7 @@ static int igt_dmabuf_export_vmap(void *arg) } if (memchr_inv(ptr, 0, dmabuf->size)) { - pr_err("Exported object not initialiased to zero!\n"); + pr_err("Exported object not initialised to zero!\n"); err = -EINVAL; goto out; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 72957a36a36b..2c51a2c452fc 
100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(to_gt(i915)); + intel_gt_pm_get_untracked(to_gt(i915)); cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { igt_flush_test(i915); - intel_gt_pm_put(to_gt(i915)); + intel_gt_pm_put_untracked(to_gt(i915)); i915_gem_driver_register__shrinker(i915); } @@ -778,6 +778,7 @@ err_obj: static int gtt_set(struct drm_i915_gem_object *obj) { + intel_wakeref_t wakeref; struct i915_vma *vma; void __iomem *map; int err = 0; @@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); if (IS_ERR(map)) { @@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj) i915_vma_unpin_iomap(vma); out: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } static int gtt_check(struct drm_i915_gem_object *obj) { + intel_wakeref_t wakeref; struct i915_vma *vma; void __iomem *map; int err = 0; @@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); if (IS_ERR(map)) { @@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj) i915_vma_unpin_iomap(vma); out: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index e199d7dbb876..2b0327cc47c2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file) int err; u32 id; - pc = proto_context_create(i915, 0); + pc = proto_context_create(fpriv, i915, 0); if (IS_ERR(pc)) return ERR_CAST(pc); @@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915, struct i915_gem_context *ctx; struct i915_gem_proto_context *pc; - pc = proto_context_create(i915, 0); + pc = proto_context_create(NULL, i915, 0); if (IS_ERR(pc)) return ERR_CAST(pc); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 9895e18df043..fa46d2308b0e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -5,6 +5,7 @@ #include <linux/log2.h> +#include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gen8_ppgtt.h" @@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + if (vm->rsvd.obj) + i915_gem_object_put(vm->rsvd.obj); + if (intel_vgpu_active(vm->i915)) gen8_ppgtt_notify_vgt(ppgtt, false); @@ -950,6 +954,41 @@ err_pd: return ERR_PTR(err); } +static int gen8_init_rsvd(struct i915_address_space *vm) +{ + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + /* The memory will be used only by GPU. 
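 * It is never mapped for CPU access (note I915_BO_ALLOC_GPU_ONLY), and
 * because it is pinned PIN_HIGH below and vm->total is shrunk by its
 * size, userspace can never bind anything over it. The Wa_16018031267
 * WABB blit in intel_lrc.c later uses this page as its dummy write
 * target.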
*/ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, + I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_GPU_ONLY); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (ret) + goto unref; + + vm->rsvd.vma = i915_vma_make_unshrinkable(vma); + vm->rsvd.obj = obj; + vm->total -= vma->node.size; + return 0; +unref: + i915_gem_object_put(obj); + return ret; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1031,6 +1070,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); + err = gen8_init_rsvd(&ppgtt->vm); + if (err) + goto err_put; + return ppgtt; err_put: diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ecc990ec1b95..d650beb8ed22 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref; + /* * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. */ - if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt))) + wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt); + if (GEM_WARN_ON(!wakeref)) return; /* @@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * which we can add a new waiter and avoid the cost of re-enabling * the irq. */ - WRITE_ONCE(b->irq_armed, true); + WRITE_ONCE(b->irq_armed, wakeref); /* Requests may have completed before we could enable the interrupt. 
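 *
 * Note for the conversions in this series: intel_gt_pm_get() now
 * returns an intel_wakeref_t tracking handle which must be handed back
 * to intel_gt_pm_put(). A held handle is never 0, so it can double as
 * a boolean, which is why irq_armed above changes from bool to
 * intel_wakeref_t. The caller-side shape, as a sketch:
 *
 *	intel_wakeref_t wakeref;
 *
 *	wakeref = intel_gt_pm_get(gt);
 *	... touch the hardware ...
 *	intel_gt_pm_put(gt, wakeref);
 *
 * or, scoped: with_intel_gt_pm(gt, wakeref) { ... }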
*/ if (!b->irq_enabled++ && b->irq_enable(b)) @@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref = b->irq_armed; + GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) b->irq_disable(b); - WRITE_ONCE(b->irq_armed, false); - intel_gt_pm_put_async(b->irq_engine->gt); + WRITE_ONCE(b->irq_armed, 0); + intel_gt_pm_put_async(b->irq_engine->gt, wakeref); } static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 72dfd3748c4c..bdf09fd67b6e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include "intel_engine_types.h" +#include "intel_wakeref.h" /* * Rather than have every client wait upon all user interrupts, @@ -43,7 +44,7 @@ struct intel_breadcrumbs { spinlock_t irq_lock; /* protects the interrupt from hardirq context */ struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; - bool irq_armed; + intel_wakeref_t irq_armed; /* Not all breadcrumbs are attached to physical HW */ intel_engine_mask_t engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a53b26178f0a..a2f1245741bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -6,6 +6,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" +#include "i915_drm_client.h" #include "i915_drv.h" #include "i915_trace.h" @@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine) int intel_context_alloc_state(struct intel_context *ce) { + struct i915_gem_context *ctx; int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) @@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce) goto unlock; set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (ctx) { + if (ctx->client) + i915_drm_client_add_context_objects(ctx->client, + ce); + i915_gem_context_put(ctx); + } } unlock: diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index a80e3b7c24ff..25564c01507e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce) return; ce->ops->enter(ce); - intel_gt_pm_get(ce->vm->gt); + ce->wakeref = intel_gt_pm_get(ce->vm->gt); } static inline void intel_context_mark_active(struct intel_context *ce) @@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce) if (--ce->active_count) return; - intel_gt_pm_put_async(ce->vm->gt); + intel_gt_pm_put_async(ce->vm->gt, ce->wakeref); ce->ops->exit(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index aceaac28a33e..7eccbd70d89f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -17,6 +17,7 @@ #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" +#include "intel_wakeref.h" #include "uc/intel_guc_fwif.h" @@ -112,6 +113,7 @@ struct intel_context { u32 ring_size; struct intel_ring *ring; struct 
intel_timeline *timeline; + intel_wakeref_t wakeref; unsigned long flags; #define CONTEXT_BARRIER_BIT 0 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4a11219e560e..40687806d22a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -47,7 +47,7 @@ #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) #define MAX_MMIO_BASES 3 struct engine_info { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 9a527e1f5be6..1a8e2b7db013 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk) * low latency and no jitter] the chance to naturally * complete before being preempted. */ - attr.priority = 0; + attr.priority = I915_PRIORITY_NORMAL; if (rq->sched.attr.priority >= attr.priority) attr.priority = I915_PRIORITY_HEARTBEAT; if (rq->sched.attr.priority >= attr.priority) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index e91fc881dbf1..96bdb93a948d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf) ENGINE_TRACE(engine, "\n"); - intel_gt_pm_get(engine->gt); + engine->wakeref_track = intel_gt_pm_get(engine->gt); /* Discard stale context state from across idling */ ce = engine->kernel_context; @@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq, */ GEM_BUG_ON(rq->context->active_count != 1); __intel_gt_pm_get(engine->gt); + rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref); /* * We have to serialise all potential retirement paths with our @@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf) engine->park(engine); /* While gt calls i915_vma_parked(), we have to break the lock cycle */ - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async(engine->gt, engine->wakeref_track); return 0; } @@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops); + intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name); intel_engine_init_heartbeat(engine); intel_gsc_idle_msg_enable(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index d68675925b79..1d97c435a015 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -10,6 +10,7 @@ #include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" +#include "intel_gt.h" #include "intel_gt_pm.h" static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index fdd4ddd3a978..a8eac59e3779 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -118,9 +118,15 @@ #define CCID_EXTENDED_STATE_RESTORE BIT(2) #define CCID_EXTENDED_STATE_SAVE BIT(3) #define RING_BB_PER_CTX_PTR(base) _MMIO((base) + 0x1c0) /* gen8+ */ +#define PER_CTX_BB_FORCE BIT(2) +#define PER_CTX_BB_VALID BIT(0) + #define RING_INDIRECT_CTX(base) 
_MMIO((base) + 0x1c4) /* gen8+ */ #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */ #define ECOSKPD(base) _MMIO((base) + 0x1d0) +#define XEHP_BLITTER_SCHEDULING_MODE_MASK REG_GENMASK(12, 11) +#define XEHP_BLITTER_ROUND_ROBIN_MODE \ + REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1) #define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4) #define ECO_GATING_CX_ONLY REG_BIT(3) #define GEN6_BLITTER_FBC_NOTIFY REG_BIT(3) @@ -257,5 +263,7 @@ #define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) #define ALNUNIT_CLKGATE_DIS REG_BIT(13) +#define VDBOX_CGCTL3F1C(base) _MMIO((base) + 0x3f1c) +#define MFXPIPE_CLKGATE_DIS REG_BIT(3) #endif /* __INTEL_ENGINE_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8769760257fd..960e6be2042f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -446,7 +446,9 @@ struct intel_engine_cs { unsigned long serial; unsigned long wakeref_serial; + intel_wakeref_t wakeref_track; struct intel_wakeref wakeref; + struct file *default_state; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e8f42ec6b1b4..42aade0faf2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async_untracked(engine->gt); /* * If this is part of a virtual engine, its next request may diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 15fc8e4703f4..21a7e3191c18 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -245,16 +245,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) gen8_ggtt_invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { - if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) { + if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) guc_ggtt_ct_invalidate(gt); - } else if (GRAPHICS_VER(i915) >= 12) { + else if (GRAPHICS_VER(i915) >= 12) intel_uncore_write_fw(gt->uncore, GEN12_GUC_TLB_INV_CR, GEN12_GUC_TLB_INV_CR_INVALIDATE); - } else { + else intel_uncore_write_fw(gt->uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); - } } } @@ -297,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) return intel_gt_is_bind_context_ready(gt); } -static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref) { struct intel_context *ce; struct intel_gt *gt = ggtt->vm.gt; @@ -314,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) * would conflict with fs_reclaim trying to allocate memory while * doing rpm_resume().
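 *
 * intel_gt_pm_get_if_awake() likewise now returns a tracking handle
 * rather than a bool: 0 means the GT was already parked and nothing
 * was acquired. The handle has to follow the reference, hence the new
 * *wakeref out-parameter on gen8_ggtt_bind_get_ce(). Caller-side
 * sketch:
 *
 *	wakeref = intel_gt_pm_get_if_awake(gt);
 *	if (!wakeref)
 *		return NULL;
 *
 * where returning NULL tells the caller to fall back without waking
 * the GT, and the successful path later releases with
 * gen8_ggtt_bind_put_ce(ce, wakeref).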
*/ - if (!intel_gt_pm_get_if_awake(gt)) + *wakeref = intel_gt_pm_get_if_awake(gt); + if (!*wakeref) return NULL; intel_engine_pm_get(ce->engine); @@ -322,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) return ce; } -static void gen8_ggtt_bind_put_ce(struct intel_context *ce) +static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref) { intel_engine_pm_put(ce->engine); - intel_gt_pm_put(ce->engine->gt); + intel_gt_pm_put(ce->engine->gt, wakeref); } static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, @@ -338,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, struct sgt_iter iter; struct i915_request *rq; struct intel_context *ce; + intel_wakeref_t wakeref; u32 *cs; if (!num_entries) return true; - ce = gen8_ggtt_bind_get_ce(ggtt); + ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref); if (!ce) return false; @@ -419,13 +420,13 @@ queue_err_rq: offset += n_ptes; } - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return true; err_rq: i915_request_put(rq); put_ce: - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return false; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ba1186fc524f..a425db5ed3a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, - RING_HEAD(RENDER_RING_BASE)); + RING_TAIL(RENDER_RING_BASE)); spin_unlock_irqrestore(&uncore->lock, flags); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 970bedf6b78a..608f5c872928 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -82,6 +82,10 @@ struct drm_printer; ##__VA_ARGS__); \ } while (0) +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline bool gt_is_root(struct intel_gt *gt) { return !gt->info.id; @@ -114,6 +118,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return guc_to_gt(guc)->i915; +} + void intel_gt_common_init_early(struct intel_gt *gt); int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); @@ -167,6 +176,20 @@ void intel_gt_release_all(struct drm_i915_private *i915); (id__)++) \ for_each_if(((gt__) = (i915__)->gt[(id__)])) +/* Simple iterator over all initialised engines */ +#define for_each_engine(engine__, gt__, id__) \ + for ((id__) = 0; \ + (id__) < I915_NUM_ENGINES; \ + (id__)++) \ + for_each_if ((engine__) = (gt__)->engine[(id__)]) + +/* Iterator over subset of engines selected by mask */ +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ + for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \ + (tmp__) ? 
\ ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c index 8f9b874fdc9c..3aa1d014c14d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c @@ -6,8 +6,8 @@ #include <drm/drm_print.h> -#include "i915_drv.h" /* for_each_engine! */ #include "intel_engine.h" +#include "intel_gt.h" #include "intel_gt_debugfs.h" #include "intel_gt_engines_debugfs.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 34913912d8ae..e253750a51c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * registers. This wakeref will be released in the unlock * routine. * - * This is expected to become a formally documented/numbered - * workaround soon. + * Wa_22018931422 */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index f5899d503e23..220ac4f92edf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -28,19 +28,20 @@ static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(&gt->user_wakeref); + intel_wakeref_t wakeref; /* Inside suspend/resume so single threaded, no races to worry about. */ if (likely(!count)) return; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); if (suspend) { GEM_BUG_ON(count > atomic_read(&gt->wakeref.count)); atomic_sub(count, &gt->wakeref.count); } else { atomic_add(count, &gt->wakeref.count); } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); } static void runtime_begin(struct intel_gt *gt) @@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt) * runtime_pm is per-device rather than per-tile, so this is still the * correct structure.
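 *
 * The intel_wakeref_init() call below also gains a name argument ("GT"
 * here, engine->name in intel_engine_init__pm()) so the wakeref
 * tracker can label each tracked reference in its debug output.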
*/ - intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops); + intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT"); seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex); } @@ -167,7 +168,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force) enum intel_engine_id id; intel_wakeref_t wakeref; - GT_TRACE(gt, "force:%s", str_yes_no(force)); + GT_TRACE(gt, "force:%s\n", str_yes_no(force)); /* Use a raw wakeref to avoid calling intel_display_power_get early */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); @@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err; err = intel_gt_has_unrecoverable_error(gt); @@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt) */ gt_sanitize(gt, true); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(&gt->rc6); @@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt) out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); intel_gt_bind_context_set_ready(gt); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b1eeb5b33918..911fd0160221 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) return intel_wakeref_is_active(&gt->wakeref); } -static inline void intel_gt_pm_get(struct intel_gt *gt) +static inline void intel_gt_pm_get_untracked(struct intel_gt *gt) { intel_wakeref_get(&gt->wakeref); } +static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt) +{ + intel_gt_pm_get_untracked(gt); + return intel_wakeref_track(&gt->wakeref); +} + static inline void __intel_gt_pm_get(struct intel_gt *gt) { __intel_wakeref_get(&gt->wakeref); } -static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) +static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt) { - return intel_wakeref_get_if_active(&gt->wakeref); + if (!intel_wakeref_get_if_active(&gt->wakeref)) + return 0; + + return intel_wakeref_track(&gt->wakeref); } static inline void intel_gt_pm_might_get(struct intel_gt *gt) @@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt) intel_wakeref_might_get(&gt->wakeref); } -static inline void intel_gt_pm_put(struct intel_gt *gt) +static inline void intel_gt_pm_put_untracked(struct intel_gt *gt) { intel_wakeref_put(&gt->wakeref); } -static inline void intel_gt_pm_put_async(struct intel_gt *gt) +static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(&gt->wakeref, handle); + intel_gt_pm_put_untracked(gt); +} + +static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt) { intel_wakeref_put_async(&gt->wakeref); } @@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) { intel_wakeref_might_put(&gt->wakeref); } -#define with_intel_gt_pm(gt, tmp) \ - for (tmp = 1, intel_gt_pm_get(gt); tmp; \ - intel_gt_pm_put(gt), tmp = 0) +static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(&gt->wakeref, handle); + intel_gt_pm_put_async_untracked(gt); +} + +#define with_intel_gt_pm(gt, wf) \ + for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0) /** * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent @@ -64,7 +84,7 @@ static inline void
intel_gt_pm_might_put(struct intel_gt *gt) * @wf: pointer to a temporary wakeref. */ #define with_intel_gt_pm_if_awake(gt, wf) \ - for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0) static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index f900cc68d6d9..7114c116e928 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -27,7 +27,7 @@ void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) { atomic_inc(&gt->user_wakeref); - intel_gt_pm_get(gt); + intel_gt_pm_get_untracked(gt); if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_get(gt->uncore); } @@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) { if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_put(gt->uncore); - intel_gt_pm_put(gt); + intel_gt_pm_put_untracked(gt); atomic_dec(&gt->user_wakeref); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index eecd0a87a647..50962cfd1353 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -469,6 +469,9 @@ #define XEHP_PSS_MODE2 MCR_REG(0x703c) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) +#define XEHP_PSS_CHICKEN MCR_REG(0x7044) +#define FD_END_COLLECT REG_BIT(5) + #define GEN7_SC_INSTDONE _MMIO(0x7100) #define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) @@ -537,6 +540,9 @@ #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) +#define MTL_GSCPSMI_BASEADDR_LSB _MMIO(0x880c) +#define MTL_GSCPSMI_BASEADDR_MSB _MMIO(0x8810) + #define HSW_IDICR _MMIO(0x9008) #define IDIHASHMSK(x) (((x) & 0x3f) << 16) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4fbed27ef0ec..86f73fe558ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; @@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; @@ -95,6 +101,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found.
+ */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -109,6 +125,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found. + */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index b471edac2699..6b85222ee3ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -249,8 +249,13 @@ struct i915_address_space { struct work_struct release_work; struct drm_mm mm; + struct { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + } rsvd; struct intel_gt *gt; struct drm_i915_private *i915; + struct drm_i915_file_private *fpriv; struct device *dma; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eaf66d903166..7c367ba8d9dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -829,6 +829,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) } static void +lrc_setup_bb_per_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr) +{ + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + ctx_bb_ggtt_addr | + PER_CTX_BB_FORCE | + PER_CTX_BB_VALID; +} + +static void lrc_setup_indirect_ctx(u32 *regs, const struct intel_engine_cs *engine, u32 ctx_bb_ggtt_addr, @@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce) return PAGE_SIZE * ce->wa_bb_page; } -static u32 *context_indirect_bb(const struct intel_context *ce) +/* + * per_ctx below determines which WABB section is used. + * When true, the function returns the location of the + * PER_CTX_BB. When false, the function returns the + * location of the INDIRECT_CTX. + */ +static u32 *context_wabb(const struct intel_context *ce, bool per_ctx) { void *ptr; @@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce) ptr = ce->lrc_reg_state; ptr -= LRC_STATE_OFFSET; /* back to start of context image */ ptr += context_wa_bb_offset(ce); + ptr += per_ctx ? PAGE_SIZE : 0; return ptr; } @@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; + /* INDIRECT_CTX and PER_CTX_BB need separate pages. 
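 *
 * Resulting tail of the gen12+ context image, as a sketch:
 *
 *	[ LRC state ... ][ INDIRECT_CTX wa bb page ][ PER_CTX_BB page ]
 *	                 ^ context_wa_bb_offset(ce)
 *
 * context_wabb(ce, per_ctx) below picks the first or the second page.
 * Unlike the INDIRECT_CTX batch, the PER_CTX_BB one has no size field
 * and must be terminated with an explicit MI_BATCH_BUFFER_END, which
 * setup_per_ctx_bb() appends.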
*/ + context_size += PAGE_SIZE * 2; } if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { @@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return gen12_emit_aux_table_inv(ce->engine, cs); } +static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs) +{ + struct intel_gt *gt = ce->engine->gt; + int mocs = gt->mocs.uc_index << 1; + + /** + * Wa_16018031267 / Wa_16018063123 requires that SW forces the + * main copy engine arbitration into round robin mode. We + * additionally need to submit the following WABB blt command + * to produce 4 subblits with each subblit generating 0 byte + * write requests as WABB: + * + * XY_FASTCOLOR_BLT + * BG0 -> 5100000E + * BG1 -> 0000003F (Dest pitch) + * BG2 -> 00000000 (X1, Y1) = (0, 0) + * BG3 -> 00040001 (X2, Y2) = (1, 4) + * BG4 -> scratch + * BG5 -> scratch + * BG6-12 -> 00000000 + * BG13 -> 20004004 (Surf. Width= 2,Surf. Height = 5 ) + * BG14 -> 00000010 (Qpitch = 4) + * BG15 -> 00000000 + */ + *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f; + *cs++ = 0; + *cs++ = 4 << 16 | 1; + *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0x20004004; + *cs++ = 0x10; + *cs++ = 0; + + return cs; +} + +static u32 * +xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs) +{ + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine)) + cs = xehp_emit_fastcolor_blt_wabb(ce, cs); + + return cs; +} + +static void +setup_per_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + /* Place PER_CTX_BB on next page after INDIRECT_CTX */ + u32 * const start = context_wabb(ce, true); + u32 *cs; + + cs = emit(ce, start); + + /* PER_CTX_BB must manually terminate */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine, + lrc_indirect_bb(ce) + PAGE_SIZE); +} + static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 *(*emit)(const struct intel_context *, u32 *)) { - u32 * const start = context_indirect_bb(ce); + u32 * const start = context_wabb(ce, false); u32 *cs; cs = emit(ce, start); @@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce, /* Mutually exclusive wrt to global indirect bb */ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); setup_indirect_ctx_bb(ce, engine, fn); + setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb); } return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index f602895f6d0d..6a3246240e81 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, const struct sseu_dev_info *sseu, struct drm_printer *p) { - if (sseu->max_slices == 0) { + if (sseu->max_slices == 0) drm_printf(p, "Unavailable\n"); - } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) sseu_print_xehp_topology(sseu, p); - } else { + else sseu_print_hsw_topology(sseu, p); - } } void intel_sseu_print_ss_info(const char *type, diff --git 
a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 192ac0e59afa..3eacbc50caf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -777,6 +777,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + + /* Wa_14019877138:dg2 */ + wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT); } static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, @@ -1663,8 +1666,22 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } static void +wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + + for_each_engine(engine, gt, id) + if (engine->class == VIDEO_DECODE_CLASS) + wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base), + MFXPIPE_CLKGATE_DIS); +} + +static void xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + wa_16021867713(gt, wal); + /* * Wa_14018778641 * Wa_18018781329 @@ -1674,6 +1691,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_22016670082 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + debug_dump_steering(gt); } @@ -2340,14 +2360,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, true); } - if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { - /* Wa_22014600077:dg2 */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 write-only reg, so skip verification */, - true); - } - if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* @@ -2782,6 +2794,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_SEMA_WAIT_POLL(engine->mmio_base), 1); } + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(engine)) + wa_masked_field_set(wal, ECOSKPD(engine->mmio_base), + XEHP_BLITTER_SCHEDULING_MODE_MASK, + XEHP_BLITTER_ROUND_ROBIN_MODE); } static void @@ -2915,6 +2932,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_22015475538:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); } if (IS_DG2_G11(i915)) { @@ -2943,11 +2963,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li true); } - if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { - /* Wa_18028616096 */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); - } - if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 86cecf7a1105..5ffa5e30f419 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B) return *a - *b; } -static void perf_begin(struct intel_gt *gt) +static intel_wakeref_t perf_begin(struct intel_gt *gt) { - intel_gt_pm_get(gt); + intel_wakeref_t wakeref = intel_gt_pm_get(gt); /* Boost gpufreq to max [waitboost] and keep it fixed */ atomic_inc(&gt->rps.num_waiters); queue_work(gt->i915->unordered_wq, &gt->rps.work); flush_work(&gt->rps.work); + +
return wakeref; } -static int perf_end(struct intel_gt *gt) +static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref) { atomic_dec(&gt->rps.num_waiters); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return igt_flush_test(gt->i915); } @@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *batch; @@ -207,7 +210,7 @@ out: pr_info("%s: MI_BB_START cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; @@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *base, *nop; @@ -364,7 +368,7 @@ out: pr_info("%s: 16K MI_NOOP cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 273d440a53e3..bc441ce7b380 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -84,7 +84,7 @@ static struct pulse *pulse_create(void) static void pulse_unlock_wait(struct pulse *p) { - i915_active_unlock_wait(&p->active); + wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ); } static int __live_idle_pulse(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 0971241707ce..33351deeea4f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (!gt->clock_frequency) { /* unknown */ @@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg) if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP?
*/ return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for_each_engine(engine, gt, id) { @@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg) } intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5f826b6dcf5d..e17b8777d21d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg) return err; } -static int indirect_ctx_submit_req(struct intel_context *ce) +static int wabb_ctx_submit_req(struct intel_context *ce) { struct i915_request *rq; int err = 0; @@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce) #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) static u32 * -emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +emit_wabb_ctx_canary(const struct intel_context *ce, + u32 *cs, bool per_ctx) { *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT | @@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) *cs++ = i915_mmio_reg_offset(RING_START(0)); *cs++ = i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce) + - CTX_BB_CANARY_OFFSET; + CTX_BB_CANARY_OFFSET + + (per_ctx ? PAGE_SIZE : 0); *cs++ = 0; return cs; } +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, false); +} + +static u32 * +emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, true); +} + static void -indirect_ctx_bb_setup(struct intel_context *ce) +wabb_ctx_setup(struct intel_context *ce, bool per_ctx) { - u32 *cs = context_indirect_bb(ce); + u32 *cs = context_wabb(ce, per_ctx); cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; - setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); + if (per_ctx) + setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary); + else + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); } -static bool check_ring_start(struct intel_context *ce) +static bool check_ring_start(struct intel_context *ce, bool per_ctx) { const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - - LRC_STATE_OFFSET + context_wa_bb_offset(ce); + LRC_STATE_OFFSET + context_wa_bb_offset(ce) + + (per_ctx ? PAGE_SIZE : 0); if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) return true; @@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce) return false; } -static int indirect_ctx_bb_check(struct intel_context *ce) +static int wabb_ctx_check(struct intel_context *ce, bool per_ctx) { int err; - err = indirect_ctx_submit_req(ce); + err = wabb_ctx_submit_req(ce); if (err) return err; - if (!check_ring_start(ce)) + if (!check_ring_start(ce, per_ctx)) return -EINVAL; return 0; } -static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx) { struct intel_context *a, *b; int err; @@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) * As ring start is restored apriori of starting the indirect ctx bb and * as it will be different for each context, it fits to this purpose. 
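 *
 * Concretely: wabb_ctx_setup() plants 0xdeadf00d at CTX_BB_CANARY_OFFSET,
 * the emitted batch overwrites it with an MI_STORE_REGISTER_MEM of
 * RING_START, and the check reduces to
 *
 *	ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]
 *
 * which can only hold if the hardware actually executed the WABB for
 * that specific context.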
*/ - indirect_ctx_bb_setup(a); - indirect_ctx_bb_setup(b); + wabb_ctx_setup(a, per_ctx); + wabb_ctx_setup(b, per_ctx); - err = indirect_ctx_bb_check(a); + err = wabb_ctx_check(a, per_ctx); if (err) goto unpin_b; - err = indirect_ctx_bb_check(b); + err = wabb_ctx_check(b, per_ctx); unpin_b: intel_context_unpin(b); @@ -1688,7 +1706,7 @@ put_a: return err; } -static int live_lrc_indirect_ctx_bb(void *arg) +static int lrc_wabb_ctx(void *arg, bool per_ctx) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; @@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg) for_each_engine(engine, gt, id) { intel_engine_pm_get(engine); - err = __live_lrc_indirect_ctx_bb(engine); + err = __lrc_wabb_ctx(engine, per_ctx); intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) @@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg) return err; } +static int live_lrc_indirect_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, false); +} + +static int live_lrc_per_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, true); +} + static void garbage_reset(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_garbage), SUBTEST(live_pphwsp_runtime), SUBTEST(live_lrc_indirect_ctx_bb), + SUBTEST(live_lrc_per_ctx_bb), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 79aa6ac66ad2..f40de408cd3a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg) { struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg) unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } @@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg) { struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ @@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg) if (intel_uc_uses_guc_submission(&gt->uc)) return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg) out_unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index fb30f733b036..dcef8d498919 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; @@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg) saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_rps_disable(&gt->rps);
intel_gt_check_clock_frequency(gt); @@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg) } intel_rps_enable(&gt->rps); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); @@ -376,6 +377,7 @@ int live_rps_control(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; /* @@ -398,7 +400,7 @@ saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; ktime_t min_dt, max_dt; @@ -488,7 +490,7 @@ break; } } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); @@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; u32 pm_events; int err = 0; @@ -1033,9 +1036,9 @@ if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; - intel_gt_pm_get(gt); - pm_events = rps->pm_events; - intel_gt_pm_put(gt); + pm_events = 0; + with_intel_gt_pm(gt, wakeref) + pm_events = rps->pm_events; if (!pm_events) { pr_err("No RPS PM events registered, but RPS is enabled?\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 952c8d52d68a..302d0540295d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type) struct intel_rps *rps = &gt->rps; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; int err = 0; @@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type) } intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; u32 max_act_freq; @@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type) if (igt_flush_test(gt->i915)) err = -EIO; - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index 5f138de3c14f..40817ebcca71 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev, gsc->proxy.component = data; gsc->proxy.component->mei_dev = mei_kdev; mutex_unlock(&gsc->proxy.mutex); + gt_dbg(gt, "GSC proxy mei component bound\n"); return 0; } @@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev, with_intel_runtime_pm(&i915->runtime_pm, wakeref) intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), HECI_H_CSR_IE | HECI_H_CSR_RST, 0); + gt_dbg(gt, "GSC proxy mei component unbound\n"); } static const struct component_ops i915_gsc_proxy_component_ops = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 3f3df1166b86..2b450c43bbd7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) static u32 guc_ctl_devid(struct intel_guc *guc) { - struct drm_i915_private *i915 =
guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b6dfe62c8f2..e22c12ce245a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -297,6 +297,10 @@ struct intel_guc { * @number_guc_id_stolen: The number of guc_ids that have been stolen */ int number_guc_id_stolen; + /** + * @fast_response_selftest: Backdoor to CT handler for fast response selftest + */ + u32 fast_response_selftest; #endif }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index a4da0208c883..a1cd40d80517 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc, static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); const struct __guc_mmio_reg_descr_group *lists; if (GRAPHICS_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 89e314b3756b..0d5197c0824a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) u32 *cmds; int err; - err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO); if (err) return err; @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r found = true; break; } + +#ifdef CONFIG_DRM_I915_SELFTEST + if (!found && ct_to_guc(ct)->fast_response_selftest) { + CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n"); + ct_to_guc(ct)->fast_response_selftest++; + found = true; + } +#endif + if (!found) { CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n", len, hxg[0], fence, ct->requests.last_fence); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 55bc8b55fbc0..bf16351c9349 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) static int guc_log_relay_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; @@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; _guc_log_copy_debuglogs_for_relay(log); @@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) static u32 __get_default_log_level(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); /* A negative value means "use platform/config default" 
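The recurring guc_to_gt(guc)->i915 replacements in this file and the ones that follow lean on a new guc_to_i915() helper; judging by the removed expressions it is just the two-step pointer walk given a name, roughly:

/* Sketch of the helper implied by these conversions. */
static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
{
        return guc_to_gt(guc)->i915;
}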
*/ if (i915->params.guc_log_level < 0) { @@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log) int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; int ret = 0; @@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!log->relay.started) return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index 1adec6de223c..9df7927304ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc) { /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2dfb07cc4b33..3e681ab6fbf9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc) { /* GuC SLPC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool __guc_slpc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 17df71117cc7..a259f1118c5a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) if (deregister) guc_signal_context_fence(ce); if (destroyed) { - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) unsigned long flags; u32 reset_count; bool in_reset; + intel_wakeref_t wakeref; spin_lock_irqsave(&guc->timestamp.lock, flags); @@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) * start_gt_clk is derived from GuC state. To get a consistent * view of activity, we query the GuC state only if gt is awake. */ - if (!in_reset && intel_gt_pm_get_if_awake(gt)) { + wakeref = in_reset ? 
0 : intel_gt_pm_get_if_awake(gt); + if (wakeref) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; /* @@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) */ guc_update_engine_gt_clks(engine); guc_update_pm_timestamp(guc, now); - intel_gt_pm_put_async(gt); + intel_gt_pm_put_async(gt, wakeref); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; guc->timestamp.gt_stamp = gt_stamp_saved; @@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w) struct intel_guc *guc = container_of(w, struct intel_guc, submission_state.destroyed_worker); struct intel_gt *gt = guc_to_gt(guc); - int tmp; + intel_wakeref_t wakeref; - with_intel_gt_pm(gt, tmp) + with_intel_gt_pm(gt, wakeref) deregister_destroyed_contexts(guc); } @@ -4624,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc) { /* GuC submission is unavailable for pre-Gen11 */ return intel_guc_is_supported(guc) && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; + GRAPHICS_VER(guc_to_i915(guc)) >= 11; } static bool __guc_submission_selected(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!intel_guc_submission_is_supported(guc)) return false; @@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 27f6561dd731..3872d309ed31 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC && - !intel_uc_supports_huc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", - i915->params.enable_guc, "HuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 362639162ed6..756093eaf2ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { u32 len = min_t(u32, size, PAGE_SIZE - offset); - void *vaddr; if (idx > 0) { idx--; continue; } - vaddr = kmap_atomic(page); - memcpy(dst, vaddr + offset, len); - kunmap_atomic(vaddr); + memcpy_from_page(dst, page, offset, len); offset = 0; dst += len; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bfb72143566f..c900aac85adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -286,11 +286,126 @@ err_wakeref: return ret; } +/* + * Send a context schedule H2G message with an invalid context id. + * This should generate a GUC_RESULT_INVALID_CONTEXT response. 
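A few hunks up, intel_uc_fw_copy_rsa() swaps an open-coded kmap_atomic()/memcpy()/kunmap_atomic() sequence for memcpy_from_page(). The generic helper is the kmap_local form of the same dance, roughly (sketch modulo debug assertions, not i915 code):

/* What memcpy_from_page() boils down to. */
static inline void memcpy_from_page(char *to, struct page *page,
                                    size_t offset, size_t len)
{
        char *from = kmap_local_page(page);

        memcpy(to, from + offset, len);
        kunmap_local(from);
}

The same kmap_atomic() to kmap_local_page() migration appears again in copy_batch() in i915_cmd_parser.c further down.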
+ */ +static int bad_h2g(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT, + 0x12345678, + }; + + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); +} + +/* + * Set a spinner running to make sure the system is alive and active, + * then send a bad but asynchronous H2G command and wait to see if an + * error response is returned. If no response is received or if the + * spinner dies then the test will fail. + */ +#define FAST_RESPONSE_TIMEOUT_MS 1000 +static int intel_guc_fast_request(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); + bool spinning = false; + int ret = 0; + + if (!engine) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + gt_err(gt, "Failed to create context: %pe\n", ce); + goto err_pm; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); + goto err_pm; + } + spinning = true; + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + gt_err(gt, "Failed to create spinner request: %pe\n", rq); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + gt_err(gt, "Failed to add spinner request: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + gt->uc.guc.fast_response_selftest = 1; + + ret = bad_h2g(&gt->uc.guc); + if (ret) { + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq), + FAST_RESPONSE_TIMEOUT_MS); + if (ret) { + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + if (i915_request_completed(rq)) { + gt_err(gt, "Spinner died waiting for fast request error!\n"); + ret = -EIO; + goto err_rq; + } + + if (gt->uc.guc.fast_response_selftest != 2) { + gt_err(gt, "Unexpected fast response count: %d\n", + gt->uc.guc.fast_response_selftest); + ret = -EIO; + goto err_rq; + } + + igt_spinner_end(&spin); + spinning = false; + + ret = intel_selftest_wait_for_rq(rq); + if (ret) { + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + +err_rq: + i915_request_put(rq); + +err_spin: + if (spinning) + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + +err_pm: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), SUBTEST(intel_guc_steal_guc_ids), + SUBTEST(intel_guc_fast_request), }; struct intel_gt *gt = to_gt(i915); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 34b5d952e2bc..26fdc392fce6 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg) goto err; } - rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); intel_context_put(ce); if (IS_ERR(rq)) { ret = PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 05f9348b7a9d..d4a3f3e093b0 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ 
b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3047,7 +3047,7 @@ put_obj: static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - u32 per_ctx_start[CACHELINE_DWORDS] = {0}; + u32 per_ctx_start[CACHELINE_DWORDS] = {}; unsigned char *bb_start_sva; if (!wa_ctx->per_ctx.valid) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 835c3fde8a20..313efdabee57 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -56,7 +56,7 @@ static const struct pixel_format bdw_pixel_formats[] = { {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, /* non-supported format has bpp default to 0 */ - {0, 0, NULL}, + {} }; static const struct pixel_format skl_pixel_formats[] = { @@ -76,7 +76,7 @@ static const struct pixel_format skl_pixel_formats[] = { {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, /* non-supported format has bpp default to 0 */ - {0, 0, NULL}, + {} }; static int bdw_format_to_drm(int format) @@ -293,7 +293,7 @@ static const struct cursor_mode_format cursor_pixel_formats[] = { {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, /* non-supported format has bpp default to 0 */ - {0, 0, 0, 0, NULL}, + {} }; static int cursor_mode_to_drm(int mode) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a9f7fa9b90bd..90f6c1ece57d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -538,7 +538,7 @@ static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port) int refclk = vgpu->gvt->gt->i915->display.dpll.ref_clks.nssc; enum dpio_phy phy = DPIO_PHY0; enum dpio_channel ch = DPIO_CH0; - struct dpll clock = {0}; + struct dpll clock = {}; u32 temp; /* Port to PHY mapping is fixed, see bxt_ddi_phy_info{} */ @@ -2576,7 +2576,6 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) static int init_skl_mmio_info(struct intel_gvt *gvt) { - struct drm_i915_private *dev_priv = gvt->gt->i915; int ret; MMIO_DH(FORCEWAKE_RENDER_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ddf49c2dbb91..2905df83e180 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1211,11 +1211,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, for (n = offset >> PAGE_SHIFT; remain; n++) { int len = min(remain, PAGE_SIZE - x); - src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + src = kmap_local_page(i915_gem_object_get_page(src_obj, n)); if (src_needs_clflush) drm_clflush_virt_range(src + x, len); memcpy(ptr, src + x, len); - kunmap_atomic(src); + kunmap_local(src); ptr += len; remain -= len; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e9b79c2c37d8..db99c2ef66db 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,8 @@ #include <drm/drm_debugfs.h> +#include "display/intel_display_params.h" + #include "gem/i915_gem_context.h" #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" @@ -49,6 +51,7 @@ #include "i915_debugfs.h" #include "i915_debugfs_params.h" #include "i915_driver.h" +#include "i915_gpu_error.h" #include "i915_irq.h" #include "i915_reg.h" #include "i915_scheduler.h" @@ -67,13 +70,13 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915)); intel_device_info_print(INTEL_INFO(i915), 
RUNTIME_INFO(i915), &p); - intel_display_device_info_print(DISPLAY_INFO(i915), DISPLAY_RUNTIME_INFO(i915), &p); i915_print_iommu_status(i915, &p); intel_gt_info_print(&to_gt(i915)->info, &p); intel_driver_caps_print(&i915->caps, &p); kernel_param_lock(THIS_MODULE); i915_params_dump(&i915->params, &p); + intel_display_params_dump(i915, &p); kernel_param_unlock(THIS_MODULE); return 0; @@ -297,107 +300,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) return 0; } -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) -static ssize_t gpu_state_read(struct file *file, char __user *ubuf, - size_t count, loff_t *pos) -{ - struct i915_gpu_coredump *error; - ssize_t ret; - void *buf; - - error = file->private_data; - if (!error) - return 0; - - /* Bounce buffer required because of kernfs __user API convenience. */ - buf = kmalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); - if (ret <= 0) - goto out; - - if (!copy_to_user(ubuf, buf, ret)) - *pos += ret; - else - ret = -EFAULT; - -out: - kfree(buf); - return ret; -} - -static int gpu_state_release(struct inode *inode, struct file *file) -{ - i915_gpu_coredump_put(file->private_data); - return 0; -} - -static int i915_gpu_info_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *i915 = inode->i_private; - struct i915_gpu_coredump *gpu; - intel_wakeref_t wakeref; - - gpu = NULL; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE); - - if (IS_ERR(gpu)) - return PTR_ERR(gpu); - - file->private_data = gpu; - return 0; -} - -static const struct file_operations i915_gpu_info_fops = { - .owner = THIS_MODULE, - .open = i915_gpu_info_open, - .read = gpu_state_read, - .llseek = default_llseek, - .release = gpu_state_release, -}; - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) -{ - struct i915_gpu_coredump *error = filp->private_data; - - if (!error) - return 0; - - drm_dbg(&error->i915->drm, "Resetting error state\n"); - i915_reset_error_state(error->i915); - - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct i915_gpu_coredump *error; - - error = i915_first_error_state(inode->i_private); - if (IS_ERR(error)) - return PTR_ERR(error); - - file->private_data = error; - return 0; -} - -static const struct file_operations i915_error_state_fops = { - .owner = THIS_MODULE, - .open = i915_error_state_open, - .read = gpu_state_read, - .write = i915_error_state_write, - .llseek = default_llseek, - .release = gpu_state_release, -}; -#endif - static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -837,10 +739,6 @@ static const struct i915_debugfs_files { {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - {"i915_error_state", &i915_error_state_fops}, - {"i915_gpu_info", &i915_gpu_info_fops}, -#endif }; void i915_debugfs_register(struct drm_i915_private *dev_priv) @@ -863,4 +761,6 @@ void i915_debugfs_register(struct drm_i915_private *dev_priv) drm_debugfs_create_files(i915_debugfs_list, ARRAY_SIZE(i915_debugfs_list), minor->debugfs_root, minor); + + i915_gpu_error_debugfs_register(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c index 802de2c6decb..c7d7c3b7ecc6 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -231,16 +231,10 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); - mutex_init(&dev_priv->display.backlight.lock); mutex_init(&dev_priv->sb_lock); cpu_latency_qos_add_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE); - mutex_init(&dev_priv->display.audio.mutex); - mutex_init(&dev_priv->display.wm.wm_mutex); - mutex_init(&dev_priv->display.pps.mutex); - mutex_init(&dev_priv->display.hdcp.hdcp_mutex); - i915_memcpy_init_early(dev_priv); intel_runtime_pm_init_early(&dev_priv->runtime_pm); @@ -804,7 +798,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset2; - intel_pxp_init(i915); + ret = intel_pxp_init(i915); + if (ret && ret != -ENODEV) + drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); ret = intel_display_driver_probe(i915); if (ret) @@ -907,6 +903,8 @@ static void i915_driver_release(struct drm_device *dev) intel_runtime_pm_driver_release(rpm); i915_driver_late_release(dev_priv); + + intel_display_device_remove(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -1037,7 +1035,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_driver_remove(i915); enable_rpm_wakeref_asserts(&i915->runtime_pm); - intel_runtime_pm_driver_release(&i915->runtime_pm); + intel_runtime_pm_driver_last_release(&i915->runtime_pm); } static bool suspend_to_idle(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 2a44b3876cb5..fa6852713bee 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void) kref_init(&client->kref); spin_lock_init(&client->ctx_lock); INIT_LIST_HEAD(&client->ctx_list); +#ifdef CONFIG_PROC_FS + spin_lock_init(&client->objects_lock); + INIT_LIST_HEAD(&client->objects_list); +#endif return client; } @@ -41,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref) } #ifdef CONFIG_PROC_FS +static void +obj_meminfo(struct drm_i915_gem_object *obj, + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) +{ + const enum intel_region_id id = obj->mm.region ? + obj->mm.region->id : INTEL_REGION_SMEM; + const u64 sz = obj->base.size; + + if (obj->base.handle_count > 1) + stats[id].shared += sz; + else + stats[id].private += sz; + + if (i915_gem_object_has_pages(obj)) { + stats[id].resident += sz; + + if (!dma_resv_test_signaled(obj->base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + stats[id].active += sz; + else if (i915_gem_object_is_shrinkable(obj) && + obj->mm.madv == I915_MADV_DONTNEED) + stats[id].purgeable += sz; + } +} + +static void show_meminfo(struct drm_printer *p, struct drm_file *file) +{ + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_drm_client *client = fpriv->client; + struct drm_i915_private *i915 = fpriv->i915; + struct drm_i915_gem_object *obj; + struct intel_memory_region *mr; + struct list_head __rcu *pos; + unsigned int id; + + /* Public objects. */ + spin_lock(&file->table_lock); + idr_for_each_entry(&file->object_idr, obj, id) + obj_meminfo(obj, stats); + spin_unlock(&file->table_lock); + + /* Internal objects.
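The internal-objects walk that follows iterates the RCU list without holding objects_lock, so an entry may already be headed for the free path; i915_gem_object_get_rcu() only hands the object back if a full reference can still be taken. Presumably it is the usual kref get-or-skip idiom, sketched here under that assumption (hypothetical name, not the driver's exact code):

/* Get-or-skip under RCU (sketch, assuming kref_get_unless_zero() underneath). */
static inline struct drm_i915_gem_object *
object_get_rcu(struct drm_i915_gem_object *obj)
{
        if (obj && !kref_get_unless_zero(&obj->base.refcount))
                obj = NULL;     /* raced with the final put: skip this entry */

        return obj;
}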
*/ + rcu_read_lock(); + list_for_each_rcu(pos, &client->objects_list) { + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), + client_link)); + if (!obj) + continue; + obj_meminfo(obj, stats); + i915_gem_object_put(obj); + } + rcu_read_unlock(); + + for_each_memory_region(mr, i915, id) + drm_print_memory_stats(p, + &stats[id], + DRM_GEM_OBJECT_RESIDENT | + DRM_GEM_OBJECT_PURGEABLE, + mr->uabi_name); +} + static const char * const uabi_class_names[] = { [I915_ENGINE_CLASS_RENDER] = "render", [I915_ENGINE_CLASS_COPY] = "copy", @@ -102,10 +168,52 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) * ****************************************************************** */ + show_meminfo(p, file); + if (GRAPHICS_VER(i915) < 8) return; for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) show_client_class(p, i915, file_priv->client, i); } + +void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj) +{ + unsigned long flags; + + GEM_WARN_ON(obj->client); + GEM_WARN_ON(!list_empty(&obj->client_link)); + + spin_lock_irqsave(&client->objects_lock, flags); + obj->client = i915_drm_client_get(client); + list_add_tail_rcu(&obj->client_link, &client->objects_list); + spin_unlock_irqrestore(&client->objects_lock, flags); +} + +void i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +{ + struct i915_drm_client *client = fetch_and_zero(&obj->client); + unsigned long flags; + + /* Object may not be associated with a client. */ + if (!client) + return; + + spin_lock_irqsave(&client->objects_lock, flags); + list_del_rcu(&obj->client_link); + spin_unlock_irqrestore(&client->objects_lock, flags); + + i915_drm_client_put(client); +} + +void i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce) +{ + if (ce->state) + i915_drm_client_add_object(client, ce->state->obj); + + if (ce->ring != ce->engine->legacy.ring && ce->ring->vma) + i915_drm_client_add_object(client, ce->ring->vma->obj); +} #endif diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 67816c912bca..a439dd789936 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -12,6 +12,10 @@ #include <uapi/drm/i915_drm.h> +#include "i915_file_private.h" +#include "gem/i915_gem_object_types.h" +#include "gt/intel_context_types.h" + #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE struct drm_file; @@ -25,6 +29,20 @@ struct i915_drm_client { spinlock_t ctx_lock; /* For add/remove from ctx_list. */ struct list_head ctx_list; /* List of contexts belonging to client. */ +#ifdef CONFIG_PROC_FS + /** + * @objects_lock: lock protecting @objects_list + */ + spinlock_t objects_lock; + + /** + * @objects_list: list of objects created by this client + * + * Protected by @objects_lock. + */ + struct list_head objects_list; +#endif + /** * @past_runtime: Accumulation of pphwsp runtimes from closed contexts. 
*/ @@ -49,4 +67,28 @@ struct i915_drm_client *i915_drm_client_alloc(void); void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); +#ifdef CONFIG_PROC_FS +void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj); +void i915_drm_client_remove_object(struct drm_i915_gem_object *obj); +void i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce); +#else +static inline void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj) +{ +} + +static inline void +i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +{ +} + +static inline void +i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce) +{ +} +#endif + #endif /* !__I915_DRM_CLIENT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd452c220df7..861567362abd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -396,20 +396,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) return i915->gt[0]; } -/* Simple iterator over all initialised engines */ -#define for_each_engine(engine__, gt__, id__) \ - for ((id__) = 0; \ - (id__) < I915_NUM_ENGINES; \ - (id__)++) \ - for_each_if ((engine__) = (gt__)->engine[(id__)]) - -/* Iterator over subset of engines selected by mask */ -#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ - for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \ - (tmp__) ? \ - ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ - 0;) - #define rb_to_uabi_engine(rb) \ rb_entry_safe(rb, struct intel_engine_cs, uabi_node) @@ -418,11 +404,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) (engine__); \ (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) -#define for_each_uabi_class_engine(engine__, class__, i915__) \ - for ((engine__) = intel_engine_lookup_user((i915__), (class__), 0); \ - (engine__) && (engine__)->uabi_class == (class__); \ - (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) - #define INTEL_INFO(i915) ((i915)->__info) #define RUNTIME_INFO(i915) (&(i915)->__runtime) #define DRIVER_CAPS(i915) (&(i915)->caps) @@ -575,6 +556,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_DG2(i915) IS_PLATFORM(i915, INTEL_DG2) #define IS_PONTEVECCHIO(i915) IS_PLATFORM(i915, INTEL_PONTEVECCHIO) #define IS_METEORLAKE(i915) IS_PLATFORM(i915, INTEL_METEORLAKE) +#define IS_LUNARLAKE(i915) 0 #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c166ad5e187a..92758b6b41f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,8 +1306,6 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv) { i915_gem_init__mm(dev_priv); i915_gem_init__contexts(dev_priv); - - spin_lock_init(&dev_priv->display.fb_tracking.lock); } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b4e31e59c799..d04660b60046 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -57,6 +57,7 @@ #include "i915_memcpy.h" #include "i915_reg.h" #include "i915_scatterlist.h" +#include "i915_sysfs.h" #include "i915_utils.h" #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | 
__GFP_NOWARN) @@ -520,7 +521,7 @@ __find_vma(struct i915_vma_coredump *vma, const char *name) return NULL; } -struct i915_vma_coredump * +static struct i915_vma_coredump * intel_gpu_error_find_batch(const struct intel_engine_coredump *ee) { return __find_vma(ee->vma, "batch"); @@ -609,9 +610,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma) +static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; struct page *page; @@ -660,6 +661,7 @@ static void err_print_params(struct drm_i915_error_state_buf *m, struct drm_printer p = i915_error_printer(m); i915_params_dump(params, &p); + intel_display_params_dump(m->i915, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, @@ -1027,6 +1029,7 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma) static void cleanup_params(struct i915_gpu_coredump *error) { i915_params_free(&error->params); + intel_display_params_free(&error->display_params); } static void cleanup_uc(struct intel_uc_coredump *uc) @@ -1988,6 +1991,7 @@ static void capture_gen(struct i915_gpu_coredump *error) error->suspend_count = i915->suspend_count; i915_params_copy(&error->params, &i915->params); + intel_display_params_copy(&error->display_params); memcpy(&error->device_info, INTEL_INFO(i915), sizeof(error->device_info)); @@ -2137,7 +2141,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du return error; } -struct i915_gpu_coredump * +static struct i915_gpu_coredump * i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) { static DEFINE_MUTEX(capture_mutex); @@ -2174,7 +2178,7 @@ void i915_error_state_store(struct i915_gpu_coredump *error) ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); - pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n"); + pr_info("Please see https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html for details.\n"); pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", @@ -2208,7 +2212,7 @@ void i915_capture_error_state(struct intel_gt *gt, i915_gpu_coredump_put(error); } -struct i915_gpu_coredump * +static struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { struct i915_gpu_coredump *error; @@ -2375,3 +2379,184 @@ void intel_klog_error_capture(struct intel_gt *gt, drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err); } #endif + +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + struct i915_gpu_coredump *error; + ssize_t ret; + void *buf; + + error = file->private_data; + if (!error) + return 0; + + /* Bounce buffer required because of kernfs __user API convenience. 
*/ + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); + if (ret <= 0) + goto out; + + if (!copy_to_user(ubuf, buf, ret)) + *pos += ret; + else + ret = -EFAULT; + +out: + kfree(buf); + return ret; +} + +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_coredump_put(file->private_data); + return 0; +} + +static int i915_gpu_info_open(struct inode *inode, struct file *file) +{ + struct drm_i915_private *i915 = inode->i_private; + struct i915_gpu_coredump *gpu; + intel_wakeref_t wakeref; + + gpu = NULL; + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE); + + if (IS_ERR(gpu)) + return PTR_ERR(gpu); + + file->private_data = gpu; + return 0; +} + +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct i915_gpu_coredump *error = filp->private_data; + + if (!error) + return 0; + + drm_dbg(&error->i915->drm, "Resetting error state\n"); + i915_reset_error_state(error->i915); + + return cnt; +} + +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + struct i915_gpu_coredump *error; + + error = i915_first_error_state(inode->i_private); + if (IS_ERR(error)) + return PTR_ERR(error); + + file->private_data = error; + return 0; +} + +static const struct file_operations i915_error_state_fops = { + .owner = THIS_MODULE, + .open = i915_error_state_open, + .read = gpu_state_read, + .write = i915_error_state_write, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) +{ + struct drm_minor *minor = i915->drm.primary; + + debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915, + &i915_error_state_fops); + debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915, + &i915_gpu_info_fops); +} + +static ssize_t error_state_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct i915_gpu_coredump *gpu; + ssize_t ret = 0; + + /* + * FIXME: Concurrent clients triggering resets and reading + clearing + * dumps can cause inconsistent sysfs reads when a user calls in with a + * non-zero offset to complete a prior partial read but the + * gpu_coredump has been cleared or replaced. 
+ */ + + gpu = i915_first_error_state(i915); + if (IS_ERR(gpu)) { + ret = PTR_ERR(gpu); + } else if (gpu) { + ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); + i915_gpu_coredump_put(gpu); + } else { + const char *str = "No error state collected\n"; + size_t len = strlen(str); + + if (off < len) { + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } + } + + return ret; +} + +static ssize_t error_state_write(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + + drm_dbg(&dev_priv->drm, "Resetting error state\n"); + i915_reset_error_state(dev_priv); + + return count; +} + +static const struct bin_attribute error_state_attr = { + .attr.name = "error", + .attr.mode = S_IRUSR | S_IWUSR, + .size = 0, + .read = error_state_read, + .write = error_state_write, +}; + +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) +{ + struct device *kdev = i915->drm.primary->kdev; + + if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) + drm_err(&i915->drm, "error_state sysfs setup failed\n"); +} + +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) +{ + struct device *kdev = i915->drm.primary->kdev; + + sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); +} diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 48f6c00402c4..7c255bb1c319 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -15,6 +15,7 @@ #include <drm/drm_mm.h> #include "display/intel_display_device.h" +#include "display/intel_display_params.h" #include "gt/intel_engine.h" #include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" @@ -215,6 +216,7 @@ struct i915_gpu_coredump { struct intel_display_runtime_info display_runtime_info; struct intel_driver_caps driver_caps; struct i915_params params; + struct intel_display_params display_params; struct intel_overlay_error_state *overlay; @@ -283,14 +285,7 @@ static inline void intel_klog_error_capture(struct intel_gt *gt, __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma); -struct i915_vma_coredump * -intel_gpu_error_find_batch(const struct intel_engine_coredump *ee); - -struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, - intel_engine_mask_t engine_mask, u32 dump_flags); + void i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags); @@ -338,10 +333,13 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) kref_put(&gpu->ref, __i915_gpu_coredump_free); } -struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); +void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915); +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915); + #else __printf(2, 3) @@ -409,12 +407,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { } -static inline struct i915_gpu_coredump * -i915_first_error_state(struct drm_i915_private *i915) -{ - return ERR_PTR(-ENODEV); -} - 
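Relocating the error-state node out of i915_sysfs.c leaves its userspace contract intact: a root-only binary attribute that streams the coredump in offset/count chunks and clears the saved state on any write. An illustrative standalone consumer (hypothetical tool, not part of the patch; the card0 path follows the /sys/class/drm/card%d/error form logged by i915_error_state_store() above):

/* Dump the saved GPU error state to stdout, then clear it (run as root). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/class/drm/card0/error", O_RDWR);

        if (fd < 0)
                return 1;

        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);

        if (n == 0)                     /* EOF: the dump is complete */
                n = write(fd, "1", 1);  /* any write resets the error state */

        close(fd);
        return n < 0;
}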
static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -424,6 +416,18 @@ static inline void i915_disable_error_state(struct drm_i915_private *i915, { } +static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) +{ +} + +static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) +{ +} + +static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) +{ +} + #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ #endif /* _I915_GPU_ERROR_H_ */ diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 036c4c3ed6ed..de43048543e8 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -67,33 +67,9 @@ i915_param_named(modeset, int, 0400, "Use kernel modesetting [KMS] (0=disable, " "1=on, -1=force vga console preference [default])"); -i915_param_named_unsafe(enable_dc, int, 0400, - "Enable power-saving display C-states. " - "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " - "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); - -i915_param_named_unsafe(enable_fbc, int, 0400, - "Enable frame buffer compression for power savings " - "(default: -1 (use per-chip default))"); - -i915_param_named_unsafe(lvds_channel_mode, int, 0400, - "Specify LVDS channel mode " - "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)"); - -i915_param_named_unsafe(panel_use_ssc, int, 0400, - "Use Spread Spectrum Clock with panels [LVDS/eDP] " - "(default: auto from VBT)"); - -i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, - "Override/Ignore selection of SDVO panel mode in the VBT " - "(-2=ignore, -1=auto [default], index in VBT BIOS table)"); - i915_param_named_unsafe(reset, uint, 0400, "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); -i915_param_named_unsafe(vbt_firmware, charp, 0400, - "Load VBT from specified file under /lib/firmware"); - #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) i915_param_named(error_capture, bool, 0400, "Record the GPU state following a hang. " @@ -106,55 +82,10 @@ i915_param_named_unsafe(enable_hangcheck, bool, 0400, "WARNING: Disabling this can cause system wide hangs. " "(default: true)"); -i915_param_named_unsafe(enable_psr, int, 0400, - "Enable PSR " - "(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) " - "Default: -1 (use per-chip default)"); - -i915_param_named(psr_safest_params, bool, 0400, - "Replace PSR VBT parameters by the safest and not optimal ones. This " - "is helpful to detect if PSR issues are related to bad values set in " - " VBT. (0=use VBT parameters, 1=use safest parameters)"); - -i915_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400, - "Enable PSR2 selective fetch " - "(0=disabled, 1=enabled) " - "Default: 0"); - -i915_param_named_unsafe(enable_sagv, bool, 0600, - "Enable system agent voltage/frequency scaling (SAGV) (default: true)"); - i915_param_named_unsafe(force_probe, charp, 0400, "Force probe options for specified supported devices. 
" "See CONFIG_DRM_I915_FORCE_PROBE for details."); -i915_param_named_unsafe(disable_power_well, int, 0400, - "Disable display power wells when possible " - "(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)"); - -i915_param_named_unsafe(enable_ips, int, 0400, "Enable IPS (default: true)"); - -i915_param_named_unsafe(enable_dpt, bool, 0400, - "Enable display page table (DPT) (default: true)"); - -i915_param_named_unsafe(load_detect_test, bool, 0400, - "Force-enable the VGA load detect code for testing (default:false). " - "For developers only."); - -i915_param_named_unsafe(force_reset_modeset_test, bool, 0400, - "Force a modeset during gpu reset for testing (default:false). " - "For developers only."); - -i915_param_named_unsafe(invert_brightness, int, 0400, - "Invert backlight brightness " - "(-1 force normal, 0 machine defaults, 1 force inversion), please " - "report PCI device ID, subsystem vendor and subsystem device ID " - "to dri-devel@lists.freedesktop.org, if your machine needs it. " - "It will then be included in an upcoming module version."); - -i915_param_named(disable_display, bool, 0400, - "Disable display (default: false)"); - i915_param_named(memtest, bool, 0400, "Perform a read/write test of all device memory on module load (default: off)"); @@ -162,19 +93,6 @@ i915_param_named(mmio_debug, int, 0400, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); -/* Special case writable file */ -i915_param_named(verbose_state_checks, bool, 0600, - "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); - -i915_param_named_unsafe(nuclear_pageflip, bool, 0400, - "Force enable atomic functionality on platforms that don't have full support yet."); - -/* WA to get away with the default setting in VBT for early platforms.Will be removed */ -i915_param_named_unsafe(edp_vswing, int, 0400, - "Ignore/Override vswing pre-emph table selection from VBT " - "(0=use value from vbt [default], 1=low power swing(200mV)," - "2=default swing(400mV))"); - i915_param_named_unsafe(enable_guc, int, 0400, "Enable GuC load for GuC submission and/or HuC load. " "Required functionality can be selected using bitmask values. " @@ -196,18 +114,11 @@ i915_param_named_unsafe(dmc_firmware_path, charp, 0400, i915_param_named_unsafe(gsc_firmware_path, charp, 0400, "GSC firmware path to use instead of the default one"); -i915_param_named_unsafe(enable_dp_mst, bool, 0400, - "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) i915_param_named_unsafe(inject_probe_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); #endif -i915_param_named(enable_dpcd_backlight, int, 0400, - "Enable support for DPCD backlight control" - "(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enable, 2=force VESA interface, 3=force Intel interface)"); - #if IS_ENABLED(CONFIG_DRM_I915_GVT) i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index d5194b039aab..1315d7fac850 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -46,21 +46,7 @@ struct drm_printer; * debugfs file */ #define I915_PARAMS_FOR_EACH(param) \ - param(char *, vbt_firmware, NULL, 0400) \ param(int, modeset, -1, 0400) \ - param(int, lvds_channel_mode, 0, 0400) \ - param(int, panel_use_ssc, -1, 0600) \ - param(int, vbt_sdvo_panel_type, -1, 0400) \ - param(int, enable_dc, -1, 0400) \ - param(int, enable_fbc, -1, 0600) \ - param(int, enable_psr, -1, 0600) \ - param(bool, enable_dpt, true, 0400) \ - param(bool, psr_safest_params, false, 0400) \ - param(bool, enable_psr2_sel_fetch, true, 0400) \ - param(bool, enable_sagv, true, 0600) \ - param(int, disable_power_well, -1, 0400) \ - param(int, enable_ips, 1, 0600) \ - param(int, invert_brightness, 0, 0600) \ param(int, enable_guc, -1, 0400) \ param(int, guc_log_level, -1, 0400) \ param(char *, guc_firmware_path, NULL, 0400) \ @@ -69,23 +55,15 @@ struct drm_printer; param(char *, gsc_firmware_path, NULL, 0400) \ param(bool, memtest, false, 0400) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ - param(int, edp_vswing, 0, 0400) \ param(unsigned int, reset, 3, 0600) \ param(unsigned int, inject_probe_failure, 0, 0) \ - param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ param(unsigned int, lmem_size, 0, 0400) \ param(unsigned int, lmem_bar_size, 0, 0400) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ - param(bool, load_detect_test, false, 0600) \ - param(bool, force_reset_modeset_test, false, 0600) \ param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \ - param(bool, disable_display, false, 0400) \ - param(bool, verbose_state_checks, true, 0) \ - param(bool, nuclear_pageflip, false, 0400) \ - param(bool, enable_dp_mst, true, 0600) \ param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) #define MEMBER(T, member, ...) 
T member; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f861863eb7c1..21eb0c5b320d 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -31,6 +31,16 @@ static cpumask_t i915_pmu_cpumask; static unsigned int i915_pmu_target_cpu = -1; +static struct i915_pmu *event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct i915_pmu, base); +} + +static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu) +{ + return container_of(pmu, struct drm_i915_private, pmu); +} + static u8 engine_config_sample(u64 config) { return config & I915_PMU_SAMPLE_MASK; @@ -141,7 +151,7 @@ static u32 frequency_enabled_mask(void) static bool pmu_needs_timer(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); u32 enable; /* @@ -213,19 +223,19 @@ static u64 get_rc6(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; + intel_wakeref_t wakeref; unsigned long flags; - bool awake = false; u64 val; - if (intel_gt_pm_get_if_awake(gt)) { + wakeref = intel_gt_pm_get_if_awake(gt); + if (wakeref) { val = __get_rc6(gt); - intel_gt_pm_put_async(gt); - awake = true; + intel_gt_pm_put_async(gt, wakeref); } spin_lock_irqsave(&pmu->lock, flags); - if (awake) { + if (wakeref) { store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); } else { /* @@ -251,7 +261,7 @@ static u64 get_rc6(struct intel_gt *gt) static void init_rc6(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); struct intel_gt *gt; unsigned int i; @@ -429,12 +439,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; struct intel_rps *rps = >->rps; + intel_wakeref_t wakeref; if (!frequency_sampling_enabled(pmu, gt_id)) return; /* Report 0/0 (actual/requested) frequency while parked. 
*/ - if (!intel_gt_pm_get_if_awake(gt)) + wakeref = intel_gt_pm_get_if_awake(gt); + if (!wakeref) return; if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) { @@ -463,14 +475,13 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) period_ns / 1000); } - intel_gt_pm_put_async(gt); + intel_gt_pm_put_async(gt, wakeref); } static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) { - struct drm_i915_private *i915 = - container_of(hrtimer, struct drm_i915_private, pmu.timer); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer); + struct drm_i915_private *i915 = pmu_to_i915(pmu); unsigned int period_ns; struct intel_gt *gt; unsigned int i; @@ -505,8 +516,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) static void i915_pmu_event_destroy(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); drm_WARN_ON(&i915->drm, event->parent); @@ -572,8 +583,8 @@ config_status(struct drm_i915_private *i915, u64 config) static int engine_event_init(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); struct intel_engine_cs *engine; engine = intel_engine_lookup_user(i915, engine_event_class(event), @@ -586,9 +597,8 @@ static int engine_event_init(struct perf_event *event) static int i915_pmu_event_init(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); int ret; if (pmu->closed) @@ -628,9 +638,8 @@ static int i915_pmu_event_init(struct perf_event *event) static u64 __i915_pmu_event_read(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); u64 val = 0; if (is_engine_event(event)) { @@ -686,10 +695,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event) static void i915_pmu_event_read(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); struct hw_perf_event *hwc = &event->hw; - struct i915_pmu *pmu = &i915->pmu; u64 prev, new; if (pmu->closed) { @@ -707,10 +714,9 @@ static void i915_pmu_event_read(struct perf_event *event) static void i915_pmu_enable(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); const unsigned int bit = event_bit(event); - struct i915_pmu *pmu = &i915->pmu; unsigned long flags; if (bit == -1) @@ -771,10 +777,9 @@ update: static void i915_pmu_disable(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); const unsigned int bit = event_bit(event); - struct i915_pmu *pmu = &i915->pmu; unsigned long flags; if (bit == -1) @@ -818,9 +823,7 @@ static void 
i915_pmu_disable(struct perf_event *event) static void i915_pmu_event_start(struct perf_event *event, int flags) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); if (pmu->closed) return; @@ -848,9 +851,7 @@ out: static int i915_pmu_event_add(struct perf_event *event, int flags) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); if (pmu->closed) return -ENODEV; @@ -982,7 +983,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, static struct attribute ** create_event_attributes(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); static const struct { unsigned int counter; const char *name; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 135e8d8dbdf0..27dc903f0553 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -195,8 +195,6 @@ #define DPIO_SFR_BYPASS (1 << 1) #define DPIO_CMNRST (1 << 0) -#define DPIO_PHY(pipe) ((pipe) >> 1) - /* * Per pipe/PLL DPIO regs */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index e88bb4f04305..613decd47760 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = { .private = (void *)1 }; -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) -{ - - struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct i915_gpu_coredump *gpu; - ssize_t ret = 0; - - /* - * FIXME: Concurrent clients triggering resets and reading + clearing - * dumps can cause inconsistent sysfs reads when a user calls in with a - * non-zero offset to complete a prior partial read but the - * gpu_coredump has been cleared or replaced. 
- */ - - gpu = i915_first_error_state(i915); - if (IS_ERR(gpu)) { - ret = PTR_ERR(gpu); - } else if (gpu) { - ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); - i915_gpu_coredump_put(gpu); - } else { - const char *str = "No error state collected\n"; - size_t len = strlen(str); - - if (off < len) { - ret = min_t(size_t, count, len - off); - memcpy(buf, str + off, ret); - } - } - - return ret; -} - -static ssize_t error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) -{ - struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - - drm_dbg(&dev_priv->drm, "Resetting error state\n"); - i915_reset_error_state(dev_priv); - - return count; -} - -static const struct bin_attribute error_state_attr = { - .attr.name = "error", - .attr.mode = S_IRUSR | S_IWUSR, - .size = 0, - .read = error_state_read, - .write = error_state_write, -}; - -static void i915_setup_error_capture(struct device *kdev) -{ - if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) - drm_err(&kdev_minor_to_i915(kdev)->drm, - "error_state sysfs setup failed\n"); -} - -static void i915_teardown_error_capture(struct device *kdev) -{ - sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); -} -#else -static void i915_setup_error_capture(struct device *kdev) {} -static void i915_teardown_error_capture(struct device *kdev) {} -#endif - void i915_setup_sysfs(struct drm_i915_private *dev_priv) { struct device *kdev = dev_priv->drm.primary->kdev; @@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) drm_warn(&dev_priv->drm, "failed to register GT sysfs directory\n"); - i915_setup_error_capture(kdev); + i915_gpu_error_sysfs_setup(dev_priv); intel_engines_add_sysfs(dev_priv); } @@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) { struct device *kdev = dev_priv->drm.primary->kdev; - i915_teardown_error_capture(kdev); + i915_gpu_error_sysfs_teardown(dev_priv); device_remove_bin_file(kdev, &dpf_attrs_1); device_remove_bin_file(kdev, &dpf_attrs); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c61066498bf2..f98577967b7f 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -40,7 +40,7 @@ struct drm_i915_private; struct timer_list; -#define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs" +#define FDO_BUG_URL "https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html" #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 3d1fdea9811d..60a03340bbd4 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem, return err; } +static const char *region_type_str(u16 type) +{ + switch (type) { + case INTEL_MEMORY_SYSTEM: + return "system"; + case INTEL_MEMORY_LOCAL: + return "local"; + case INTEL_MEMORY_STOLEN_LOCAL: + return "stolen-local"; + case INTEL_MEMORY_STOLEN_SYSTEM: + return "stolen-system"; + default: + return "unknown"; + } +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, @@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->type = type; mem->instance = 
instance; + snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u", + region_type_str(type), instance); + mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2953ed5c3248..9ba36454e51b 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -80,6 +80,7 @@ struct intel_memory_region { u16 instance; enum intel_region_id id; char name[16]; + char uabi_name[16]; bool private; /* not for userspace */ struct { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8743153fad87..860b51b56a92 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -50,184 +50,44 @@ * present for a given platform. */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) - -#include <linux/sort.h> - -#define STACKDEPTH 8 - -static noinline depot_stack_handle_t __save_depot_stack(void) +static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm) { - unsigned long entries[STACKDEPTH]; - unsigned int n; - - n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); + return container_of(rpm, struct drm_i915_private, runtime_pm); } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - spin_lock_init(&rpm->debug.lock); - stack_depot_init(); + ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev)); } -static noinline depot_stack_handle_t +static intel_wakeref_t track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - depot_stack_handle_t stack, *stacks; - unsigned long flags; - - if (rpm->no_wakeref_tracking) + if (!rpm->available || rpm->no_wakeref_tracking) return -1; - stack = __save_depot_stack(); - if (!stack) - return -1; - - spin_lock_irqsave(&rpm->debug.lock, flags); - - if (!rpm->debug.count) - rpm->debug.last_acquire = stack; - - stacks = krealloc(rpm->debug.owners, - (rpm->debug.count + 1) * sizeof(*stacks), - GFP_NOWAIT | __GFP_NOWARN); - if (stacks) { - stacks[rpm->debug.count++] = stack; - rpm->debug.owners = stacks; - } else { - stack = -1; - } - - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - return stack; + return intel_ref_tracker_alloc(&rpm->debug); } static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, - depot_stack_handle_t stack) + intel_wakeref_t wakeref) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); - unsigned long flags, n; - bool found = false; - - if (unlikely(stack == -1)) + if (!rpm->available || rpm->no_wakeref_tracking) return; - spin_lock_irqsave(&rpm->debug.lock, flags); - for (n = rpm->debug.count; n--; ) { - if (rpm->debug.owners[n] == stack) { - memmove(rpm->debug.owners + n, - rpm->debug.owners + n + 1, - (--rpm->debug.count - n) * sizeof(stack)); - found = true; - break; - } - } - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - if (drm_WARN(&i915->drm, !found, - "Unmatched wakeref (tracking %lu), count %u\n", - rpm->debug.count, atomic_read(&rpm->wakeref_count))) { - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); - if (!buf) - return; - - stack_depot_snprint(stack, buf, PAGE_SIZE, 2); - DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); - - stack = READ_ONCE(rpm->debug.last_release); - if (stack) { - stack_depot_snprint(stack, buf, 
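The uabi_name added here ("system0", "local0", "stolen-local0", ...) is the stable per-region string composed from region_type_str() and the instance number. The composition in isolation, mirroring the snprintf() above (enum values invented for the sketch):

    #include <stdio.h>

    enum region_type { SYSTEM, LOCAL, STOLEN_LOCAL, STOLEN_SYSTEM };

    static const char *region_type_str(enum region_type type)
    {
            switch (type) {
            case SYSTEM:            return "system";
            case LOCAL:             return "local";
            case STOLEN_LOCAL:      return "stolen-local";
            case STOLEN_SYSTEM:     return "stolen-system";
            default:                return "unknown";
            }
    }

    int main(void)
    {
            char uabi_name[16];

            snprintf(uabi_name, sizeof(uabi_name), "%s%u",
                     region_type_str(LOCAL), 0u);
            puts(uabi_name);                 /* "local0" */
            return 0;
    }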
PAGE_SIZE, 2); - DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); - } - - kfree(buf); - } + intel_ref_tracker_free(&rpm->debug, wakeref); } -static int cmphandle(const void *_a, const void *_b) +static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm) { - const depot_stack_handle_t * const a = _a, * const b = _b; - - if (*a < *b) - return -1; - else if (*a > *b) - return 1; - else - return 0; -} - -static void -__print_intel_runtime_pm_wakeref(struct drm_printer *p, - const struct intel_runtime_pm_debug *dbg) -{ - unsigned long i; - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); - if (!buf) - return; - - if (dbg->last_acquire) { - stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref last acquired:\n%s", buf); - } - - if (dbg->last_release) { - stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref last released:\n%s", buf); - } - - drm_printf(p, "Wakeref count: %lu\n", dbg->count); - - sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL); - - for (i = 0; i < dbg->count; i++) { - depot_stack_handle_t stack = dbg->owners[i]; - unsigned long rep; - - rep = 1; - while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) - rep++, i++; - stack_depot_snprint(stack, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); - } - - kfree(buf); -} - -static noinline void -__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug, - struct intel_runtime_pm_debug *saved) -{ - *saved = *debug; - - debug->owners = NULL; - debug->count = 0; - debug->last_release = __save_depot_stack(); -} - -static void -dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug) -{ - if (debug->count) { - struct drm_printer p = drm_debug_printer("i915"); - - __print_intel_runtime_pm_wakeref(&p, debug); - } - - kfree(debug->owners); + ref_tracker_dir_exit(&rpm->debug); } static noinline void __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm) { - struct intel_runtime_pm_debug dbg = {}; unsigned long flags; if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count, @@ -235,60 +95,14 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm) flags)) return; - __untrack_all_wakerefs(&rpm->debug, &dbg); + ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT); spin_unlock_irqrestore(&rpm->debug.lock, flags); - - dump_and_free_wakeref_tracking(&dbg); -} - -static noinline void -untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm) -{ - struct intel_runtime_pm_debug dbg = {}; - unsigned long flags; - - spin_lock_irqsave(&rpm->debug.lock, flags); - __untrack_all_wakerefs(&rpm->debug, &dbg); - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - dump_and_free_wakeref_tracking(&dbg); } void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, struct drm_printer *p) { - struct intel_runtime_pm_debug dbg = {}; - - do { - unsigned long alloc = dbg.count; - depot_stack_handle_t *s; - - spin_lock_irq(&rpm->debug.lock); - dbg.count = rpm->debug.count; - if (dbg.count <= alloc) { - memcpy(dbg.owners, - rpm->debug.owners, - dbg.count * sizeof(*s)); - } - dbg.last_acquire = rpm->debug.last_acquire; - dbg.last_release = rpm->debug.last_release; - spin_unlock_irq(&rpm->debug.lock); - if (dbg.count <= alloc) - break; - - s = krealloc(dbg.owners, - dbg.count * sizeof(*s), - GFP_NOWAIT | __GFP_NOWARN); - if (!s) - goto out; - - dbg.owners = s; - } while (1); - - __print_intel_runtime_pm_wakeref(p, &dbg); - -out: - 
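This hunk swaps the driver's bespoke stackdepot owner array for the generic <linux/ref_tracker.h> infrastructure. A condensed kernel-context sketch of the lifecycle as the diff uses it; the demo_* wrappers are hypothetical, while the ref_tracker_* calls are the API the diff itself calls:

    #include <linux/gfp.h>
    #include <linux/ref_tracker.h>

    struct demo_dev {
            struct ref_tracker_dir dir;
    };

    static void demo_init(struct demo_dev *d)
    {
            /* Quarantine 16 dead trackers to catch double-free, as the diff does. */
            ref_tracker_dir_init(&d->dir, 16, "demo");
    }

    static struct ref_tracker *demo_get(struct demo_dev *d)
    {
            struct ref_tracker *t = NULL;

            ref_tracker_alloc(&d->dir, &t, GFP_NOWAIT); /* records caller's stack */
            return t;
    }

    static void demo_put(struct demo_dev *d, struct ref_tracker *t)
    {
            ref_tracker_free(&d->dir, &t);   /* warns on unmatched frees */
    }

    static void demo_fini(struct demo_dev *d)
    {
            ref_tracker_dir_exit(&d->dir);   /* reports still-outstanding refs */
    }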
kfree(dbg.owners); + intel_ref_tracker_show(&rpm->debug, p); } #else @@ -297,14 +111,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { } -static depot_stack_handle_t +static intel_wakeref_t track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { return -1; } static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, - intel_wakeref_t wref) + intel_wakeref_t wakeref) { } @@ -349,9 +163,7 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock) static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm, bool wakelock) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); int ret; ret = pm_runtime_get_sync(rpm->kdev); @@ -556,9 +368,7 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref) */ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct device *kdev = rpm->kdev; /* @@ -611,9 +421,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct device *kdev = rpm->kdev; /* Transfer rpm ownership back to core */ @@ -628,9 +436,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); int count = atomic_read(&rpm->wakeref_count); intel_wakeref_auto_fini(&rpm->userfault_wakeref); @@ -639,14 +445,17 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n", intel_rpm_raw_wakeref_count(count), intel_rpm_wakelock_count(count)); +} +void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm) +{ + intel_runtime_pm_driver_release(rpm); untrack_all_intel_runtime_pm_wakerefs(rpm); } void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = - container_of(rpm, struct drm_i915_private, runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct device *kdev = &pdev->dev; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index f79cda7a2503..de3579d399e1 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -11,8 +11,6 @@ #include "intel_wakeref.h" -#include "i915_utils.h" - struct device; struct drm_i915_private; struct drm_printer; @@ -77,15 +75,7 @@ struct intel_runtime_pm { * paired rpm_put) we can remove corresponding pairs of and keep * the array trimmed to active wakerefs. 
*/ - struct intel_runtime_pm_debug { - spinlock_t lock; - - depot_stack_handle_t last_acquire; - depot_stack_handle_t last_release; - - depot_stack_handle_t *owners; - unsigned long count; - } debug; + struct ref_tracker_dir debug; #endif }; @@ -189,6 +179,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm); void intel_runtime_pm_enable(struct intel_runtime_pm *rpm); void intel_runtime_pm_disable(struct intel_runtime_pm *rpm); void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm); +void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm); diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 623a69089386..dea2f63184f8 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -99,7 +99,8 @@ static void __intel_wakeref_put_work(struct work_struct *wrk) void __intel_wakeref_init(struct intel_wakeref *wf, struct drm_i915_private *i915, const struct intel_wakeref_ops *ops, - struct intel_wakeref_lockclass *key) + struct intel_wakeref_lockclass *key, + const char *name) { wf->i915 = i915; wf->ops = ops; @@ -111,6 +112,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf, INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work); lockdep_init_map(&wf->work.work.lockdep_map, "wakeref.work", &key->work, 0); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name); +#endif } int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) @@ -191,3 +196,31 @@ void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf) intel_wakeref_auto(wf, 0); INTEL_WAKEREF_BUG_ON(wf->wakeref); } + +void intel_ref_tracker_show(struct ref_tracker_dir *dir, + struct drm_printer *p) +{ + const size_t buf_size = PAGE_SIZE; + char *buf, *sb, *se; + size_t count; + + buf = kmalloc(buf_size, GFP_NOWAIT); + if (!buf) + return; + + count = ref_tracker_dir_snprint(dir, buf, buf_size); + if (!count) + goto free; + /* printk does not like big buffers, so we split it */ + for (sb = buf; *sb; sb = se + 1) { + se = strchrnul(sb, '\n'); + drm_printf(p, "%.*s", (int)(se - sb + 1), sb); + if (!*se) + break; + } + if (count >= buf_size) + drm_printf(p, "\n...dropped %zd extra bytes of leak report.\n", + count + 1 - buf_size); +free: + kfree(buf); +} diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index ec881b097368..68aa3be48251 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -7,16 +7,25 @@ #ifndef INTEL_WAKEREF_H #define INTEL_WAKEREF_H +#include <drm/drm_print.h> + #include <linux/atomic.h> #include <linux/bitfield.h> #include <linux/bits.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/refcount.h> +#include <linux/ref_tracker.h> +#include <linux/slab.h> #include <linux/stackdepot.h> #include <linux/timer.h> #include <linux/workqueue.h> +typedef unsigned long intel_wakeref_t; + +#define INTEL_REFTRACK_DEAD_COUNT 16 +#define INTEL_REFTRACK_PRINT_LIMIT 16 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr) #else @@ -26,8 +35,6 @@ struct intel_runtime_pm; struct intel_wakeref; -typedef depot_stack_handle_t intel_wakeref_t; - struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); @@ -43,6 +50,10 @@ struct intel_wakeref { const 
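intel_ref_tracker_show() above feeds drm_printf() one line at a time because printk copes badly with very large buffers. The splitting loop can be exercised standalone; a portable strchrnul() is supplied here since the libc one is a GNU extension:

    #include <stdio.h>

    /* Like strchr(), but returns the terminating NUL on a miss. */
    static char *my_strchrnul(char *s, int c)
    {
            while (*s && *s != c)
                    s++;
            return s;
    }

    int main(void)
    {
            char buf[] = "leak at foo()\nleak at bar()\nleak at baz()";
            char *sb, *se;

            /* One print call per line, as in intel_ref_tracker_show(). */
            for (sb = buf; *sb; sb = se + 1) {
                    se = my_strchrnul(sb, '\n');
                    printf("drm: %.*s\n", (int)(se - sb), sb);
                    if (!*se)
                            break;           /* final line had no trailing '\n' */
            }
            return 0;
    }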
struct intel_wakeref_ops *ops; struct delayed_work work; + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + struct ref_tracker_dir debug; +#endif }; struct intel_wakeref_lockclass { @@ -53,11 +64,12 @@ struct intel_wakeref_lockclass { void __intel_wakeref_init(struct intel_wakeref *wf, struct drm_i915_private *i915, const struct intel_wakeref_ops *ops, - struct intel_wakeref_lockclass *key); -#define intel_wakeref_init(wf, i915, ops) do { \ + struct intel_wakeref_lockclass *key, + const char *name); +#define intel_wakeref_init(wf, i915, ops, name) do { \ static struct intel_wakeref_lockclass __key; \ \ - __intel_wakeref_init((wf), (i915), (ops), &__key); \ + __intel_wakeref_init((wf), (i915), (ops), &__key, name); \ } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); @@ -261,6 +273,57 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf) */ int intel_wakeref_wait_for_idle(struct intel_wakeref *wf); +#define INTEL_WAKEREF_DEF ((intel_wakeref_t)(-1)) + +static inline intel_wakeref_t intel_ref_tracker_alloc(struct ref_tracker_dir *dir) +{ + struct ref_tracker *user = NULL; + + ref_tracker_alloc(dir, &user, GFP_NOWAIT); + + return (intel_wakeref_t)user ?: INTEL_WAKEREF_DEF; +} + +static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir, + intel_wakeref_t handle) +{ + struct ref_tracker *user; + + user = (handle == INTEL_WAKEREF_DEF) ? NULL : (void *)handle; + + ref_tracker_free(dir, &user); +} + +void intel_ref_tracker_show(struct ref_tracker_dir *dir, + struct drm_printer *p); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + +static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf) +{ + return intel_ref_tracker_alloc(&wf->debug); +} + +static inline void intel_wakeref_untrack(struct intel_wakeref *wf, + intel_wakeref_t handle) +{ + intel_ref_tracker_free(&wf->debug, handle); +} + +#else + +static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf) +{ + return -1; +} + +static inline void intel_wakeref_untrack(struct intel_wakeref *wf, + intel_wakeref_t handle) +{ +} + +#endif + struct intel_wakeref_auto { struct drm_i915_private *i915; struct timer_list timer; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index dc327cf40b5a..75278e78ca90 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -199,6 +199,9 @@ int intel_pxp_init(struct drm_i915_private *i915) struct intel_gt *gt; bool is_full_feature = false; + if (intel_gt_is_wedged(to_gt(i915))) + return -ENOTCONN; + /* * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since * we still need it if PXP's backend tee transport is needed. @@ -303,6 +306,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp) if (!pxp->arb_is_valid) return 0; + + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini"); /* * To ensure synchronous and coherent session teardown completion * in response to suspend or shutdown triggers, don't use a worker. @@ -324,6 +329,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) if (pxp->arb_is_valid) return 0; + + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart"); /* * The arb-session is currently inactive and we are doing a reset and restart * due to a runtime event. Use the worker that was designed for this. 
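With intel_wakeref_t now an unsigned long, intel_ref_tracker_alloc()/intel_ref_tracker_free() above treat the cookie as either a struct ref_tracker pointer in disguise or the -1 sentinel (INTEL_WAKEREF_DEF) when no tracker could be allocated. The encoding round-trips like this (standalone sketch; the GNU "?:" shorthand is the same one the diff uses):

    #include <stdio.h>

    typedef unsigned long wakeref_t;
    #define WAKEREF_DEF ((wakeref_t)-1)      /* sentinel: held but untracked */

    struct tracker { int id; };

    static wakeref_t encode(struct tracker *t)
    {
            return (wakeref_t)t ?: WAKEREF_DEF;
    }

    static struct tracker *decode(wakeref_t handle)
    {
            return (handle == WAKEREF_DEF) ? NULL : (struct tracker *)handle;
    }

    int main(void)
    {
            struct tracker t = { .id = 1 };

            printf("%d\n", decode(encode(&t))->id);        /* 1 */
            printf("%p\n", (void *)decode(encode(NULL)));  /* NULL decodes back */
            return 0;
    }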
@@ -332,8 +339,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) timeout = intel_pxp_get_backend_timeout_ms(pxp); - if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)", + timeout); return -ETIMEDOUT; + } return 0; } @@ -414,10 +424,12 @@ int intel_pxp_start(struct intel_pxp *pxp) int ret = 0; ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT); - if (ret < 0) + if (ret < 0) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret); return ret; - else if (ret > 1) + } else if (ret > 1) { return -EIO; /* per UAPI spec, user may retry later */ + } mutex_lock(&pxp->arb_mutex); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c index 91e9622c07d0..d81750b9bdda 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) { /* immediately mark PXP as inactive on termination */ intel_pxp_mark_termination_in_progress(pxp); - pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED; + pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED | + PXP_EVENT_TYPE_IRQ; } if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT) - pxp->session_events |= PXP_TERMINATION_COMPLETE; + pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ; if (pxp->session_events) queue_work(system_unbound_wq, &pxp->session_work); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 0a3e66b0265e..091c86e03d1a 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res static void pxp_terminate_complete(struct intel_pxp *pxp) { /* Re-create the arb session after teardown handle complete */ - if (fetch_and_zero(&pxp->hw_state_invalidated)) + if (fetch_and_zero(&pxp->hw_state_invalidated)) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation"); pxp_create_arb_session(pxp); + } complete_all(&pxp->termination); } @@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work) if (!events) return; + drm_dbg(>->i915->drm, "PXP: processing event-flags 0x%08x", events); + if (events & PXP_INVAL_REQUIRED) intel_pxp_invalidate(pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 7e11fa8034b2..07864b584cf4 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -124,6 +124,7 @@ struct intel_pxp { #define PXP_TERMINATION_REQUEST BIT(0) #define PXP_TERMINATION_COMPLETE BIT(1) #define PXP_INVAL_REQUIRED BIT(2) +#define PXP_EVENT_TYPE_IRQ BIT(3) }; #endif /* __INTEL_PXP_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c index 47f4ae18a1ef..88fa845e9f4a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -77,7 +77,7 @@ __sync_print(struct i915_syncmap *p, for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { buf = __sync_print(__sync_child(p)[i], buf, sz, depth + 1, - 
last << 1 | !!(p->bitmap >> (i + 1)), + last << 1 | ((p->bitmap >> (i + 1)) ? 1 : 0), i); } } diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 03ea75cd84dd..4f98aa8a861e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -24,6 +24,8 @@ #include "../i915_selftest.h" +#include "gt/intel_gt.h" + static int intel_fw_table_check(const struct intel_forcewake_range *ranges, unsigned int num_ranges, bool is_watertight) diff --git a/drivers/gpu/drm/i915/soc/intel_gmch.c b/drivers/gpu/drm/i915/soc/intel_gmch.c index f32e9f78770a..40874ebfb64c 100644 --- a/drivers/gpu/drm/i915/soc/intel_gmch.c +++ b/drivers/gpu/drm/i915/soc/intel_gmch.c @@ -33,18 +33,22 @@ int intel_gmch_bridge_setup(struct drm_i915_private *i915) i915->gmch.pdev); } +static int mchbar_reg(struct drm_i915_private *i915) +{ + return GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915; +} + /* Allocate space for the MCH regs if needed, return nonzero on error */ static int intel_alloc_mchbar_resource(struct drm_i915_private *i915) { - int reg = GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp_lo, temp_hi = 0; u64 mchbar_addr; int ret; if (GRAPHICS_VER(i915) >= 4) - pci_read_config_dword(i915->gmch.pdev, reg + 4, &temp_hi); - pci_read_config_dword(i915->gmch.pdev, reg, &temp_lo); + pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915) + 4, &temp_hi); + pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp_lo); mchbar_addr = ((u64)temp_hi << 32) | temp_lo; /* If ACPI doesn't have it, assume we need to allocate it ourselves */ @@ -68,10 +72,10 @@ intel_alloc_mchbar_resource(struct drm_i915_private *i915) } if (GRAPHICS_VER(i915) >= 4) - pci_write_config_dword(i915->gmch.pdev, reg + 4, + pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915) + 4, upper_32_bits(i915->gmch.mch_res.start)); - pci_write_config_dword(i915->gmch.pdev, reg, + pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915), lower_32_bits(i915->gmch.mch_res.start)); return 0; } @@ -79,7 +83,6 @@ intel_alloc_mchbar_resource(struct drm_i915_private *i915) /* Setup MCHBAR if possible, return true if we should disable it again */ void intel_gmch_bar_setup(struct drm_i915_private *i915) { - int mchbar_reg = GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp; bool enabled; @@ -92,7 +95,7 @@ void intel_gmch_bar_setup(struct drm_i915_private *i915) pci_read_config_dword(i915->gmch.pdev, DEVEN, &temp); enabled = !!(temp & DEVEN_MCHBAR_EN); } else { - pci_read_config_dword(i915->gmch.pdev, mchbar_reg, &temp); + pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp); enabled = temp & 1; } @@ -110,15 +113,13 @@ void intel_gmch_bar_setup(struct drm_i915_private *i915) pci_write_config_dword(i915->gmch.pdev, DEVEN, temp | DEVEN_MCHBAR_EN); } else { - pci_read_config_dword(i915->gmch.pdev, mchbar_reg, &temp); - pci_write_config_dword(i915->gmch.pdev, mchbar_reg, temp | 1); + pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp); + pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915), temp | 1); } } void intel_gmch_bar_teardown(struct drm_i915_private *i915) { - int mchbar_reg = GRAPHICS_VER(i915) >= 4 ? 
MCHBAR_I965 : MCHBAR_I915; - if (i915->gmch.mchbar_need_disable) { if (IS_I915G(i915) || IS_I915GM(i915)) { u32 deven_val; @@ -131,10 +132,10 @@ void intel_gmch_bar_teardown(struct drm_i915_private *i915) } else { u32 mchbar_val; - pci_read_config_dword(i915->gmch.pdev, mchbar_reg, + pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &mchbar_val); mchbar_val &= ~1; - pci_write_config_dword(i915->gmch.pdev, mchbar_reg, + pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915), mchbar_val); } } diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c index b98dec3ad817..ffa195560d0d 100644 --- a/drivers/gpu/drm/i915/vlv_sideband.c +++ b/drivers/gpu/drm/i915/vlv_sideband.c @@ -166,23 +166,6 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) return val; } -u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) -{ - u32 val = 0; - - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, - SB_CRRDDA_NP, reg, &val); - - return val; -} - -void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val) -{ - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, - SB_CRWRDA_NP, reg, &val); -} - u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; @@ -227,9 +210,9 @@ static u32 vlv_dpio_phy_iosf_port(struct drm_i915_private *i915, enum dpio_phy p return IOSF_PORT_DPIO; } -u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) +u32 vlv_dpio_read(struct drm_i915_private *i915, enum dpio_phy phy, int reg) { - u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); + u32 port = vlv_dpio_phy_iosf_port(i915, phy); u32 val = 0; vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val); @@ -239,16 +222,16 @@ u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) * so ideally we should check the register offset instead... 
*/ drm_WARN(&i915->drm, val == 0xffffffff, - "DPIO read pipe %c reg 0x%x == 0x%x\n", - pipe_name(pipe), reg, val); + "DPIO PHY%d read reg 0x%x == 0x%x\n", + phy, reg, val); return val; } void vlv_dpio_write(struct drm_i915_private *i915, - enum pipe pipe, int reg, u32 val) + enum dpio_phy phy, int reg, u32 val) { - u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); + u32 port = vlv_dpio_phy_iosf_port(i915, phy); vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val); } diff --git a/drivers/gpu/drm/i915/vlv_sideband.h b/drivers/gpu/drm/i915/vlv_sideband.h index 9ce283d96b80..c20cf41b2d39 100644 --- a/drivers/gpu/drm/i915/vlv_sideband.h +++ b/drivers/gpu/drm/i915/vlv_sideband.h @@ -11,7 +11,7 @@ #include "vlv_sideband_reg.h" -enum pipe; +enum dpio_phy; struct drm_i915_private; enum { @@ -26,9 +26,6 @@ enum { }; void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); -u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val); void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); static inline void vlv_bunit_get(struct drm_i915_private *i915) @@ -75,9 +72,9 @@ static inline void vlv_dpio_get(struct drm_i915_private *i915) vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO)); } -u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg); +u32 vlv_dpio_read(struct drm_i915_private *i915, enum dpio_phy phy, int reg); void vlv_dpio_write(struct drm_i915_private *i915, - enum pipe pipe, int reg, u32 val); + enum dpio_phy phy, int reg, u32 val); static inline void vlv_dpio_put(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig new file mode 100644 index 000000000000..3bfa2ac212dc --- /dev/null +++ b/drivers/gpu/drm/imagination/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (c) 2023 Imagination Technologies Ltd. + +config DRM_POWERVR + tristate "Imagination Technologies PowerVR (Series 6 and later) & IMG Graphics" + depends on ARM64 + depends on DRM + depends on PM + select DRM_EXEC + select DRM_GEM_SHMEM_HELPER + select DRM_SCHED + select DRM_GPUVM + select FW_LOADER + help + Choose this option if you have a system that has an Imagination + Technologies PowerVR (Series 6 or later) or IMG GPU. + + If "M" is selected, the module will be called powervr. diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile new file mode 100644 index 000000000000..ec6db8e9b403 --- /dev/null +++ b/drivers/gpu/drm/imagination/Makefile @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (c) 2023 Imagination Technologies Ltd. 
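The vlv_dpio_read()/vlv_dpio_write() conversion pushes the pipe-to-PHY translation out to the callers now that DPIO_PHY() is gone from i915_reg.h. A hypothetical call site showing only the mechanical change (the real conversions live in the display PLL code):

    /* Old mapping, previously hidden inside the helper:
     *   #define DPIO_PHY(pipe) ((pipe) >> 1)
     * New signature:
     *   u32 vlv_dpio_read(struct drm_i915_private *i915, enum dpio_phy phy, int reg);
     */
    static u32 read_pll_reg(struct drm_i915_private *i915, enum pipe pipe, int reg)
    {
            enum dpio_phy phy = pipe >> 1;   /* pipes A/B -> PHY0, pipe C -> PHY1 */

            return vlv_dpio_read(i915, phy, reg);
    }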
+ +subdir-ccflags-y := -I$(srctree)/$(src) + +powervr-y := \ + pvr_ccb.o \ + pvr_cccb.o \ + pvr_context.o \ + pvr_device.o \ + pvr_device_info.o \ + pvr_drv.o \ + pvr_free_list.o \ + pvr_fw.o \ + pvr_fw_meta.o \ + pvr_fw_mips.o \ + pvr_fw_startstop.o \ + pvr_fw_trace.o \ + pvr_gem.o \ + pvr_hwrt.o \ + pvr_job.o \ + pvr_mmu.o \ + pvr_params.o \ + pvr_power.o \ + pvr_queue.o \ + pvr_stream.o \ + pvr_stream_defs.o \ + pvr_sync.o \ + pvr_vm.o \ + pvr_vm_mips.o + +powervr-$(CONFIG_DEBUG_FS) += \ + pvr_debugfs.o + +obj-$(CONFIG_DRM_POWERVR) += powervr.o diff --git a/drivers/gpu/drm/imagination/pvr_ccb.c b/drivers/gpu/drm/imagination/pvr_ccb.c new file mode 100644 index 000000000000..4deeac7ed40a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_ccb.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_ccb.h" +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_free_list.h" +#include "pvr_fw.h" +#include "pvr_gem.h" +#include "pvr_power.h" + +#include <drm/drm_managed.h> +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#define RESERVE_SLOT_TIMEOUT (1 * HZ) /* 1s */ +#define RESERVE_SLOT_MIN_RETRIES 10 + +static void +ccb_ctrl_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_ccb_ctl *ctrl = cpu_ptr; + struct pvr_ccb *pvr_ccb = priv; + + ctrl->write_offset = 0; + ctrl->read_offset = 0; + ctrl->wrap_mask = pvr_ccb->num_cmds - 1; + ctrl->cmd_size = pvr_ccb->cmd_size; +} + +/** + * pvr_ccb_init() - Initialise a CCB + * @pvr_dev: Device pointer. + * @pvr_ccb: Pointer to CCB structure to initialise. + * @num_cmds_log2: Log2 of number of commands in this CCB. + * @cmd_size: Command size for this CCB. + * + * Return: + * * Zero on success, or + * * Any error code returned by pvr_fw_object_create_and_map(). + */ +static int +pvr_ccb_init(struct pvr_device *pvr_dev, struct pvr_ccb *pvr_ccb, + u32 num_cmds_log2, size_t cmd_size) +{ + u32 num_cmds = 1 << num_cmds_log2; + u32 ccb_size = num_cmds * cmd_size; + int err; + + pvr_ccb->num_cmds = num_cmds; + pvr_ccb->cmd_size = cmd_size; + + err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_ccb->lock); + if (err) + return err; + + /* + * Map CCB and control structure as uncached, so we don't have to flush + * CPU cache repeatedly when polling for space. + */ + pvr_ccb->ctrl = pvr_fw_object_create_and_map(pvr_dev, sizeof(*pvr_ccb->ctrl), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + ccb_ctrl_init, pvr_ccb, &pvr_ccb->ctrl_obj); + if (IS_ERR(pvr_ccb->ctrl)) + return PTR_ERR(pvr_ccb->ctrl); + + pvr_ccb->ccb = pvr_fw_object_create_and_map(pvr_dev, ccb_size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &pvr_ccb->ccb_obj); + if (IS_ERR(pvr_ccb->ccb)) { + err = PTR_ERR(pvr_ccb->ccb); + goto err_free_ctrl; + } + + pvr_fw_object_get_fw_addr(pvr_ccb->ctrl_obj, &pvr_ccb->ctrl_fw_addr); + pvr_fw_object_get_fw_addr(pvr_ccb->ccb_obj, &pvr_ccb->ccb_fw_addr); + + WRITE_ONCE(pvr_ccb->ctrl->write_offset, 0); + WRITE_ONCE(pvr_ccb->ctrl->read_offset, 0); + WRITE_ONCE(pvr_ccb->ctrl->wrap_mask, num_cmds - 1); + WRITE_ONCE(pvr_ccb->ctrl->cmd_size, cmd_size); + + return 0; + +err_free_ctrl: + pvr_fw_object_unmap_and_destroy(pvr_ccb->ctrl_obj); + + return err; +} + +/** + * pvr_ccb_fini() - Release CCB structure + * @pvr_ccb: CCB to release. 
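pvr_ccb_init() sizes the ring from a log2, so that wrap_mask = num_cmds - 1 turns every modulo into a cheap AND, which is valid only for power-of-two sizes. The arithmetic in isolation:

    #include <stdio.h>

    int main(void)
    {
            unsigned int num_cmds_log2 = 4;              /* as passed to pvr_ccb_init() */
            unsigned int num_cmds = 1u << num_cmds_log2; /* 16 slots */
            unsigned int wrap_mask = num_cmds - 1;       /* 0xf */
            unsigned int off;

            /* Offsets advance with '& wrap_mask' instead of '% num_cmds'. */
            for (off = 14; off != 2; off = (off + 1) & wrap_mask)
                    printf("%u ", off);                  /* 14 15 0 1 */
            printf("\n");
            return 0;
    }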
+ */ +void +pvr_ccb_fini(struct pvr_ccb *pvr_ccb) +{ + pvr_fw_object_unmap_and_destroy(pvr_ccb->ccb_obj); + pvr_fw_object_unmap_and_destroy(pvr_ccb->ctrl_obj); +} + +/** + * pvr_ccb_slot_available_locked() - Test whether any slots are available in CCB + * @pvr_ccb: CCB to test. + * @write_offset: Address to store number of next available slot. May be %NULL. + * + * Caller must hold @pvr_ccb->lock. + * + * Return: + * * %true if a slot is available, or + * * %false if no slot is available. + */ +static __always_inline bool +pvr_ccb_slot_available_locked(struct pvr_ccb *pvr_ccb, u32 *write_offset) +{ + struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl; + u32 next_write_offset = (READ_ONCE(ctrl->write_offset) + 1) & READ_ONCE(ctrl->wrap_mask); + + lockdep_assert_held(&pvr_ccb->lock); + + if (READ_ONCE(ctrl->read_offset) != next_write_offset) { + if (write_offset) + *write_offset = next_write_offset; + return true; + } + + return false; +} + +static void +process_fwccb_command(struct pvr_device *pvr_dev, struct rogue_fwif_fwccb_cmd *cmd) +{ + switch (cmd->cmd_type) { + case ROGUE_FWIF_FWCCB_CMD_REQUEST_GPU_RESTART: + pvr_power_reset(pvr_dev, false); + break; + + case ROGUE_FWIF_FWCCB_CMD_FREELISTS_RECONSTRUCTION: + pvr_free_list_process_reconstruct_req(pvr_dev, + &cmd->cmd_data.cmd_freelists_reconstruction); + break; + + case ROGUE_FWIF_FWCCB_CMD_FREELIST_GROW: + pvr_free_list_process_grow_req(pvr_dev, &cmd->cmd_data.cmd_free_list_gs); + break; + + default: + drm_info(from_pvr_device(pvr_dev), "Received unknown FWCCB command %x\n", + cmd->cmd_type); + break; + } +} + +/** + * pvr_fwccb_process() - Process any pending FWCCB commands + * @pvr_dev: Target PowerVR device + */ +void pvr_fwccb_process(struct pvr_device *pvr_dev) +{ + struct rogue_fwif_fwccb_cmd *fwccb = pvr_dev->fwccb.ccb; + struct rogue_fwif_ccb_ctl *ctrl = pvr_dev->fwccb.ctrl; + u32 read_offset; + + mutex_lock(&pvr_dev->fwccb.lock); + + while ((read_offset = READ_ONCE(ctrl->read_offset)) != READ_ONCE(ctrl->write_offset)) { + struct rogue_fwif_fwccb_cmd cmd = fwccb[read_offset]; + + WRITE_ONCE(ctrl->read_offset, (read_offset + 1) & READ_ONCE(ctrl->wrap_mask)); + + /* Drop FWCCB lock while we process command. */ + mutex_unlock(&pvr_dev->fwccb.lock); + + process_fwccb_command(pvr_dev, &cmd); + + mutex_lock(&pvr_dev->fwccb.lock); + } + + mutex_unlock(&pvr_dev->fwccb.lock); +} + +/** + * pvr_kccb_capacity() - Returns the maximum number of usable KCCB slots. + * @pvr_dev: Target PowerVR device + * + * Return: + * * The maximum number of active slots. + */ +static u32 pvr_kccb_capacity(struct pvr_device *pvr_dev) +{ + /* Capacity is the number of slots minus one to cope with the wrapping + * mechanisms. If we were to use all slots, we might end up with + * read_offset == write_offset, which the FW considers as a KCCB-is-empty + * condition. + */ + return pvr_dev->kccb.slot_count - 1; +} + +/** + * pvr_kccb_used_slot_count_locked() - Get the number of used slots + * @pvr_dev: Device pointer. + * + * KCCB lock must be held. + * + * Return: + * * The number of slots currently used.
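pvr_kccb_capacity() and pvr_ccb_slot_available_locked() implement the classic one-slot-sacrifice rule: read_offset == write_offset must always mean "empty", so one slot is never used. The two boundary cases, standalone:

    #include <stdbool.h>
    #include <stdio.h>

    #define SLOTS 8u
    #define MASK  (SLOTS - 1)

    /* Mirrors pvr_ccb_slot_available_locked(): free only if the *next*
     * write offset would not collide with the read offset. */
    static bool slot_available(unsigned int rd, unsigned int wr)
    {
            return rd != ((wr + 1) & MASK);
    }

    int main(void)
    {
            printf("%d\n", slot_available(0, 0)); /* 1: empty ring */

            /* Only SLOTS - 1 entries ever fit: one more write would make
             * rd == wr look like "empty" to the firmware. */
            printf("%d\n", slot_available(0, 7)); /* 0: ring is full */
            return 0;
    }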
+ */ +static u32 +pvr_kccb_used_slot_count_locked(struct pvr_device *pvr_dev) +{ + struct pvr_ccb *pvr_ccb = &pvr_dev->kccb.ccb; + struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl; + u32 wr_offset = READ_ONCE(ctrl->write_offset); + u32 rd_offset = READ_ONCE(ctrl->read_offset); + u32 used_count; + + lockdep_assert_held(&pvr_ccb->lock); + + if (wr_offset >= rd_offset) + used_count = wr_offset - rd_offset; + else + used_count = wr_offset + pvr_dev->kccb.slot_count - rd_offset; + + return used_count; +} + +/** + * pvr_kccb_send_cmd_reserved_powered() - Send command to the KCCB, with the PM ref + * held and a slot pre-reserved + * @pvr_dev: Device pointer. + * @cmd: Command to send. + * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL. + */ +void +pvr_kccb_send_cmd_reserved_powered(struct pvr_device *pvr_dev, + struct rogue_fwif_kccb_cmd *cmd, + u32 *kccb_slot) +{ + struct pvr_ccb *pvr_ccb = &pvr_dev->kccb.ccb; + struct rogue_fwif_kccb_cmd *kccb = pvr_ccb->ccb; + struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl; + u32 old_write_offset; + u32 new_write_offset; + + WARN_ON(pvr_dev->lost); + + mutex_lock(&pvr_ccb->lock); + + if (WARN_ON(!pvr_dev->kccb.reserved_count)) + goto out_unlock; + + old_write_offset = READ_ONCE(ctrl->write_offset); + + /* We reserved the slot, we should have one available. */ + if (WARN_ON(!pvr_ccb_slot_available_locked(pvr_ccb, &new_write_offset))) + goto out_unlock; + + memcpy(&kccb[old_write_offset], cmd, + sizeof(struct rogue_fwif_kccb_cmd)); + if (kccb_slot) { + *kccb_slot = old_write_offset; + /* Clear return status for this slot. */ + WRITE_ONCE(pvr_dev->kccb.rtn[old_write_offset], + ROGUE_FWIF_KCCB_RTN_SLOT_NO_RESPONSE); + } + mb(); /* memory barrier */ + WRITE_ONCE(ctrl->write_offset, new_write_offset); + pvr_dev->kccb.reserved_count--; + + /* Kick MTS */ + pvr_fw_mts_schedule(pvr_dev, + PVR_FWIF_DM_GP & ~ROGUE_CR_MTS_SCHEDULE_DM_CLRMSK); + +out_unlock: + mutex_unlock(&pvr_ccb->lock); +} + +/** + * pvr_kccb_try_reserve_slot() - Try to reserve a KCCB slot + * @pvr_dev: Device pointer. + * + * Return: + * * true if a KCCB slot was reserved, or + * * false otherwise. + */ +static bool pvr_kccb_try_reserve_slot(struct pvr_device *pvr_dev) +{ + bool reserved = false; + u32 used_count; + + mutex_lock(&pvr_dev->kccb.ccb.lock); + + used_count = pvr_kccb_used_slot_count_locked(pvr_dev); + if (pvr_dev->kccb.reserved_count < pvr_kccb_capacity(pvr_dev) - used_count) { + pvr_dev->kccb.reserved_count++; + reserved = true; + } + + mutex_unlock(&pvr_dev->kccb.ccb.lock); + + return reserved; +} + +/** + * pvr_kccb_reserve_slot_sync() - Try to reserve a slot synchronously + * @pvr_dev: Device pointer. + * + * Return: + * * 0 on success, or + * * -EBUSY if no slots were reserved after %RESERVE_SLOT_TIMEOUT, with a minimum of + * %RESERVE_SLOT_MIN_RETRIES retries. + */ +static int pvr_kccb_reserve_slot_sync(struct pvr_device *pvr_dev) +{ + unsigned long start_timestamp = jiffies; + bool reserved = false; + u32 retries = 0; + + while ((jiffies - start_timestamp) < (u32)RESERVE_SLOT_TIMEOUT || + retries < RESERVE_SLOT_MIN_RETRIES) { + reserved = pvr_kccb_try_reserve_slot(pvr_dev); + if (reserved) + break; + + usleep_range(1, 50); + + if (retries < U32_MAX) + retries++; + } + + return reserved ? 0 : -EBUSY; +} + +/** + * pvr_kccb_send_cmd_powered() - Send command to the KCCB, with a PM ref held + * @pvr_dev: Device pointer. + * @cmd: Command to send. + * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL.
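pvr_kccb_used_slot_count_locked() must cope with the write offset having wrapped behind the read offset; the two branches reduce to this (standalone, fixed slot count):

    #include <stdio.h>

    #define SLOT_COUNT 16u

    static unsigned int used_slots(unsigned int wr, unsigned int rd)
    {
            return (wr >= rd) ? wr - rd : wr + SLOT_COUNT - rd;
    }

    int main(void)
    {
            printf("%u\n", used_slots(9, 5));   /* 4: no wrap */
            printf("%u\n", used_slots(2, 13));  /* 5: writer wrapped past the end */
            return 0;
    }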
+ * + * Returns: + * * Zero on success, or + * * -EBUSY on timeout while waiting for a free KCCB slot. + */ +int +pvr_kccb_send_cmd_powered(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *cmd, + u32 *kccb_slot) +{ + int err; + + err = pvr_kccb_reserve_slot_sync(pvr_dev); + if (err) + return err; + + pvr_kccb_send_cmd_reserved_powered(pvr_dev, cmd, kccb_slot); + return 0; +} + +/** + * pvr_kccb_send_cmd() - Send command to the KCCB + * @pvr_dev: Device pointer. + * @cmd: Command to send. + * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL. + * + * Returns: + * * Zero on success, or + * * -EBUSY on timeout while waiting for a free KCCB slot. + */ +int +pvr_kccb_send_cmd(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *cmd, + u32 *kccb_slot) +{ + int err; + + err = pvr_power_get(pvr_dev); + if (err) + return err; + + err = pvr_kccb_send_cmd_powered(pvr_dev, cmd, kccb_slot); + + pvr_power_put(pvr_dev); + + return err; +} + +/** + * pvr_kccb_wait_for_completion() - Wait for a KCCB command to complete + * @pvr_dev: Device pointer. + * @slot_nr: KCCB slot to wait on. + * @timeout: Timeout length (in jiffies). + * @rtn_out: Location to store KCCB command result. May be %NULL. + * + * Returns: + * * Zero on success, or + * * -ETIMEDOUT on timeout. + */ +int +pvr_kccb_wait_for_completion(struct pvr_device *pvr_dev, u32 slot_nr, + u32 timeout, u32 *rtn_out) +{ + int ret = wait_event_timeout(pvr_dev->kccb.rtn_q, READ_ONCE(pvr_dev->kccb.rtn[slot_nr]) & + ROGUE_FWIF_KCCB_RTN_SLOT_CMD_EXECUTED, timeout); + + if (ret && rtn_out) + *rtn_out = READ_ONCE(pvr_dev->kccb.rtn[slot_nr]); + + return ret ? 0 : -ETIMEDOUT; +} + +/** + * pvr_kccb_is_idle() - Returns whether the device's KCCB is idle + * @pvr_dev: Device pointer + * + * Returns: + * * %true if the KCCB is idle (contains no commands), or + * * %false if the KCCB contains pending commands. + */ +bool +pvr_kccb_is_idle(struct pvr_device *pvr_dev) +{ + struct rogue_fwif_ccb_ctl *ctrl = pvr_dev->kccb.ccb.ctrl; + bool idle; + + mutex_lock(&pvr_dev->kccb.ccb.lock); + + idle = (READ_ONCE(ctrl->write_offset) == READ_ONCE(ctrl->read_offset)); + + mutex_unlock(&pvr_dev->kccb.ccb.lock); + + return idle; +} + +static const char * +pvr_kccb_fence_get_driver_name(struct dma_fence *f) +{ + return PVR_DRIVER_NAME; +} + +static const char * +pvr_kccb_fence_get_timeline_name(struct dma_fence *f) +{ + return "kccb"; +} + +static const struct dma_fence_ops pvr_kccb_fence_ops = { + .get_driver_name = pvr_kccb_fence_get_driver_name, + .get_timeline_name = pvr_kccb_fence_get_timeline_name, +}; + +/** + * struct pvr_kccb_fence - Fence object used to wait for a KCCB slot + */ +struct pvr_kccb_fence { + /** @base: Base dma_fence object. */ + struct dma_fence base; + + /** @node: Node used to insert the fence in the pvr_device::kccb::waiters list. */ + struct list_head node; +}; + +/** + * pvr_kccb_wake_up_waiters() - Check the KCCB waiters + * @pvr_dev: Target PowerVR device + * + * Signal as many KCCB fences as we have slots available. + */ +void pvr_kccb_wake_up_waiters(struct pvr_device *pvr_dev) +{ + struct pvr_kccb_fence *fence, *tmp_fence; + u32 used_count, available_count; + + /* Wake up those waiting for KCCB slot execution. */ + wake_up_all(&pvr_dev->kccb.rtn_q); + + /* Then iterate over all KCCB fences and signal as many as we can.
*/ + mutex_lock(&pvr_dev->kccb.ccb.lock); + used_count = pvr_kccb_used_slot_count_locked(pvr_dev); + + if (WARN_ON(used_count + pvr_dev->kccb.reserved_count > pvr_kccb_capacity(pvr_dev))) + goto out_unlock; + + available_count = pvr_kccb_capacity(pvr_dev) - used_count - pvr_dev->kccb.reserved_count; + list_for_each_entry_safe(fence, tmp_fence, &pvr_dev->kccb.waiters, node) { + if (!available_count) + break; + + list_del(&fence->node); + pvr_dev->kccb.reserved_count++; + available_count--; + dma_fence_signal(&fence->base); + dma_fence_put(&fence->base); + } + +out_unlock: + mutex_unlock(&pvr_dev->kccb.ccb.lock); +} + +/** + * pvr_kccb_fini() - Cleanup device KCCB + * @pvr_dev: Target PowerVR device + */ +void pvr_kccb_fini(struct pvr_device *pvr_dev) +{ + pvr_ccb_fini(&pvr_dev->kccb.ccb); + WARN_ON(!list_empty(&pvr_dev->kccb.waiters)); + WARN_ON(pvr_dev->kccb.reserved_count); +} + +/** + * pvr_kccb_init() - Initialise device KCCB + * @pvr_dev: Target PowerVR device + * + * Returns: + * * 0 on success, or + * * Any error returned by pvr_ccb_init(). + */ +int +pvr_kccb_init(struct pvr_device *pvr_dev) +{ + pvr_dev->kccb.slot_count = 1 << ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT; + INIT_LIST_HEAD(&pvr_dev->kccb.waiters); + pvr_dev->kccb.fence_ctx.id = dma_fence_context_alloc(1); + spin_lock_init(&pvr_dev->kccb.fence_ctx.lock); + + return pvr_ccb_init(pvr_dev, &pvr_dev->kccb.ccb, + ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT, + sizeof(struct rogue_fwif_kccb_cmd)); +} + +/** + * pvr_kccb_fence_alloc() - Allocate a pvr_kccb_fence object + * + * Return: + * * NULL if the allocation fails, or + * * A valid dma_fence pointer otherwise. + */ +struct dma_fence *pvr_kccb_fence_alloc(void) +{ + struct pvr_kccb_fence *kccb_fence; + + kccb_fence = kzalloc(sizeof(*kccb_fence), GFP_KERNEL); + if (!kccb_fence) + return NULL; + + return &kccb_fence->base; +} + +/** + * pvr_kccb_fence_put() - Drop a KCCB fence reference + * @fence: The fence to drop the reference on. + * + * If the fence hasn't been initialized yet, dma_fence_free() is called. This + * way we have a single function taking care of both cases. + */ +void pvr_kccb_fence_put(struct dma_fence *fence) +{ + if (!fence) + return; + + if (!fence->ops) { + dma_fence_free(fence); + } else { + WARN_ON(fence->ops != &pvr_kccb_fence_ops); + dma_fence_put(fence); + } +} + +/** + * pvr_kccb_reserve_slot() - Reserve a KCCB slot for later use + * @pvr_dev: Target PowerVR device + * @f: KCCB fence object previously allocated with pvr_kccb_fence_alloc() + * + * Try to reserve a KCCB slot, and if there's no slot available, + * initialize the fence object and queue it to the waiters list. + * + * If NULL is returned, that means the slot is reserved. In that case, + * @f is freed and shouldn't be accessed after that point. + * + * Return: + * * NULL if a slot was available directly, or + * * A valid dma_fence object to wait on if no slot was available.
+ */ +struct dma_fence * +pvr_kccb_reserve_slot(struct pvr_device *pvr_dev, struct dma_fence *f) +{ + struct pvr_kccb_fence *fence = container_of(f, struct pvr_kccb_fence, base); + struct dma_fence *out_fence = NULL; + u32 used_count; + + mutex_lock(&pvr_dev->kccb.ccb.lock); + + used_count = pvr_kccb_used_slot_count_locked(pvr_dev); + if (pvr_dev->kccb.reserved_count >= pvr_kccb_capacity(pvr_dev) - used_count) { + dma_fence_init(&fence->base, &pvr_kccb_fence_ops, + &pvr_dev->kccb.fence_ctx.lock, + pvr_dev->kccb.fence_ctx.id, + atomic_inc_return(&pvr_dev->kccb.fence_ctx.seqno)); + out_fence = dma_fence_get(&fence->base); + list_add_tail(&fence->node, &pvr_dev->kccb.waiters); + } else { + pvr_kccb_fence_put(f); + pvr_dev->kccb.reserved_count++; + } + + mutex_unlock(&pvr_dev->kccb.ccb.lock); + + return out_fence; +} + +/** + * pvr_kccb_release_slot() - Release a KCCB slot reserved with + * pvr_kccb_reserve_slot() + * @pvr_dev: Target PowerVR device + * + * Should only be called if something failed after the + * pvr_kccb_reserve_slot() call and you know you won't call + * pvr_kccb_send_cmd_reserved_powered(). + */ +void pvr_kccb_release_slot(struct pvr_device *pvr_dev) +{ + mutex_lock(&pvr_dev->kccb.ccb.lock); + if (!WARN_ON(!pvr_dev->kccb.reserved_count)) + pvr_dev->kccb.reserved_count--; + mutex_unlock(&pvr_dev->kccb.ccb.lock); +} + +/** + * pvr_fwccb_init() - Initialise device FWCCB + * @pvr_dev: Target PowerVR device + * + * Returns: + * * 0 on success, or + * * Any error returned by pvr_ccb_init(). + */ +int +pvr_fwccb_init(struct pvr_device *pvr_dev) +{ + return pvr_ccb_init(pvr_dev, &pvr_dev->fwccb, + ROGUE_FWIF_FWCCB_NUMCMDS_LOG2, + sizeof(struct rogue_fwif_fwccb_cmd)); +} diff --git a/drivers/gpu/drm/imagination/pvr_ccb.h b/drivers/gpu/drm/imagination/pvr_ccb.h new file mode 100644 index 000000000000..4c8aef31eeb0 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_ccb.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_CCB_H +#define PVR_CCB_H + +#include "pvr_rogue_fwif.h" + +#include <linux/mutex.h> +#include <linux/types.h> + +/* Forward declaration from pvr_device.h. */ +struct pvr_device; + +/* Forward declaration from pvr_gem.h. */ +struct pvr_fw_object; + +struct pvr_ccb { + /** @ctrl_obj: FW object representing CCB control structure. */ + struct pvr_fw_object *ctrl_obj; + /** @ccb_obj: FW object representing CCB. */ + struct pvr_fw_object *ccb_obj; + + /** @ctrl_fw_addr: FW virtual address of CCB control structure. */ + u32 ctrl_fw_addr; + /** @ccb_fw_addr: FW virtual address of CCB. */ + u32 ccb_fw_addr; + + /** @num_cmds: Number of commands in this CCB. */ + u32 num_cmds; + + /** @cmd_size: Size of each command in this CCB, in bytes. */ + u32 cmd_size; + + /** @lock: Mutex protecting @ctrl and @ccb. */ + struct mutex lock; + /** + * @ctrl: Kernel mapping of CCB control structure. @lock must be held + * when accessing. + */ + struct rogue_fwif_ccb_ctl *ctrl; + /** @ccb: Kernel mapping of CCB. @lock must be held when accessing.
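Taken together, submission is two-phase: reserve a slot (possibly waiting on the fence that pvr_kccb_wake_up_waiters() signals as slots drain), then send with the reservation held. A hypothetical caller condensed from the kernel-doc above; the in-tree users hand the fence to the job scheduler instead of blocking, and error handling (including a reservation racing in after an interrupted wait) is abbreviated:

    /* Kernel-context sketch; assumes a PM reference is already held, as the
     * _powered suffix requires, and that 'cmd' is fully populated. */
    static int demo_submit(struct pvr_device *pvr_dev,
                           struct rogue_fwif_kccb_cmd *cmd)
    {
            struct dma_fence *f = pvr_kccb_fence_alloc();
            u32 slot;

            if (!f)
                    return -ENOMEM;

            /* NULL return means the slot is already reserved (and 'f' freed);
             * otherwise wait for the returned fence. */
            f = pvr_kccb_reserve_slot(pvr_dev, f);
            if (f) {
                    int err = dma_fence_wait(f, true);

                    dma_fence_put(f);
                    if (err)
                            return err;
            }

            pvr_kccb_send_cmd_reserved_powered(pvr_dev, cmd, &slot);

            /* Poll the slot for the firmware's response (timeout in jiffies). */
            return pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
    }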
*/ + void *ccb; +}; + +int pvr_kccb_init(struct pvr_device *pvr_dev); +void pvr_kccb_fini(struct pvr_device *pvr_dev); +int pvr_fwccb_init(struct pvr_device *pvr_dev); +void pvr_ccb_fini(struct pvr_ccb *ccb); + +void pvr_fwccb_process(struct pvr_device *pvr_dev); + +struct dma_fence *pvr_kccb_fence_alloc(void); +void pvr_kccb_fence_put(struct dma_fence *fence); +struct dma_fence * +pvr_kccb_reserve_slot(struct pvr_device *pvr_dev, struct dma_fence *f); +void pvr_kccb_release_slot(struct pvr_device *pvr_dev); +int pvr_kccb_send_cmd(struct pvr_device *pvr_dev, + struct rogue_fwif_kccb_cmd *cmd, u32 *kccb_slot); +int pvr_kccb_send_cmd_powered(struct pvr_device *pvr_dev, + struct rogue_fwif_kccb_cmd *cmd, + u32 *kccb_slot); +void pvr_kccb_send_cmd_reserved_powered(struct pvr_device *pvr_dev, + struct rogue_fwif_kccb_cmd *cmd, + u32 *kccb_slot); +int pvr_kccb_wait_for_completion(struct pvr_device *pvr_dev, u32 slot_nr, u32 timeout, + u32 *rtn_out); +bool pvr_kccb_is_idle(struct pvr_device *pvr_dev); +void pvr_kccb_wake_up_waiters(struct pvr_device *pvr_dev); + +#endif /* PVR_CCB_H */ diff --git a/drivers/gpu/drm/imagination/pvr_cccb.c b/drivers/gpu/drm/imagination/pvr_cccb.c new file mode 100644 index 000000000000..4fabab41bea7 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_cccb.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_ccb.h" +#include "pvr_cccb.h" +#include "pvr_device.h" +#include "pvr_gem.h" +#include "pvr_hwrt.h" + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/mutex.h> +#include <linux/types.h> + +static __always_inline u32 +get_ccb_space(u32 w_off, u32 r_off, u32 ccb_size) +{ + return (((r_off) - (w_off)) + ((ccb_size) - 1)) & ((ccb_size) - 1); +} + +static void +cccb_ctrl_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_cccb_ctl *ctrl = cpu_ptr; + struct pvr_cccb *pvr_cccb = priv; + + WRITE_ONCE(ctrl->write_offset, 0); + WRITE_ONCE(ctrl->read_offset, 0); + WRITE_ONCE(ctrl->dep_offset, 0); + WRITE_ONCE(ctrl->wrap_mask, pvr_cccb->wrap_mask); +} + +/** + * pvr_cccb_init() - Initialise a Client CCB + * @pvr_dev: Device pointer. + * @pvr_cccb: Pointer to Client CCB structure to initialise. + * @size_log2: Log2 size of Client CCB in bytes. + * @name: Name of owner of Client CCB. Used for fence context. + * + * Return: + * * Zero on success, or + * * Any error code returned by pvr_fw_object_create_and_map(). + */ +int +pvr_cccb_init(struct pvr_device *pvr_dev, struct pvr_cccb *pvr_cccb, + u32 size_log2, const char *name) +{ + size_t size = 1 << size_log2; + int err; + + pvr_cccb->size = size; + pvr_cccb->write_offset = 0; + pvr_cccb->wrap_mask = size - 1; + + /* + * Map CCCB and control structure as uncached, so we don't have to flush + * CPU cache repeatedly when polling for space. 
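get_ccb_space() computes the writable bytes between the write and read offsets, minus one, using the power-of-two size as a modulus. Worked standalone:

    #include <stdio.h>

    /* Mirrors get_ccb_space() above. */
    static unsigned int get_ccb_space(unsigned int w_off, unsigned int r_off,
                                      unsigned int ccb_size)
    {
            return ((r_off - w_off) + (ccb_size - 1)) & (ccb_size - 1);
    }

    int main(void)
    {
            /* Empty ring (r == w): everything but one byte is usable. */
            printf("%u\n", get_ccb_space(0, 0, 4096));      /* 4095 */

            /* Writer at 4000, reader at 160 after wrapping. */
            printf("%u\n", get_ccb_space(4000, 160, 4096)); /* 255 */
            return 0;
    }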
+ */ + pvr_cccb->ctrl = pvr_fw_object_create_and_map(pvr_dev, sizeof(*pvr_cccb->ctrl), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + cccb_ctrl_init, pvr_cccb, + &pvr_cccb->ctrl_obj); + if (IS_ERR(pvr_cccb->ctrl)) + return PTR_ERR(pvr_cccb->ctrl); + + pvr_cccb->cccb = pvr_fw_object_create_and_map(pvr_dev, size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &pvr_cccb->cccb_obj); + if (IS_ERR(pvr_cccb->cccb)) { + err = PTR_ERR(pvr_cccb->cccb); + goto err_free_ctrl; + } + + pvr_fw_object_get_fw_addr(pvr_cccb->ctrl_obj, &pvr_cccb->ctrl_fw_addr); + pvr_fw_object_get_fw_addr(pvr_cccb->cccb_obj, &pvr_cccb->cccb_fw_addr); + + return 0; + +err_free_ctrl: + pvr_fw_object_unmap_and_destroy(pvr_cccb->ctrl_obj); + + return err; +} + +/** + * pvr_cccb_fini() - Release Client CCB structure + * @pvr_cccb: Client CCB to release. + */ +void +pvr_cccb_fini(struct pvr_cccb *pvr_cccb) +{ + pvr_fw_object_unmap_and_destroy(pvr_cccb->cccb_obj); + pvr_fw_object_unmap_and_destroy(pvr_cccb->ctrl_obj); +} + +/** + * pvr_cccb_cmdseq_fits() - Check if a command sequence fits in the CCCB + * @pvr_cccb: Target Client CCB. + * @size: Size of the command sequence. + * + * Check if a command sequence fits in the CCCB we have at hand. + * + * Return: + * * true if the command sequence fits in the CCCB, or + * * false otherwise. + */ +bool pvr_cccb_cmdseq_fits(struct pvr_cccb *pvr_cccb, size_t size) +{ + struct rogue_fwif_cccb_ctl *ctrl = pvr_cccb->ctrl; + u32 read_offset, remaining; + bool fits = false; + + read_offset = READ_ONCE(ctrl->read_offset); + remaining = pvr_cccb->size - pvr_cccb->write_offset; + + /* Always ensure we have enough room for a padding command at the end of the CCCB. + * If our command sequence does not fit, reserve the remaining space for a padding + * command. + */ + if (size + PADDING_COMMAND_SIZE > remaining) + size += remaining; + + if (get_ccb_space(pvr_cccb->write_offset, read_offset, pvr_cccb->size) >= size) + fits = true; + + return fits; +} + +/** + * pvr_cccb_write_command_with_header() - Write a command + command header to a + * Client CCB + * @pvr_cccb: Target Client CCB. + * @cmd_type: Client CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*. + * @cmd_size: Size of command in bytes. + * @cmd_data: Pointer to command to write. + * @ext_job_ref: External job reference. + * @int_job_ref: Internal job reference. + * + * Caller must make sure there's enough space in CCCB to queue this command. This + * can be done by calling pvr_cccb_cmdseq_fits(). + * + * This function is not protected by any lock. The caller must ensure there's + * no concurrent caller, which should be guaranteed by the drm_sched model (job + * submission is serialized in drm_sched_main()). + */ +void +pvr_cccb_write_command_with_header(struct pvr_cccb *pvr_cccb, u32 cmd_type, u32 cmd_size, + void *cmd_data, u32 ext_job_ref, u32 int_job_ref) +{ + u32 sz_with_hdr = pvr_cccb_get_size_of_cmd_with_hdr(cmd_size); + struct rogue_fwif_ccb_cmd_header cmd_header = { + .cmd_type = cmd_type, + .cmd_size = ALIGN(cmd_size, 8), + .ext_job_ref = ext_job_ref, + .int_job_ref = int_job_ref, + }; + struct rogue_fwif_cccb_ctl *ctrl = pvr_cccb->ctrl; + u32 remaining = pvr_cccb->size - pvr_cccb->write_offset; + u32 required_size, cccb_space, read_offset; + + /* + * Always ensure we have enough room for a padding command at the end of + * the CCCB. + */ + if (remaining < sz_with_hdr + PADDING_COMMAND_SIZE) { + /* + * Command would need to wrap, so we need to pad the remainder + * of the CCCB. 
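pvr_cccb_cmdseq_fits() charges a would-be wrapping sequence for the whole ring tail, since that tail must be burned with a padding command before the sequence can start again at offset 0. The decision in isolation (PAD_CMD stands in for PADDING_COMMAND_SIZE):

    #include <stdbool.h>
    #include <stdio.h>

    #define CCCB_SIZE 1024u
    #define PAD_CMD   16u

    static bool cmdseq_fits(unsigned int write_off, unsigned int space,
                            unsigned int seq_size)
    {
            unsigned int remaining = CCCB_SIZE - write_off;

            if (seq_size + PAD_CMD > remaining)
                    seq_size += remaining;   /* tail is lost to the pad command */

            return space >= seq_size;
    }

    int main(void)
    {
            /* 100-byte sequence at offset 1000: the 24 tail bytes cannot hold
             * it, so the effective request becomes 124 bytes. */
            printf("%d\n", cmdseq_fits(1000, 120, 100)); /* 0 */
            printf("%d\n", cmdseq_fits(1000, 130, 100)); /* 1 */
            return 0;
    }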
+ */ + required_size = sz_with_hdr + remaining; + } else { + required_size = sz_with_hdr; + } + + read_offset = READ_ONCE(ctrl->read_offset); + cccb_space = get_ccb_space(pvr_cccb->write_offset, read_offset, pvr_cccb->size); + if (WARN_ON(cccb_space < required_size)) + return; + + if (required_size != sz_with_hdr) { + /* Add padding command */ + struct rogue_fwif_ccb_cmd_header pad_cmd = { + .cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_PADDING, + .cmd_size = remaining - sizeof(pad_cmd), + }; + + memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset], &pad_cmd, sizeof(pad_cmd)); + pvr_cccb->write_offset = 0; + } + + memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset], &cmd_header, sizeof(cmd_header)); + memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset + sizeof(cmd_header)], cmd_data, cmd_size); + pvr_cccb->write_offset += sz_with_hdr; +} + +static void fill_cmd_kick_data(struct pvr_cccb *cccb, u32 ctx_fw_addr, + struct pvr_hwrt_data *hwrt, + struct rogue_fwif_kccb_cmd_kick_data *k) +{ + k->context_fw_addr = ctx_fw_addr; + k->client_woff_update = cccb->write_offset; + k->client_wrap_mask_update = cccb->wrap_mask; + + if (hwrt) { + u32 cleanup_state_offset = offsetof(struct rogue_fwif_hwrtdata, cleanup_state); + + pvr_fw_object_get_fw_addr_offset(hwrt->fw_obj, cleanup_state_offset, + &k->cleanup_ctl_fw_addr[k->num_cleanup_ctl++]); + } +} + +/** + * pvr_cccb_send_kccb_kick: Send KCCB kick to trigger command processing + * @pvr_dev: Device pointer. + * @pvr_cccb: Pointer to CCCB to process. + * @cctx_fw_addr: FW virtual address for context owning this Client CCB. + * @hwrt: HWRT data set associated with this kick. May be %NULL. + * + * You must call pvr_kccb_reserve_slot() and wait for the returned fence to + * signal (if this function didn't return NULL) before calling + * pvr_cccb_send_kccb_kick(). + */ +void +pvr_cccb_send_kccb_kick(struct pvr_device *pvr_dev, + struct pvr_cccb *pvr_cccb, u32 cctx_fw_addr, + struct pvr_hwrt_data *hwrt) +{ + struct rogue_fwif_kccb_cmd cmd_kick = { + .cmd_type = ROGUE_FWIF_KCCB_CMD_KICK, + }; + + fill_cmd_kick_data(pvr_cccb, cctx_fw_addr, hwrt, &cmd_kick.cmd_data.cmd_kick_data); + + /* Make sure the writes to the CCCB are flushed before sending the KICK. */ + wmb(); + + pvr_kccb_send_cmd_reserved_powered(pvr_dev, &cmd_kick, NULL); +} + +void +pvr_cccb_send_kccb_combined_kick(struct pvr_device *pvr_dev, + struct pvr_cccb *geom_cccb, + struct pvr_cccb *frag_cccb, + u32 geom_ctx_fw_addr, + u32 frag_ctx_fw_addr, + struct pvr_hwrt_data *hwrt, + bool frag_is_pr) +{ + struct rogue_fwif_kccb_cmd cmd_kick = { + .cmd_type = ROGUE_FWIF_KCCB_CMD_COMBINED_GEOM_FRAG_KICK, + }; + + fill_cmd_kick_data(geom_cccb, geom_ctx_fw_addr, hwrt, + &cmd_kick.cmd_data.combined_geom_frag_cmd_kick_data.geom_cmd_kick_data); + + /* If this is a partial-render job, we don't attach resources to cleanup-ctl array, + * because the resources are already retained by the geometry job. + */ + fill_cmd_kick_data(frag_cccb, frag_ctx_fw_addr, frag_is_pr ? NULL : hwrt, + &cmd_kick.cmd_data.combined_geom_frag_cmd_kick_data.frag_cmd_kick_data); + + /* Make sure the writes to the CCCB are flushed before sending the KICK. */ + wmb(); + + pvr_kccb_send_cmd_reserved_powered(pvr_dev, &cmd_kick, NULL); +} diff --git a/drivers/gpu/drm/imagination/pvr_cccb.h b/drivers/gpu/drm/imagination/pvr_cccb.h new file mode 100644 index 000000000000..943fe8f2c963 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_cccb.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/
+
+#ifndef PVR_CCCB_H
+#define PVR_CCCB_H
+
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_fwif_shared.h"
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#define PADDING_COMMAND_SIZE sizeof(struct rogue_fwif_ccb_cmd_header)
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+/* Forward declaration from pvr_hwrt.h. */
+struct pvr_hwrt_data;
+
+struct pvr_cccb {
+	/** @ctrl_obj: FW object representing CCCB control structure. */
+	struct pvr_fw_object *ctrl_obj;
+
+	/** @cccb_obj: FW object representing CCCB. */
+	struct pvr_fw_object *cccb_obj;
+
+	/**
+	 * @ctrl: Kernel mapping of CCCB control structure. @lock must be held
+	 * when accessing.
+	 */
+	struct rogue_fwif_cccb_ctl *ctrl;
+
+	/** @cccb: Kernel mapping of CCCB. @lock must be held when accessing. */
+	u8 *cccb;
+
+	/** @ctrl_fw_addr: FW virtual address of CCCB control structure. */
+	u32 ctrl_fw_addr;
+	/** @cccb_fw_addr: FW virtual address of CCCB. */
+	u32 cccb_fw_addr;
+
+	/** @size: Size of CCCB in bytes. */
+	size_t size;
+
+	/** @write_offset: CCCB write offset. */
+	u32 write_offset;
+
+	/** @wrap_mask: CCCB wrap mask. */
+	u32 wrap_mask;
+};
+
+int pvr_cccb_init(struct pvr_device *pvr_dev, struct pvr_cccb *cccb,
+		  u32 size_log2, const char *name);
+void pvr_cccb_fini(struct pvr_cccb *cccb);
+
+void pvr_cccb_write_command_with_header(struct pvr_cccb *pvr_cccb,
+					u32 cmd_type, u32 cmd_size, void *cmd_data,
+					u32 ext_job_ref, u32 int_job_ref);
+void pvr_cccb_send_kccb_kick(struct pvr_device *pvr_dev,
+			     struct pvr_cccb *pvr_cccb, u32 cctx_fw_addr,
+			     struct pvr_hwrt_data *hwrt);
+void pvr_cccb_send_kccb_combined_kick(struct pvr_device *pvr_dev,
+				      struct pvr_cccb *geom_cccb,
+				      struct pvr_cccb *frag_cccb,
+				      u32 geom_ctx_fw_addr,
+				      u32 frag_ctx_fw_addr,
+				      struct pvr_hwrt_data *hwrt,
+				      bool frag_is_pr);
+bool pvr_cccb_cmdseq_fits(struct pvr_cccb *pvr_cccb, size_t size);
+
+/**
+ * pvr_cccb_get_size_of_cmd_with_hdr() - Get the size of a command and its header.
+ * @cmd_size: Command size.
+ *
+ * Returns the size of the command and its header.
+ */
+static __always_inline u32
+pvr_cccb_get_size_of_cmd_with_hdr(u32 cmd_size)
+{
+	WARN_ON(!IS_ALIGNED(cmd_size, 8));
+	return sizeof(struct rogue_fwif_ccb_cmd_header) + ALIGN(cmd_size, 8);
+}
+
+/**
+ * pvr_cccb_cmdseq_can_fit() - Check if a command sequence can fit in the CCCB.
+ * @pvr_cccb: Target Client CCB.
+ * @size: Command sequence size.
+ *
+ * Returns:
+ * * true if the CCCB is big enough to contain a command sequence, or
+ * * false otherwise.
+ */
+static __always_inline bool
+pvr_cccb_cmdseq_can_fit(struct pvr_cccb *pvr_cccb, size_t size)
+{
+	/* We divide the capacity by two to simplify our CCCB fencing logic:
+	 * we want to be sure that, no matter what we had queued before, we
+	 * are able to either queue our command sequence at the end or add a
+	 * padding command and queue the command sequence at the beginning
+	 * of the CCCB. If the command sequence size is bigger than half the
+	 * CCCB capacity, we'd have to queue the padding command and make sure
+	 * the FW is done processing it before queueing our command sequence.
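A quick numeric check of the half-capacity rule, with invented sizes standing in for the real CCCB and PADDING_COMMAND_SIZE: for every possible tail length, padding the tail and replaying the largest accepted sequence at offset zero still fits within a ring that holds at most size minus one queued bytes:

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative numbers only: check that the "half the CCCB" rule in
 * pvr_cccb_cmdseq_can_fit() always leaves room for the worst case,
 * where the sequence must be preceded by a padding command because it
 * would otherwise wrap. */
#define CCCB_SIZE 1024u
#define PAD_SIZE  16u	/* stand-in for PADDING_COMMAND_SIZE */

int main(void)
{
	uint32_t max_seq = CCCB_SIZE / 2 - PAD_SIZE; /* largest accepted size */
	uint32_t tail;

	for (tail = 0; tail < max_seq + PAD_SIZE; tail += 8) {
		/* If the sequence doesn't fit in 'tail' bytes before the
		 * wrap, the tail is padded and writing restarts at 0. */
		uint32_t worst = tail + max_seq;

		/* A ring of size N holds at most N - 1 queued bytes, so
		 * this must stay below CCCB_SIZE for the sequence to be
		 * queueable once the FW has drained earlier commands. */
		assert(worst < CCCB_SIZE - 1);
	}
	return 0;
}
```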
+ */ + return size + PADDING_COMMAND_SIZE <= pvr_cccb->size / 2; +} + +#endif /* PVR_CCCB_H */ diff --git a/drivers/gpu/drm/imagination/pvr_context.c b/drivers/gpu/drm/imagination/pvr_context.c new file mode 100644 index 000000000000..eded5e955cc0 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_context.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_cccb.h" +#include "pvr_context.h" +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_gem.h" +#include "pvr_job.h" +#include "pvr_power.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_fwif_common.h" +#include "pvr_rogue_fwif_resetframework.h" +#include "pvr_stream.h" +#include "pvr_stream_defs.h" +#include "pvr_vm.h" + +#include <drm/drm_auth.h> +#include <drm/drm_managed.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/xarray.h> + +static int +remap_priority(struct pvr_file *pvr_file, s32 uapi_priority, + enum pvr_context_priority *priority_out) +{ + switch (uapi_priority) { + case DRM_PVR_CTX_PRIORITY_LOW: + *priority_out = PVR_CTX_PRIORITY_LOW; + break; + case DRM_PVR_CTX_PRIORITY_NORMAL: + *priority_out = PVR_CTX_PRIORITY_MEDIUM; + break; + case DRM_PVR_CTX_PRIORITY_HIGH: + if (!capable(CAP_SYS_NICE) && !drm_is_current_master(from_pvr_file(pvr_file))) + return -EACCES; + *priority_out = PVR_CTX_PRIORITY_HIGH; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int get_fw_obj_size(enum drm_pvr_ctx_type type) +{ + switch (type) { + case DRM_PVR_CTX_TYPE_RENDER: + return sizeof(struct rogue_fwif_fwrendercontext); + case DRM_PVR_CTX_TYPE_COMPUTE: + return sizeof(struct rogue_fwif_fwcomputecontext); + case DRM_PVR_CTX_TYPE_TRANSFER_FRAG: + return sizeof(struct rogue_fwif_fwtransfercontext); + } + + return -EINVAL; +} + +static int +process_static_context_state(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs, + u64 stream_user_ptr, u32 stream_size, void *dest) +{ + void *stream; + int err; + + stream = kzalloc(stream_size, GFP_KERNEL); + if (!stream) + return -ENOMEM; + + if (copy_from_user(stream, u64_to_user_ptr(stream_user_ptr), stream_size)) { + err = -EFAULT; + goto err_free; + } + + err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, dest); + if (err) + goto err_free; + + kfree(stream); + + return 0; + +err_free: + kfree(stream); + + return err; +} + +static int init_render_fw_objs(struct pvr_context *ctx, + struct drm_pvr_ioctl_create_context_args *args, + void *fw_ctx_map) +{ + struct rogue_fwif_static_rendercontext_state *static_rendercontext_state; + struct rogue_fwif_fwrendercontext *fw_render_context = fw_ctx_map; + + if (!args->static_context_state_len) + return -EINVAL; + + static_rendercontext_state = &fw_render_context->static_render_context_state; + + /* Copy static render context state from userspace. 
*/
+	return process_static_context_state(ctx->pvr_dev,
+					    &pvr_static_render_context_state_stream,
+					    args->static_context_state,
+					    args->static_context_state_len,
+					    &static_rendercontext_state->ctxswitch_regs[0]);
+}
+
+static int init_compute_fw_objs(struct pvr_context *ctx,
+				struct drm_pvr_ioctl_create_context_args *args,
+				void *fw_ctx_map)
+{
+	struct rogue_fwif_fwcomputecontext *fw_compute_context = fw_ctx_map;
+	struct rogue_fwif_cdm_registers_cswitch *ctxswitch_regs;
+
+	if (!args->static_context_state_len)
+		return -EINVAL;
+
+	ctxswitch_regs = &fw_compute_context->static_compute_context_state.ctxswitch_regs;
+
+	/* Copy static compute context state from userspace. */
+	return process_static_context_state(ctx->pvr_dev,
+					    &pvr_static_compute_context_state_stream,
+					    args->static_context_state,
+					    args->static_context_state_len,
+					    ctxswitch_regs);
+}
+
+static int init_transfer_fw_objs(struct pvr_context *ctx,
+				 struct drm_pvr_ioctl_create_context_args *args,
+				 void *fw_ctx_map)
+{
+	if (args->static_context_state_len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int init_fw_objs(struct pvr_context *ctx,
+			struct drm_pvr_ioctl_create_context_args *args,
+			void *fw_ctx_map)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		return init_render_fw_objs(ctx, args, fw_ctx_map);
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		return init_compute_fw_objs(ctx, args, fw_ctx_map);
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		return init_transfer_fw_objs(ctx, args, fw_ctx_map);
+	}
+
+	return -EINVAL;
+}
+
+static void
+ctx_fw_data_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_context *ctx = priv;
+
+	memcpy(cpu_ptr, ctx->data, ctx->data_size);
+}
+
+/**
+ * pvr_context_destroy_queues() - Destroy all queues attached to a context.
+ * @ctx: Context to destroy queues on.
+ *
+ * Should be called when the last reference to a context object is dropped.
+ * It releases all resources attached to the queues bound to this context.
+ */
+static void pvr_context_destroy_queues(struct pvr_context *ctx)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		pvr_queue_destroy(ctx->queues.fragment);
+		pvr_queue_destroy(ctx->queues.geometry);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		pvr_queue_destroy(ctx->queues.compute);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		pvr_queue_destroy(ctx->queues.transfer);
+		break;
+	}
+}
+
+/**
+ * pvr_context_create_queues() - Create all queues attached to a context.
+ * @ctx: Context to create queues on.
+ * @args: Context creation arguments passed by userspace.
+ * @fw_ctx_map: CPU mapping of the FW context object.
+ *
+ * Return:
+ * * 0 on success, or
+ * * A negative error code otherwise.
+ */
+static int pvr_context_create_queues(struct pvr_context *ctx,
+				     struct drm_pvr_ioctl_create_context_args *args,
+				     void *fw_ctx_map)
+{
+	int err;
+
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		ctx->queues.geometry = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_GEOMETRY,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.geometry)) {
+			err = PTR_ERR(ctx->queues.geometry);
+			ctx->queues.geometry = NULL;
+			goto err_destroy_queues;
+		}
+
+		ctx->queues.fragment = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_FRAGMENT,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.fragment)) {
+			err = PTR_ERR(ctx->queues.fragment);
+			ctx->queues.fragment = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		ctx->queues.compute = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_COMPUTE,
+						       args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.compute)) {
+			err = PTR_ERR(ctx->queues.compute);
+			ctx->queues.compute = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		ctx->queues.transfer = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_TRANSFER_FRAG,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.transfer)) {
+			err = PTR_ERR(ctx->queues.transfer);
+			ctx->queues.transfer = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+	}
+
+	return -EINVAL;
+
+err_destroy_queues:
+	pvr_context_destroy_queues(ctx);
+	return err;
+}
+
+/**
+ * pvr_context_kill_queues() - Kill queues attached to context.
+ * @ctx: Context to kill queues on.
+ *
+ * Killing the queues implies making them unusable for future jobs, while still
+ * giving the currently submitted jobs a chance to finish. Queue resources will
+ * stay around until pvr_context_destroy_queues() is called.
+ */
+static void pvr_context_kill_queues(struct pvr_context *ctx)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		pvr_queue_kill(ctx->queues.fragment);
+		pvr_queue_kill(ctx->queues.geometry);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		pvr_queue_kill(ctx->queues.compute);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		pvr_queue_kill(ctx->queues.transfer);
+		break;
+	}
+}
+
+/**
+ * pvr_context_create() - Create a context.
+ * @pvr_file: File to attach the created context to.
+ * @args: Context creation arguments.
+ *
+ * Return:
+ * * 0 on success, or
+ * * A negative error code on failure.
+ */
+int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_context_args *args)
+{
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+	struct pvr_context *ctx;
+	int ctx_size;
+	int err;
+
+	/* Context creation flags are currently unused and must be zero. */
+	if (args->flags)
+		return -EINVAL;
+
+	ctx_size = get_fw_obj_size(args->type);
+	if (ctx_size < 0)
+		return ctx_size;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->data_size = ctx_size;
+	ctx->type = args->type;
+	ctx->flags = args->flags;
+	ctx->pvr_dev = pvr_dev;
+	kref_init(&ctx->ref_count);
+
+	err = remap_priority(pvr_file, args->priority, &ctx->priority);
+	if (err)
+		goto err_free_ctx;
+
+	ctx->vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (IS_ERR(ctx->vm_ctx)) {
+		err = PTR_ERR(ctx->vm_ctx);
+		goto err_free_ctx;
+	}
+
+	ctx->data = kzalloc(ctx_size, GFP_KERNEL);
+	if (!ctx->data) {
+		err = -ENOMEM;
+		goto err_put_vm;
+	}
+
+	err = pvr_context_create_queues(ctx, args, ctx->data);
+	if (err)
+		goto err_free_ctx_data;
+
+	err = init_fw_objs(ctx, args, ctx->data);
+	if (err)
+		goto err_destroy_queues;
+
+	err = pvr_fw_object_create(pvr_dev, ctx_size, PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   ctx_fw_data_init, ctx, &ctx->fw_obj);
+	if (err)
+		goto err_destroy_queues;
+
+	err = xa_alloc(&pvr_dev->ctx_ids, &ctx->ctx_id, ctx, xa_limit_32b, GFP_KERNEL);
+	if (err)
+		goto err_destroy_fw_obj;
+
+	err = xa_alloc(&pvr_file->ctx_handles, &args->handle, ctx, xa_limit_32b, GFP_KERNEL);
+	if (err) {
+		/*
+		 * It's possible that another thread could have taken a reference on the context at
+		 * this point as it is in the ctx_ids xarray. Therefore instead of directly
+		 * destroying the context, drop a reference instead.
+		 */
+		pvr_context_put(ctx);
+		return err;
+	}
+
+	return 0;
+
+err_destroy_fw_obj:
+	pvr_fw_object_destroy(ctx->fw_obj);
+
+err_destroy_queues:
+	pvr_context_destroy_queues(ctx);
+
+err_free_ctx_data:
+	kfree(ctx->data);
+
+err_put_vm:
+	pvr_vm_context_put(ctx->vm_ctx);
+
+err_free_ctx:
+	kfree(ctx);
+	return err;
+}
+
+static void
+pvr_context_release(struct kref *ref_count)
+{
+	struct pvr_context *ctx =
+		container_of(ref_count, struct pvr_context, ref_count);
+	struct pvr_device *pvr_dev = ctx->pvr_dev;
+
+	xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
+	pvr_context_destroy_queues(ctx);
+	pvr_fw_object_destroy(ctx->fw_obj);
+	kfree(ctx->data);
+	pvr_vm_context_put(ctx->vm_ctx);
+	kfree(ctx);
+}
+
+/**
+ * pvr_context_put() - Release reference on context.
+ * @ctx: Target context.
+ */
+void
+pvr_context_put(struct pvr_context *ctx)
+{
+	if (ctx)
+		kref_put(&ctx->ref_count, pvr_context_release);
+}
+
+/**
+ * pvr_context_destroy() - Destroy context
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Userspace context handle.
+ *
+ * Removes context from context list and drops initial reference. Context will
+ * then be destroyed once all outstanding references are dropped.
+ *
+ * Return:
+ * * 0 on success, or
+ * * -%EINVAL if context not in context list.
+ */
+int
+pvr_context_destroy(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_context *ctx = xa_erase(&pvr_file->ctx_handles, handle);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* Make sure nothing can be queued to the queues after that point. */
+	pvr_context_kill_queues(ctx);
+
+	/* Release the reference held by the handle set. */
+	pvr_context_put(ctx);
+
+	return 0;
+}
+
+/**
+ * pvr_destroy_contexts_for_file() - Destroy any contexts associated with the given file
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all contexts associated with @pvr_file from the device context list and drops initial
+ * references. Contexts will then be destroyed once all outstanding references are dropped.
+ */
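The xa_alloc() failure path above depends on a subtle lifetime rule: once the context is published in the ctx_ids xarray, another thread may already hold a reference, so teardown must go through the refcount rather than the error labels. The same rule is what makes the "get unless zero" lookup in pvr_context_lookup_id() safe. A toy userspace model of both halves, with C11 atomics standing in for kref (all names invented, kref and xarray are kernel-only):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model of the context lifetime rules: after publication, error
 * paths drop a reference instead of freeing directly, and lookups only
 * take a reference if the count is still non-zero, so an object already
 * heading for destruction is never revived. */
struct obj {
	atomic_int refcount;
};

static bool get_unless_zero(struct obj *o)
{
	int cur = atomic_load(&o->refcount);

	while (cur != 0)
		if (atomic_compare_exchange_weak(&o->refcount, &cur, cur + 1))
			return true;
	return false;
}

static void put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1) {
		printf("last reference gone, freeing\n");
		free(o);	/* plays the role of pvr_context_release() */
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refcount, 1);	/* creation reference */

	/* ...published in a shared ID table here; concurrent lookups may
	 * now succeed, so a late creation failure unwinds via put()... */
	printf("lookup while alive: %d\n", get_unless_zero(o));	/* 1 */
	put(o);		/* drop the lookup's reference */

	put(o);		/* drop the creation reference: frees the object */
	return 0;
}
```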
+void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_context *ctx;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->ctx_handles, handle, ctx)
+		pvr_context_destroy(pvr_file, handle);
+}
+
+/**
+ * pvr_context_device_init() - Device level initialization for queue related resources.
+ * @pvr_dev: The device to initialize.
+ */
+void pvr_context_device_init(struct pvr_device *pvr_dev)
+{
+	xa_init_flags(&pvr_dev->ctx_ids, XA_FLAGS_ALLOC1);
+}
+
+/**
+ * pvr_context_device_fini() - Device level cleanup for queue related resources.
+ * @pvr_dev: The device to cleanup.
+ */
+void pvr_context_device_fini(struct pvr_device *pvr_dev)
+{
+	WARN_ON(!xa_empty(&pvr_dev->ctx_ids));
+	xa_destroy(&pvr_dev->ctx_ids);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_context.h b/drivers/gpu/drm/imagination/pvr_context.h
new file mode 100644
index 000000000000..0c7b97dfa6ba
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_context.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_CONTEXT_H
+#define PVR_CONTEXT_H
+
+#include <drm/gpu_scheduler.h>
+
+#include <linux/compiler_attributes.h>
+#include <linux/dma-fence.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+#include "pvr_cccb.h"
+#include "pvr_device.h"
+#include "pvr_queue.h"
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+enum pvr_context_priority {
+	PVR_CTX_PRIORITY_LOW = 0,
+	PVR_CTX_PRIORITY_MEDIUM,
+	PVR_CTX_PRIORITY_HIGH,
+};
+
+/**
+ * struct pvr_context - Context data
+ */
+struct pvr_context {
+	/** @ref_count: Refcount for context. */
+	struct kref ref_count;
+
+	/** @pvr_dev: Pointer to owning device. */
+	struct pvr_device *pvr_dev;
+
+	/** @vm_ctx: Pointer to associated VM context. */
+	struct pvr_vm_context *vm_ctx;
+
+	/** @type: Type of context. */
+	enum drm_pvr_ctx_type type;
+
+	/** @flags: Context flags. */
+	u32 flags;
+
+	/** @priority: Context priority. */
+	enum pvr_context_priority priority;
+
+	/** @fw_obj: FW object representing FW-side context data. */
+	struct pvr_fw_object *fw_obj;
+
+	/** @data: Pointer to local copy of FW context data. */
+	void *data;
+
+	/** @data_size: Size of FW context data, in bytes. */
+	u32 data_size;
+
+	/** @ctx_id: FW context ID. */
+	u32 ctx_id;
+
+	/**
+	 * @faulty: Set to 1 when the context queues had unfinished jobs when
+	 * a GPU reset happened.
+	 *
+	 * In that case, the context is in an inconsistent state and can't be
+	 * used anymore.
+	 */
+	atomic_t faulty;
+
+	/** @queues: Union containing all kinds of queues. */
+	union {
+		struct {
+			/** @geometry: Geometry queue. */
+			struct pvr_queue *geometry;
+
+			/** @fragment: Fragment queue. */
+			struct pvr_queue *fragment;
+		};
+
+		/** @compute: Compute queue. */
+		struct pvr_queue *compute;
+
+		/** @transfer: Transfer queue. */
+		struct pvr_queue *transfer;
+	} queues;
+};
+
+static __always_inline struct pvr_queue *
+pvr_context_get_queue_for_job(struct pvr_context *ctx, enum drm_pvr_job_type type)
+{
+	switch (type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.geometry : NULL;
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.fragment : NULL;
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return ctx->type == DRM_PVR_CTX_TYPE_COMPUTE ? ctx->queues.compute : NULL;
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return ctx->type == DRM_PVR_CTX_TYPE_TRANSFER_FRAG ? ctx->queues.transfer : NULL;
+	}
+
+	return NULL;
+}
+
+/**
+ * pvr_context_get() - Take additional reference on context.
+ * @ctx: Context pointer.
+ *
+ * Call pvr_context_put() to release.
+ *
+ * Returns:
+ * * The requested context on success, or
+ * * %NULL if no context pointer passed.
+ */
+static __always_inline struct pvr_context *
+pvr_context_get(struct pvr_context *ctx)
+{
+	if (ctx)
+		kref_get(&ctx->ref_count);
+
+	return ctx;
+}
+
+/**
+ * pvr_context_lookup() - Lookup context pointer from handle and file.
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Context handle.
+ *
+ * Takes reference on context. Call pvr_context_put() to release.
+ *
+ * Return:
+ * * The requested context on success, or
+ * * %NULL on failure (context does not exist, or does not belong to @pvr_file).
+ */
+static __always_inline struct pvr_context *
+pvr_context_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_context *ctx;
+
+	/* Take the array lock to protect against context removal. */
+	xa_lock(&pvr_file->ctx_handles);
+	ctx = pvr_context_get(xa_load(&pvr_file->ctx_handles, handle));
+	xa_unlock(&pvr_file->ctx_handles);
+
+	return ctx;
+}
+
+/**
+ * pvr_context_lookup_id() - Lookup context pointer from ID.
+ * @pvr_dev: Device pointer.
+ * @id: FW context ID.
+ *
+ * Takes reference on context. Call pvr_context_put() to release.
+ *
+ * Return:
+ * * The requested context on success, or
+ * * %NULL on failure (context does not exist).
+ */
+static __always_inline struct pvr_context *
+pvr_context_lookup_id(struct pvr_device *pvr_dev, u32 id)
+{
+	struct pvr_context *ctx;
+
+	/* Take the array lock to protect against context removal. */
+	xa_lock(&pvr_dev->ctx_ids);
+
+	/* Contexts are removed from the ctx_ids set in the context release path,
+	 * meaning the ref_count reached zero before they get removed. We need
+	 * to make sure we're not trying to acquire a context that's being
+	 * destroyed. The ID may also be stale, in which case xa_load() returns
+	 * NULL.
+	 */
+	ctx = xa_load(&pvr_dev->ctx_ids, id);
+	if (!ctx || !kref_get_unless_zero(&ctx->ref_count))
+		ctx = NULL;
+
+	xa_unlock(&pvr_dev->ctx_ids);
+
+	return ctx;
+}
+
+static __always_inline u32
+pvr_context_get_fw_addr(struct pvr_context *ctx)
+{
+	u32 ctx_fw_addr = 0;
+
+	pvr_fw_object_get_fw_addr(ctx->fw_obj, &ctx_fw_addr);
+
+	return ctx_fw_addr;
+}
+
+void pvr_context_put(struct pvr_context *ctx);
+
+int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_context_args *args);
+
+int pvr_context_destroy(struct pvr_file *pvr_file, u32 handle);
+
+void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file);
+
+void pvr_context_device_init(struct pvr_device *pvr_dev);
+
+void pvr_context_device_fini(struct pvr_device *pvr_dev);
+
+#endif /* PVR_CONTEXT_H */
diff --git a/drivers/gpu/drm/imagination/pvr_debugfs.c b/drivers/gpu/drm/imagination/pvr_debugfs.c
new file mode 100644
index 000000000000..6b77c9b4bde8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_debugfs.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd.
*/ + +#include "pvr_debugfs.h" + +#include "pvr_device.h" +#include "pvr_fw_trace.h" +#include "pvr_params.h" + +#include <linux/dcache.h> +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <drm/drm_print.h> + +static const struct pvr_debugfs_entry pvr_debugfs_entries[] = { + {"pvr_params", pvr_params_debugfs_init}, + {"pvr_fw", pvr_fw_trace_debugfs_init}, +}; + +void +pvr_debugfs_init(struct drm_minor *minor) +{ + struct drm_device *drm_dev = minor->dev; + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct dentry *root = minor->debugfs_root; + size_t i; + + for (i = 0; i < ARRAY_SIZE(pvr_debugfs_entries); ++i) { + const struct pvr_debugfs_entry *entry = &pvr_debugfs_entries[i]; + struct dentry *dir; + + dir = debugfs_create_dir(entry->name, root); + if (IS_ERR(dir)) { + drm_warn(drm_dev, + "failed to create debugfs dir '%s' (err=%d)", + entry->name, (int)PTR_ERR(dir)); + continue; + } + + entry->init(pvr_dev, dir); + } +} + +/* + * Since all entries are created under &drm_minor->debugfs_root, there's no + * need for a pvr_debugfs_fini() as DRM will clean up everything under its root + * automatically. + */ diff --git a/drivers/gpu/drm/imagination/pvr_debugfs.h b/drivers/gpu/drm/imagination/pvr_debugfs.h new file mode 100644 index 000000000000..ebacbd13b84a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_debugfs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_DEBUGFS_H +#define PVR_DEBUGFS_H + +/* Forward declaration from <drm/drm_drv.h>. */ +struct drm_minor; + +#if defined(CONFIG_DEBUG_FS) +/* Forward declaration from "pvr_device.h". */ +struct pvr_device; + +/* Forward declaration from <linux/dcache.h>. */ +struct dentry; + +struct pvr_debugfs_entry { + const char *name; + void (*init)(struct pvr_device *pvr_dev, struct dentry *dir); +}; + +void pvr_debugfs_init(struct drm_minor *minor); +#else /* defined(CONFIG_DEBUG_FS) */ +#include <linux/compiler_attributes.h> + +static __always_inline void pvr_debugfs_init(struct drm_minor *minor) {} +#endif /* defined(CONFIG_DEBUG_FS) */ + +#endif /* PVR_DEBUGFS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c new file mode 100644 index 000000000000..1704c0268589 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_device.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_device_info.h" + +#include "pvr_fw.h" +#include "pvr_params.h" +#include "pvr_power.h" +#include "pvr_queue.h" +#include "pvr_rogue_cr_defs.h" +#include "pvr_stream.h" +#include "pvr_vm.h" + +#include <drm/drm_print.h> + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/compiler_attributes.h> +#include <linux/compiler_types.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/firmware.h> +#include <linux/gfp.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +/* Major number for the supported version of the firmware. */ +#define PVR_FW_VERSION_MAJOR 1 + +/** + * pvr_device_reg_init() - Initialize kernel access to a PowerVR device's + * control registers. + * @pvr_dev: Target PowerVR device. 
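pvr_debugfs_init() above is table driven: it walks a static array of {name, init} pairs and keeps going when a single entry fails. A minimal userspace rendering of the same pattern (entry names taken from the table above, the hooks themselves invented):

```c
#include <stddef.h>
#include <stdio.h>

/* Toy version of the table-driven pattern in pvr_debugfs_init(): walk a
 * static {name, init} table and call each entry's hook, skipping (but
 * not aborting on) per-entry failures. */
struct entry {
	const char *name;
	int (*init)(const char *name);
};

static int init_ok(const char *name)   { printf("init %s\n", name); return 0; }
static int init_fail(const char *name) { (void)name; return -1; }

static const struct entry entries[] = {
	{ "pvr_params", init_ok },
	{ "pvr_fw", init_fail },	/* simulate one failing entry */
	{ "extra", init_ok },
};

int main(void)
{
	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		if (entries[i].init(entries[i].name))
			fprintf(stderr, "skipping '%s'\n", entries[i].name);
	return 0;
}
```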
+ * + * Sets struct pvr_device->regs. + * + * This method of mapping the device control registers into memory ensures that + * they are unmapped when the driver is detached (i.e. no explicit cleanup is + * required). + * + * Return: + * * 0 on success, or + * * Any error returned by devm_platform_ioremap_resource(). + */ +static int +pvr_device_reg_init(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct platform_device *plat_dev = to_platform_device(drm_dev->dev); + struct resource *regs_resource; + void __iomem *regs; + + pvr_dev->regs_resource = NULL; + pvr_dev->regs = NULL; + + regs = devm_platform_get_and_ioremap_resource(plat_dev, 0, ®s_resource); + if (IS_ERR(regs)) + return dev_err_probe(drm_dev->dev, PTR_ERR(regs), + "failed to ioremap gpu registers\n"); + + pvr_dev->regs = regs; + pvr_dev->regs_resource = regs_resource; + + return 0; +} + +/** + * pvr_device_clk_init() - Initialize clocks required by a PowerVR device + * @pvr_dev: Target PowerVR device. + * + * Sets struct pvr_device->core_clk, struct pvr_device->sys_clk and + * struct pvr_device->mem_clk. + * + * Three clocks are required by the PowerVR device: core, sys and mem. On + * return, this function guarantees that the clocks are in one of the following + * states: + * + * * All successfully initialized, + * * Core errored, sys and mem uninitialized, + * * Core deinitialized, sys errored, mem uninitialized, or + * * Core and sys deinitialized, mem errored. + * + * Return: + * * 0 on success, + * * Any error returned by devm_clk_get(), or + * * Any error returned by devm_clk_get_optional(). + */ +static int pvr_device_clk_init(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct clk *core_clk; + struct clk *sys_clk; + struct clk *mem_clk; + + core_clk = devm_clk_get(drm_dev->dev, "core"); + if (IS_ERR(core_clk)) + return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk), + "failed to get core clock\n"); + + sys_clk = devm_clk_get_optional(drm_dev->dev, "sys"); + if (IS_ERR(sys_clk)) + return dev_err_probe(drm_dev->dev, PTR_ERR(sys_clk), + "failed to get sys clock\n"); + + mem_clk = devm_clk_get_optional(drm_dev->dev, "mem"); + if (IS_ERR(mem_clk)) + return dev_err_probe(drm_dev->dev, PTR_ERR(mem_clk), + "failed to get mem clock\n"); + + pvr_dev->core_clk = core_clk; + pvr_dev->sys_clk = sys_clk; + pvr_dev->mem_clk = mem_clk; + + return 0; +} + +/** + * pvr_device_process_active_queues() - Process all queue related events. + * @pvr_dev: PowerVR device to check + * + * This is called any time we receive a FW event. It iterates over all + * active queues and calls pvr_queue_process() on them. + */ +static void pvr_device_process_active_queues(struct pvr_device *pvr_dev) +{ + struct pvr_queue *queue, *tmp_queue; + LIST_HEAD(active_queues); + + mutex_lock(&pvr_dev->queues.lock); + + /* Move all active queues to a temporary list. Queues that remain + * active after we're done processing them are re-inserted to + * the queues.active list by pvr_queue_process(). + */ + list_splice_init(&pvr_dev->queues.active, &active_queues); + + list_for_each_entry_safe(queue, tmp_queue, &active_queues, node) + pvr_queue_process(queue); + + mutex_unlock(&pvr_dev->queues.lock); +} + +static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data) +{ + struct pvr_device *pvr_dev = data; + irqreturn_t ret = IRQ_NONE; + + /* We are in the threaded handler, we can keep dequeuing events until we + * don't see any. 
This should allow us to reduce the number of interrupts + * when the GPU is receiving a massive amount of short jobs. + */ + while (pvr_fw_irq_pending(pvr_dev)) { + pvr_fw_irq_clear(pvr_dev); + + if (pvr_dev->fw_dev.booted) { + pvr_fwccb_process(pvr_dev); + pvr_kccb_wake_up_waiters(pvr_dev); + pvr_device_process_active_queues(pvr_dev); + } + + pm_runtime_mark_last_busy(from_pvr_device(pvr_dev)->dev); + + ret = IRQ_HANDLED; + } + + /* Unmask FW irqs before returning, so new interrupts can be received. */ + pvr_fw_irq_enable(pvr_dev); + return ret; +} + +static irqreturn_t pvr_device_irq_handler(int irq, void *data) +{ + struct pvr_device *pvr_dev = data; + + if (!pvr_fw_irq_pending(pvr_dev)) + return IRQ_NONE; /* Spurious IRQ - ignore. */ + + /* Mask the FW interrupts before waking up the thread. Will be unmasked + * when the thread handler is done processing events. + */ + pvr_fw_irq_disable(pvr_dev); + return IRQ_WAKE_THREAD; +} + +/** + * pvr_device_irq_init() - Initialise IRQ required by a PowerVR device + * @pvr_dev: Target PowerVR device. + * + * Returns: + * * 0 on success, + * * Any error returned by platform_get_irq_byname(), or + * * Any error returned by request_irq(). + */ +static int +pvr_device_irq_init(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct platform_device *plat_dev = to_platform_device(drm_dev->dev); + + init_waitqueue_head(&pvr_dev->kccb.rtn_q); + + pvr_dev->irq = platform_get_irq(plat_dev, 0); + if (pvr_dev->irq < 0) + return pvr_dev->irq; + + /* Clear any pending events before requesting the IRQ line. */ + pvr_fw_irq_clear(pvr_dev); + pvr_fw_irq_enable(pvr_dev); + + return request_threaded_irq(pvr_dev->irq, pvr_device_irq_handler, + pvr_device_irq_thread_handler, + IRQF_SHARED, "gpu", pvr_dev); +} + +/** + * pvr_device_irq_fini() - Deinitialise IRQ required by a PowerVR device + * @pvr_dev: Target PowerVR device. + */ +static void +pvr_device_irq_fini(struct pvr_device *pvr_dev) +{ + free_irq(pvr_dev->irq, pvr_dev); +} + +/** + * pvr_build_firmware_filename() - Construct a PowerVR firmware filename + * @pvr_dev: Target PowerVR device. + * @base: First part of the filename. + * @major: Major version number. + * + * A PowerVR firmware filename consists of three parts separated by underscores + * (``'_'``) along with a '.fw' file suffix. The first part is the exact value + * of @base, the second part is the hardware version string derived from @pvr_fw + * and the final part is the firmware version number constructed from @major with + * a 'v' prefix, e.g. powervr/rogue_4.40.2.51_v1.fw. + * + * The returned string will have been slab allocated and must be freed with + * kfree(). + * + * Return: + * * The constructed filename on success, or + * * Any error returned by kasprintf(). + */ +static char * +pvr_build_firmware_filename(struct pvr_device *pvr_dev, const char *base, + u8 major) +{ + struct pvr_gpu_id *gpu_id = &pvr_dev->gpu_id; + + return kasprintf(GFP_KERNEL, "%s_%d.%d.%d.%d_v%d.fw", base, gpu_id->b, + gpu_id->v, gpu_id->n, gpu_id->c, major); +} + +static void +pvr_release_firmware(void *data) +{ + struct pvr_device *pvr_dev = data; + + release_firmware(pvr_dev->fw_dev.firmware); +} + +/** + * pvr_request_firmware() - Load firmware for a PowerVR device + * @pvr_dev: Target PowerVR device. + * + * See pvr_build_firmware_filename() for details on firmware file naming. + * + * Return: + * * 0 on success, + * * Any error returned by pvr_build_firmware_filename(), or + * * Any error returned by request_firmware(). 
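The filename scheme implemented by pvr_build_firmware_filename() can be reproduced with plain snprintf(); the BVNC 4.40.2.51 and major version 1 below come straight from the example in its kerneldoc:

```c
#include <stdio.h>

/* Userspace sketch of pvr_build_firmware_filename(): the firmware name
 * is "<base>_<B>.<V>.<N>.<C>_v<major>.fw", built here with snprintf()
 * instead of kasprintf(). */
struct gpu_id { unsigned b, v, n, c; };

int main(void)
{
	struct gpu_id id = { 4, 40, 2, 51 };	/* example BVNC from the docs */
	char name[64];

	snprintf(name, sizeof(name), "%s_%u.%u.%u.%u_v%u.fw",
		 "powervr/rogue", id.b, id.v, id.n, id.c, 1);
	printf("%s\n", name);	/* powervr/rogue_4.40.2.51_v1.fw */
	return 0;
}
```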
+ */ +static int +pvr_request_firmware(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = &pvr_dev->base; + char *filename; + const struct firmware *fw; + int err; + + filename = pvr_build_firmware_filename(pvr_dev, "powervr/rogue", + PVR_FW_VERSION_MAJOR); + if (!filename) + return -ENOMEM; + + /* + * This function takes a copy of &filename, meaning we can free our + * instance before returning. + */ + err = request_firmware(&fw, filename, pvr_dev->base.dev); + if (err) { + drm_err(drm_dev, "failed to load firmware %s (err=%d)\n", + filename, err); + goto err_free_filename; + } + + drm_info(drm_dev, "loaded firmware %s\n", filename); + kfree(filename); + + pvr_dev->fw_dev.firmware = fw; + + return devm_add_action_or_reset(drm_dev->dev, pvr_release_firmware, pvr_dev); + +err_free_filename: + kfree(filename); + + return err; +} + +/** + * pvr_load_gpu_id() - Load a PowerVR device's GPU ID (BVNC) from control registers. + * + * Sets struct pvr_dev.gpu_id. + * + * @pvr_dev: Target PowerVR device. + */ +static void +pvr_load_gpu_id(struct pvr_device *pvr_dev) +{ + struct pvr_gpu_id *gpu_id = &pvr_dev->gpu_id; + u64 bvnc; + + /* + * Try reading the BVNC using the newer (cleaner) method first. If the + * B value is zero, fall back to the older method. + */ + bvnc = pvr_cr_read64(pvr_dev, ROGUE_CR_CORE_ID__PBVNC); + + gpu_id->b = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__BRANCH_ID); + if (gpu_id->b != 0) { + gpu_id->v = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__VERSION_ID); + gpu_id->n = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS); + gpu_id->c = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__CONFIG_ID); + } else { + u32 core_rev = pvr_cr_read32(pvr_dev, ROGUE_CR_CORE_REVISION); + u32 core_id = pvr_cr_read32(pvr_dev, ROGUE_CR_CORE_ID); + u16 core_id_config = PVR_CR_FIELD_GET(core_id, CORE_ID_CONFIG); + + gpu_id->b = PVR_CR_FIELD_GET(core_rev, CORE_REVISION_MAJOR); + gpu_id->v = PVR_CR_FIELD_GET(core_rev, CORE_REVISION_MINOR); + gpu_id->n = FIELD_GET(0xFF00, core_id_config); + gpu_id->c = FIELD_GET(0x00FF, core_id_config); + } +} + +/** + * pvr_set_dma_info() - Set PowerVR device DMA information + * @pvr_dev: Target PowerVR device. + * + * Sets the DMA mask and max segment size for the PowerVR device. + * + * Return: + * * 0 on success, + * * Any error returned by PVR_FEATURE_VALUE(), or + * * Any error returned by dma_set_mask(). + */ + +static int +pvr_set_dma_info(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + u16 phys_bus_width; + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width); + if (err) { + drm_err(drm_dev, "Failed to get device physical bus width\n"); + return err; + } + + err = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(phys_bus_width)); + if (err) { + drm_err(drm_dev, "Failed to set DMA mask (err=%d)\n", err); + return err; + } + + dma_set_max_seg_size(drm_dev->dev, UINT_MAX); + + return 0; +} + +/** + * pvr_device_gpu_init() - GPU-specific initialization for a PowerVR device + * @pvr_dev: Target PowerVR device. + * + * The following steps are taken to ensure the device is ready: + * + * 1. Read the hardware version information from control registers, + * 2. Initialise the hardware feature information, + * 3. Setup the device DMA information, + * 4. Setup the device-scoped memory context, and + * 5. Load firmware into the device. 
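pvr_set_dma_info() above derives the DMA mask from the phys_bus_width feature value. DMA_BIT_MASK(n) is simply the n low bits set; re-derived here for an assumed 40-bit bus (the real width is per-GPU):

```c
#include <inttypes.h>
#include <stdio.h>

/* Same shape as the kernel's DMA_BIT_MASK(): n low bits set, with the
 * n == 64 case special-cased to avoid an undefined 64-bit shift. */
#define DMA_BIT_MASK(n) ((n) == 64 ? ~0ULL : (1ULL << (n)) - 1)

int main(void)
{
	/* 40 bits is an assumed example value for phys_bus_width. */
	printf("40-bit DMA mask: 0x%" PRIx64 "\n", (uint64_t)DMA_BIT_MASK(40));
	return 0;
}
```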
+ * + * Return: + * * 0 on success, + * * -%ENODEV if the GPU is not supported, + * * Any error returned by pvr_set_dma_info(), + * * Any error returned by pvr_memory_context_init(), or + * * Any error returned by pvr_request_firmware(). + */ +static int +pvr_device_gpu_init(struct pvr_device *pvr_dev) +{ + int err; + + pvr_load_gpu_id(pvr_dev); + + err = pvr_request_firmware(pvr_dev); + if (err) + return err; + + err = pvr_fw_validate_init_device_info(pvr_dev); + if (err) + return err; + + if (PVR_HAS_FEATURE(pvr_dev, meta)) + pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_META; + else if (PVR_HAS_FEATURE(pvr_dev, mips)) + pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_MIPS; + else if (PVR_HAS_FEATURE(pvr_dev, riscv_fw_processor)) + pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_RISCV; + else + return -EINVAL; + + pvr_stream_create_musthave_masks(pvr_dev); + + err = pvr_set_dma_info(pvr_dev); + if (err) + return err; + + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + pvr_dev->kernel_vm_ctx = pvr_vm_create_context(pvr_dev, false); + if (IS_ERR(pvr_dev->kernel_vm_ctx)) + return PTR_ERR(pvr_dev->kernel_vm_ctx); + } + + err = pvr_fw_init(pvr_dev); + if (err) + goto err_vm_ctx_put; + + return 0; + +err_vm_ctx_put: + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + pvr_vm_context_put(pvr_dev->kernel_vm_ctx); + pvr_dev->kernel_vm_ctx = NULL; + } + + return err; +} + +/** + * pvr_device_gpu_fini() - GPU-specific deinitialization for a PowerVR device + * @pvr_dev: Target PowerVR device. + */ +static void +pvr_device_gpu_fini(struct pvr_device *pvr_dev) +{ + pvr_fw_fini(pvr_dev); + + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx)); + pvr_dev->kernel_vm_ctx = NULL; + } +} + +/** + * pvr_device_init() - Initialize a PowerVR device + * @pvr_dev: Target PowerVR device. + * + * If this function returns successfully, the device will have been fully + * initialized. Otherwise, any parts of the device initialized before an error + * occurs will be de-initialized before returning. + * + * NOTE: The initialization steps currently taken are the bare minimum required + * to read from the control registers. The device is unlikely to function + * until further initialization steps are added. [This note should be + * removed when that happens.] + * + * Return: + * * 0 on success, + * * Any error returned by pvr_device_reg_init(), + * * Any error returned by pvr_device_clk_init(), or + * * Any error returned by pvr_device_gpu_init(). + */ +int +pvr_device_init(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct device *dev = drm_dev->dev; + int err; + + /* + * Setup device parameters. We do this first in case other steps + * depend on them. + */ + err = pvr_device_params_init(&pvr_dev->params); + if (err) + return err; + + /* Enable and initialize clocks required for the device to operate. */ + err = pvr_device_clk_init(pvr_dev); + if (err) + return err; + + /* Explicitly power the GPU so we can access control registers before the FW is booted. */ + err = pm_runtime_resume_and_get(dev); + if (err) + return err; + + /* Map the control registers into memory. */ + err = pvr_device_reg_init(pvr_dev); + if (err) + goto err_pm_runtime_put; + + /* Perform GPU-specific initialization steps. 
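pvr_device_init() around this point uses the kernel's standard goto-unwind ladder: each successful step gains a cleanup label, and a failure jumps to the label that undoes everything acquired so far, in reverse order. A compilable skeleton of the shape (all steps are stubs):

```c
#include <stdio.h>

/* Shape of the unwind in pvr_device_init(): cleanup labels fall through
 * so that a late failure releases resources in reverse order. */
static int step_a(void) { return 0; }
static void undo_a(void) { puts("undo a"); }
static int step_b(void) { return 0; }
static void undo_b(void) { puts("undo b"); }
static int step_c(void) { return -1; }	/* simulate a late failure */

static int device_init(void)
{
	int err;

	err = step_a();
	if (err)
		return err;

	err = step_b();
	if (err)
		goto err_undo_a;

	err = step_c();
	if (err)
		goto err_undo_b;

	return 0;

err_undo_b:
	undo_b();
err_undo_a:
	undo_a();
	return err;
}

int main(void)
{
	printf("init: %d\n", device_init());	/* undo b, undo a, init: -1 */
	return 0;
}
```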
*/ + err = pvr_device_gpu_init(pvr_dev); + if (err) + goto err_pm_runtime_put; + + err = pvr_device_irq_init(pvr_dev); + if (err) + goto err_device_gpu_fini; + + pm_runtime_put(dev); + + return 0; + +err_device_gpu_fini: + pvr_device_gpu_fini(pvr_dev); + +err_pm_runtime_put: + pm_runtime_put_sync_suspend(dev); + + return err; +} + +/** + * pvr_device_fini() - Deinitialize a PowerVR device + * @pvr_dev: Target PowerVR device. + */ +void +pvr_device_fini(struct pvr_device *pvr_dev) +{ + /* + * Deinitialization stages are performed in reverse order compared to + * the initialization stages in pvr_device_init(). + */ + pvr_device_irq_fini(pvr_dev); + pvr_device_gpu_fini(pvr_dev); +} + +bool +pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk) +{ + switch (quirk) { + case 47217: + return PVR_HAS_QUIRK(pvr_dev, 47217); + case 48545: + return PVR_HAS_QUIRK(pvr_dev, 48545); + case 49927: + return PVR_HAS_QUIRK(pvr_dev, 49927); + case 51764: + return PVR_HAS_QUIRK(pvr_dev, 51764); + case 62269: + return PVR_HAS_QUIRK(pvr_dev, 62269); + default: + return false; + }; +} + +bool +pvr_device_has_uapi_enhancement(struct pvr_device *pvr_dev, u32 enhancement) +{ + switch (enhancement) { + case 35421: + return PVR_HAS_ENHANCEMENT(pvr_dev, 35421); + case 42064: + return PVR_HAS_ENHANCEMENT(pvr_dev, 42064); + default: + return false; + }; +} + +/** + * pvr_device_has_feature() - Look up device feature based on feature definition + * @pvr_dev: Device pointer. + * @feature: Feature to look up. Should be one of %PVR_FEATURE_*. + * + * Returns: + * * %true if feature is present on device, or + * * %false if feature is not present on device. + */ +bool +pvr_device_has_feature(struct pvr_device *pvr_dev, u32 feature) +{ + switch (feature) { + case PVR_FEATURE_CLUSTER_GROUPING: + return PVR_HAS_FEATURE(pvr_dev, cluster_grouping); + + case PVR_FEATURE_COMPUTE_MORTON_CAPABLE: + return PVR_HAS_FEATURE(pvr_dev, compute_morton_capable); + + case PVR_FEATURE_FB_CDC_V4: + return PVR_HAS_FEATURE(pvr_dev, fb_cdc_v4); + + case PVR_FEATURE_GPU_MULTICORE_SUPPORT: + return PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support); + + case PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE: + return PVR_HAS_FEATURE(pvr_dev, isp_zls_d24_s8_packing_ogl_mode); + + case PVR_FEATURE_S7_TOP_INFRASTRUCTURE: + return PVR_HAS_FEATURE(pvr_dev, s7_top_infrastructure); + + case PVR_FEATURE_TESSELLATION: + return PVR_HAS_FEATURE(pvr_dev, tessellation); + + case PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS: + return PVR_HAS_FEATURE(pvr_dev, tpu_dm_global_registers); + + case PVR_FEATURE_VDM_DRAWINDIRECT: + return PVR_HAS_FEATURE(pvr_dev, vdm_drawindirect); + + case PVR_FEATURE_VDM_OBJECT_LEVEL_LLS: + return PVR_HAS_FEATURE(pvr_dev, vdm_object_level_lls); + + case PVR_FEATURE_ZLS_SUBTILE: + return PVR_HAS_FEATURE(pvr_dev, zls_subtile); + + /* Derived features. 
*/ + case PVR_FEATURE_CDM_USER_MODE_QUEUE: { + u8 cdm_control_stream_format = 0; + + PVR_FEATURE_VALUE(pvr_dev, cdm_control_stream_format, &cdm_control_stream_format); + return (cdm_control_stream_format >= 2 && cdm_control_stream_format <= 4); + } + + case PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP: + if (PVR_HAS_FEATURE(pvr_dev, fbcdc_algorithm)) { + u8 fbcdc_algorithm = 0; + + PVR_FEATURE_VALUE(pvr_dev, fbcdc_algorithm, &fbcdc_algorithm); + return (fbcdc_algorithm < 3 || PVR_HAS_FEATURE(pvr_dev, fb_cdc_v4)); + } + return false; + + default: + WARN(true, "Looking up undefined feature %u\n", feature); + return false; + } +} diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h new file mode 100644 index 000000000000..ecdd5767d8ef --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -0,0 +1,725 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_DEVICE_H +#define PVR_DEVICE_H + +#include "pvr_ccb.h" +#include "pvr_device_info.h" +#include "pvr_fw.h" +#include "pvr_params.h" +#include "pvr_rogue_fwif_stream.h" +#include "pvr_stream.h" + +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <drm/drm_mm.h> + +#include <linux/bits.h> +#include <linux/compiler_attributes.h> +#include <linux/compiler_types.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/xarray.h> + +/* Forward declaration from <linux/clk.h>. */ +struct clk; + +/* Forward declaration from <linux/firmware.h>. */ +struct firmware; + +/** + * struct pvr_gpu_id - Hardware GPU ID information for a PowerVR device + * @b: Branch ID. + * @v: Version ID. + * @n: Number of scalable units. + * @c: Config ID. + */ +struct pvr_gpu_id { + u16 b, v, n, c; +}; + +/** + * struct pvr_fw_version - Firmware version information + * @major: Major version number. + * @minor: Minor version number. + */ +struct pvr_fw_version { + u16 major, minor; +}; + +/** + * struct pvr_device - powervr-specific wrapper for &struct drm_device + */ +struct pvr_device { + /** + * @base: The underlying &struct drm_device. + * + * Do not access this member directly, instead call + * from_pvr_device(). + */ + struct drm_device base; + + /** @gpu_id: GPU ID detected at runtime. */ + struct pvr_gpu_id gpu_id; + + /** + * @features: Hardware feature information. + * + * Do not access this member directly, instead use PVR_HAS_FEATURE() + * or PVR_FEATURE_VALUE() macros. + */ + struct pvr_device_features features; + + /** + * @quirks: Hardware quirk information. + * + * Do not access this member directly, instead use PVR_HAS_QUIRK(). + */ + struct pvr_device_quirks quirks; + + /** + * @enhancements: Hardware enhancement information. + * + * Do not access this member directly, instead use + * PVR_HAS_ENHANCEMENT(). + */ + struct pvr_device_enhancements enhancements; + + /** @fw_version: Firmware version detected at runtime. */ + struct pvr_fw_version fw_version; + + /** @regs_resource: Resource representing device control registers. */ + struct resource *regs_resource; + + /** + * @regs: Device control registers. + * + * These are mapped into memory when the device is initialized; that + * location is where this pointer points. + */ + void __iomem *regs; + + /** + * @core_clk: General core clock. 
+ * + * This is the primary clock used by the entire GPU core. + */ + struct clk *core_clk; + + /** + * @sys_clk: Optional system bus clock. + * + * This may be used on some platforms to provide an independent clock to the SoC Interface + * (SOCIF). If present, this needs to be enabled/disabled together with @core_clk. + */ + struct clk *sys_clk; + + /** + * @mem_clk: Optional memory clock. + * + * This may be used on some platforms to provide an independent clock to the Memory + * Interface (MEMIF). If present, this needs to be enabled/disabled together with @core_clk. + */ + struct clk *mem_clk; + + /** @irq: IRQ number. */ + int irq; + + /** @fwccb: Firmware CCB. */ + struct pvr_ccb fwccb; + + /** + * @kernel_vm_ctx: Virtual memory context used for kernel mappings. + * + * This is used for mappings in the firmware address region when a META firmware processor + * is in use. + * + * When a MIPS firmware processor is in use, this will be %NULL. + */ + struct pvr_vm_context *kernel_vm_ctx; + + /** @fw_dev: Firmware related data. */ + struct pvr_fw_device fw_dev; + + /** + * @params: Device-specific parameters. + * + * The values of these parameters are initialized from the + * defaults specified as module parameters. They may be + * modified at runtime via debugfs (if enabled). + */ + struct pvr_device_params params; + + /** @stream_musthave_quirks: Bit array of "must-have" quirks for stream commands. */ + u32 stream_musthave_quirks[PVR_STREAM_TYPE_MAX][PVR_STREAM_EXTHDR_TYPE_MAX]; + + /** + * @mmu_flush_cache_flags: Records which MMU caches require flushing + * before submitting the next job. + */ + atomic_t mmu_flush_cache_flags; + + /** + * @ctx_ids: Array of contexts belonging to this device. Array members + * are of type "struct pvr_context *". + * + * This array is used to allocate IDs used by the firmware. + */ + struct xarray ctx_ids; + + /** + * @free_list_ids: Array of free lists belonging to this device. Array members + * are of type "struct pvr_free_list *". + * + * This array is used to allocate IDs used by the firmware. + */ + struct xarray free_list_ids; + + /** + * @job_ids: Array of jobs belonging to this device. Array members + * are of type "struct pvr_job *". + */ + struct xarray job_ids; + + /** + * @queues: Queue-related fields. + */ + struct { + /** @queues.active: Active queue list. */ + struct list_head active; + + /** @queues.idle: Idle queue list. */ + struct list_head idle; + + /** @queues.lock: Lock protecting access to the active/idle + * lists. */ + struct mutex lock; + } queues; + + /** + * @watchdog: Watchdog for communications with firmware. + */ + struct { + /** @watchdog.work: Work item for watchdog callback. */ + struct delayed_work work; + + /** + * @watchdog.old_kccb_cmds_executed: KCCB command execution + * count at last watchdog poll. + */ + u32 old_kccb_cmds_executed; + + /** + * @watchdog.kccb_stall_count: Number of watchdog polls + * KCCB has been stalled for. + */ + u32 kccb_stall_count; + } watchdog; + + /** + * @kccb: Circular buffer for communications with firmware. + */ + struct { + /** @kccb.ccb: Kernel CCB. */ + struct pvr_ccb ccb; + + /** @kccb.rtn_q: Waitqueue for KCCB command return waiters. */ + wait_queue_head_t rtn_q; + + /** @kccb.rtn_obj: Object representing KCCB return slots. */ + struct pvr_fw_object *rtn_obj; + + /** + * @kccb.rtn: Pointer to CPU mapping of KCCB return slots. + * Must be accessed by READ_ONCE()/WRITE_ONCE(). + */ + u32 *rtn; + + /** @kccb.slot_count: Total number of KCCB slots available. 
*/ + u32 slot_count; + + /** @kccb.reserved_count: Number of KCCB slots reserved for + * future use. */ + u32 reserved_count; + + /** + * @kccb.waiters: List of KCCB slot waiters. + */ + struct list_head waiters; + + /** @kccb.fence_ctx: KCCB fence context. */ + struct { + /** @kccb.fence_ctx.id: KCCB fence context ID + * allocated with dma_fence_context_alloc(). */ + u64 id; + + /** @kccb.fence_ctx.seqno: Sequence number incremented + * each time a fence is created. */ + atomic_t seqno; + + /** + * @kccb.fence_ctx.lock: Lock used to synchronize + * access to fences allocated by this context. + */ + spinlock_t lock; + } fence_ctx; + } kccb; + + /** + * @lost: %true if the device has been lost. + * + * This variable is set if the device has become irretrievably unavailable, e.g. if the + * firmware processor has stopped responding and can not be revived via a hard reset. + */ + bool lost; + + /** + * @reset_sem: Reset semaphore. + * + * GPU reset code will lock this for writing. Any code that submits commands to the firmware + * that isn't in an IRQ handler or on the scheduler workqueue must lock this for reading. + * Once this has been successfully locked, &pvr_dev->lost _must_ be checked, and -%EIO must + * be returned if it is set. + */ + struct rw_semaphore reset_sem; + + /** @sched_wq: Workqueue for schedulers. */ + struct workqueue_struct *sched_wq; +}; + +/** + * struct pvr_file - powervr-specific data to be assigned to &struct + * drm_file.driver_priv + */ +struct pvr_file { + /** + * @file: A reference to the parent &struct drm_file. + * + * Do not access this member directly, instead call from_pvr_file(). + */ + struct drm_file *file; + + /** + * @pvr_dev: A reference to the powervr-specific wrapper for the + * associated device. Saves on repeated calls to to_pvr_device(). + */ + struct pvr_device *pvr_dev; + + /** + * @ctx_handles: Array of contexts belonging to this file. Array members + * are of type "struct pvr_context *". + * + * This array is used to allocate handles returned to userspace. + */ + struct xarray ctx_handles; + + /** + * @free_list_handles: Array of free lists belonging to this file. Array + * members are of type "struct pvr_free_list *". + * + * This array is used to allocate handles returned to userspace. + */ + struct xarray free_list_handles; + + /** + * @hwrt_handles: Array of HWRT datasets belonging to this file. Array + * members are of type "struct pvr_hwrt_dataset *". + * + * This array is used to allocate handles returned to userspace. + */ + struct xarray hwrt_handles; + + /** + * @vm_ctx_handles: Array of VM contexts belonging to this file. Array + * members are of type "struct pvr_vm_context *". + * + * This array is used to allocate handles returned to userspace. + */ + struct xarray vm_ctx_handles; +}; + +/** + * PVR_HAS_FEATURE() - Tests whether a PowerVR device has a given feature + * @pvr_dev: [IN] Target PowerVR device. + * @feature: [IN] Hardware feature name. + * + * Feature names are derived from those found in &struct pvr_device_features by + * dropping the 'has_' prefix, which is applied by this macro. + * + * Return: + * * true if the named feature is present in the hardware + * * false if the named feature is not present in the hardware + */ +#define PVR_HAS_FEATURE(pvr_dev, feature) ((pvr_dev)->features.has_##feature) + +/** + * PVR_FEATURE_VALUE() - Gets a PowerVR device feature value + * @pvr_dev: [IN] Target PowerVR device. + * @feature: [IN] Feature name. + * @value_out: [OUT] Feature value. 
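PVR_HAS_FEATURE() above and PVR_FEATURE_VALUE() defined just below both rely on token pasting to select the has_<feature> flag, the latter wrapped in a GNU C statement expression so the macro evaluates to 0 or -EINVAL. A self-contained model (the struct and field are invented stand-ins for struct pvr_device_features):

```c
#include <errno.h>
#include <stdio.h>

struct features {
	int has_phys_bus_width;
	unsigned int phys_bus_width;
};

/* has_##feature pastes the flag name at compile time. */
#define HAS_FEATURE(f, feature) ((f)->has_##feature)

/* GNU C statement expression: writes through value_out only when the
 * feature exists, and yields 0 or -EINVAL. */
#define FEATURE_VALUE(f, feature, value_out)		\
	({						\
		struct features *_f = (f);		\
		int _ret = -EINVAL;			\
		if (_f->has_##feature) {		\
			*(value_out) = _f->feature;	\
			_ret = 0;			\
		}					\
		_ret;					\
	})

int main(void)
{
	struct features f = { .has_phys_bus_width = 1, .phys_bus_width = 40 };
	unsigned int width;

	if (HAS_FEATURE(&f, phys_bus_width) &&
	    !FEATURE_VALUE(&f, phys_bus_width, &width))
		printf("phys bus width: %u\n", width);	/* 40 */
	return 0;
}
```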
+ * + * This macro will get a feature value for those features that have values. + * If the feature is not present, nothing will be stored to @value_out. + * + * Feature names are derived from those found in &struct pvr_device_features by + * dropping the 'has_' prefix. + * + * Return: + * * 0 on success, or + * * -%EINVAL if the named feature is not present in the hardware + */ +#define PVR_FEATURE_VALUE(pvr_dev, feature, value_out) \ + ({ \ + struct pvr_device *_pvr_dev = pvr_dev; \ + int _ret = -EINVAL; \ + if (_pvr_dev->features.has_##feature) { \ + *(value_out) = _pvr_dev->features.feature; \ + _ret = 0; \ + } \ + _ret; \ + }) + +/** + * PVR_HAS_QUIRK() - Tests whether a physical device has a given quirk + * @pvr_dev: [IN] Target PowerVR device. + * @quirk: [IN] Hardware quirk name. + * + * Quirk numbers are derived from those found in #pvr_device_quirks by + * dropping the 'has_brn' prefix, which is applied by this macro. + * + * Returns + * * true if the quirk is present in the hardware, or + * * false if the quirk is not present in the hardware. + */ +#define PVR_HAS_QUIRK(pvr_dev, quirk) ((pvr_dev)->quirks.has_brn##quirk) + +/** + * PVR_HAS_ENHANCEMENT() - Tests whether a physical device has a given + * enhancement + * @pvr_dev: [IN] Target PowerVR device. + * @enhancement: [IN] Hardware enhancement name. + * + * Enhancement numbers are derived from those found in #pvr_device_enhancements + * by dropping the 'has_ern' prefix, which is applied by this macro. + * + * Returns + * * true if the enhancement is present in the hardware, or + * * false if the enhancement is not present in the hardware. + */ +#define PVR_HAS_ENHANCEMENT(pvr_dev, enhancement) ((pvr_dev)->enhancements.has_ern##enhancement) + +#define from_pvr_device(pvr_dev) (&(pvr_dev)->base) + +#define to_pvr_device(drm_dev) container_of_const(drm_dev, struct pvr_device, base) + +#define from_pvr_file(pvr_file) ((pvr_file)->file) + +#define to_pvr_file(file) ((file)->driver_priv) + +/** + * PVR_PACKED_BVNC() - Packs B, V, N and C values into a 64-bit unsigned integer + * @b: Branch ID. + * @v: Version ID. + * @n: Number of scalable units. + * @c: Config ID. + * + * The packed layout is as follows: + * + * +--------+--------+--------+-------+ + * | 63..48 | 47..32 | 31..16 | 15..0 | + * +========+========+========+=======+ + * | B | V | N | C | + * +--------+--------+--------+-------+ + * + * pvr_gpu_id_to_packed_bvnc() should be used instead of this macro when a + * &struct pvr_gpu_id is available in order to ensure proper type checking. + * + * Return: Packed BVNC. + */ +/* clang-format off */ +#define PVR_PACKED_BVNC(b, v, n, c) \ + ((((u64)(b) & GENMASK_ULL(15, 0)) << 48) | \ + (((u64)(v) & GENMASK_ULL(15, 0)) << 32) | \ + (((u64)(n) & GENMASK_ULL(15, 0)) << 16) | \ + (((u64)(c) & GENMASK_ULL(15, 0)) << 0)) +/* clang-format on */ + +/** + * pvr_gpu_id_to_packed_bvnc() - Packs B, V, N and C values into a 64-bit + * unsigned integer + * @gpu_id: GPU ID. + * + * The packed layout is as follows: + * + * +--------+--------+--------+-------+ + * | 63..48 | 47..32 | 31..16 | 15..0 | + * +========+========+========+=======+ + * | B | V | N | C | + * +--------+--------+--------+-------+ + * + * This should be used in preference to PVR_PACKED_BVNC() when a &struct + * pvr_gpu_id is available in order to ensure proper type checking. + * + * Return: Packed BVNC. 
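The 16-bit-per-field packing documented above is easy to sanity check in isolation; this round trip uses the 4.40.2.51 example BVNC from earlier in the patch:

```c
#include <assert.h>
#include <stdint.h>

/* Round-trip check for the BVNC packing layout: four 16-bit fields,
 * B in bits 63..48 down to C in bits 15..0. */
static uint64_t pack_bvnc(uint16_t b, uint16_t v, uint16_t n, uint16_t c)
{
	return ((uint64_t)b << 48) | ((uint64_t)v << 32) |
	       ((uint64_t)n << 16) | (uint64_t)c;
}

int main(void)
{
	uint64_t bvnc = pack_bvnc(4, 40, 2, 51);	/* GPU 4.40.2.51 */

	assert(bvnc == 0x0004002800020033ull);
	assert((uint16_t)(bvnc >> 48) == 4);
	assert((uint16_t)(bvnc >> 32) == 40);
	assert((uint16_t)(bvnc >> 16) == 2);
	assert((uint16_t)bvnc == 51);
	return 0;
}
```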
+ */ +static __always_inline u64 +pvr_gpu_id_to_packed_bvnc(struct pvr_gpu_id *gpu_id) +{ + return PVR_PACKED_BVNC(gpu_id->b, gpu_id->v, gpu_id->n, gpu_id->c); +} + +static __always_inline void +packed_bvnc_to_pvr_gpu_id(u64 bvnc, struct pvr_gpu_id *gpu_id) +{ + gpu_id->b = (bvnc & GENMASK_ULL(63, 48)) >> 48; + gpu_id->v = (bvnc & GENMASK_ULL(47, 32)) >> 32; + gpu_id->n = (bvnc & GENMASK_ULL(31, 16)) >> 16; + gpu_id->c = bvnc & GENMASK_ULL(15, 0); +} + +int pvr_device_init(struct pvr_device *pvr_dev); +void pvr_device_fini(struct pvr_device *pvr_dev); +void pvr_device_reset(struct pvr_device *pvr_dev); + +bool +pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk); +bool +pvr_device_has_uapi_enhancement(struct pvr_device *pvr_dev, u32 enhancement); +bool +pvr_device_has_feature(struct pvr_device *pvr_dev, u32 feature); + +/** + * PVR_CR_FIELD_GET() - Extract a single field from a PowerVR control register + * @val: Value of the target register. + * @field: Field specifier, as defined in "pvr_rogue_cr_defs.h". + * + * Return: The extracted field. + */ +#define PVR_CR_FIELD_GET(val, field) FIELD_GET(~ROGUE_CR_##field##_CLRMSK, val) + +/** + * pvr_cr_read32() - Read a 32-bit register from a PowerVR device + * @pvr_dev: Target PowerVR device. + * @reg: Target register. + * + * Return: The value of the requested register. + */ +static __always_inline u32 +pvr_cr_read32(struct pvr_device *pvr_dev, u32 reg) +{ + return ioread32(pvr_dev->regs + reg); +} + +/** + * pvr_cr_read64() - Read a 64-bit register from a PowerVR device + * @pvr_dev: Target PowerVR device. + * @reg: Target register. + * + * Return: The value of the requested register. + */ +static __always_inline u64 +pvr_cr_read64(struct pvr_device *pvr_dev, u32 reg) +{ + return ioread64(pvr_dev->regs + reg); +} + +/** + * pvr_cr_write32() - Write to a 32-bit register in a PowerVR device + * @pvr_dev: Target PowerVR device. + * @reg: Target register. + * @val: Value to write. + */ +static __always_inline void +pvr_cr_write32(struct pvr_device *pvr_dev, u32 reg, u32 val) +{ + iowrite32(val, pvr_dev->regs + reg); +} + +/** + * pvr_cr_write64() - Write to a 64-bit register in a PowerVR device + * @pvr_dev: Target PowerVR device. + * @reg: Target register. + * @val: Value to write. + */ +static __always_inline void +pvr_cr_write64(struct pvr_device *pvr_dev, u32 reg, u64 val) +{ + iowrite64(val, pvr_dev->regs + reg); +} + +/** + * pvr_cr_poll_reg32() - Wait for a 32-bit register to match a given value by + * polling + * @pvr_dev: Target PowerVR device. + * @reg_addr: Address of register. + * @reg_value: Expected register value (after masking). + * @reg_mask: Mask of bits valid for comparison with @reg_value. + * @timeout_usec: Timeout length, in us. + * + * Returns: + * * 0 on success, or + * * -%ETIMEDOUT on timeout. + */ +static __always_inline int +pvr_cr_poll_reg32(struct pvr_device *pvr_dev, u32 reg_addr, u32 reg_value, + u32 reg_mask, u64 timeout_usec) +{ + u32 value; + + return readl_poll_timeout(pvr_dev->regs + reg_addr, value, + (value & reg_mask) == reg_value, 0, timeout_usec); +} + +/** + * pvr_cr_poll_reg64() - Wait for a 64-bit register to match a given value by + * polling + * @pvr_dev: Target PowerVR device. + * @reg_addr: Address of register. + * @reg_value: Expected register value (after masking). + * @reg_mask: Mask of bits valid for comparison with @reg_value. + * @timeout_usec: Timeout length, in us. + * + * Returns: + * * 0 on success, or + * * -%ETIMEDOUT on timeout. 
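+ * + * Example (a minimal sketch; ROGUE_CR_EXAMPLE_STATUS and its IDLE_EN field + * are hypothetical names used purely for illustration): + * + * .. code-block:: c + * + *	err = pvr_cr_poll_reg64(pvr_dev, ROGUE_CR_EXAMPLE_STATUS, + *				ROGUE_CR_EXAMPLE_STATUS_IDLE_EN, + *				ROGUE_CR_EXAMPLE_STATUS_IDLE_EN, 1000); + * + * This polls continuously for up to 1000us, returning 0 once the masked bit + * reads back as set and -%ETIMEDOUT otherwise.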
+ */ +static __always_inline int +pvr_cr_poll_reg64(struct pvr_device *pvr_dev, u32 reg_addr, u64 reg_value, + u64 reg_mask, u64 timeout_usec) +{ + u64 value; + + return readq_poll_timeout(pvr_dev->regs + reg_addr, value, + (value & reg_mask) == reg_value, 0, timeout_usec); +} + +/** + * pvr_round_up_to_cacheline_size() - Round up a provided size to be cacheline + * aligned + * @pvr_dev: Target PowerVR device. + * @size: Initial size, in bytes. + * + * Returns: + * * Size aligned to cacheline size. + */ +static __always_inline size_t +pvr_round_up_to_cacheline_size(struct pvr_device *pvr_dev, size_t size) +{ + u16 slc_cacheline_size_bits = 0; + u16 slc_cacheline_size_bytes; + + WARN_ON(!PVR_HAS_FEATURE(pvr_dev, slc_cache_line_size_bits)); + PVR_FEATURE_VALUE(pvr_dev, slc_cache_line_size_bits, + &slc_cacheline_size_bits); + slc_cacheline_size_bytes = slc_cacheline_size_bits / 8; + + return round_up(size, slc_cacheline_size_bytes); +} + +/** + * DOC: IOCTL validation helpers + * + * To validate the constraints imposed on IOCTL argument structs, a collection + * of macros and helper functions exist in ``pvr_device.h``. + * + * Of the current helpers, it should only be necessary to call + * PVR_IOCTL_UNION_PADDING_CHECK() directly. This macro should be used once in + * every code path which extracts a union member from a struct passed from + * userspace. + */ + +/** + * pvr_ioctl_union_padding_check() - Validate that the implicit padding between + * the end of a union member and the end of the union itself is zeroed. + * @instance: Pointer to the instance of the struct to validate. + * @union_offset: Offset into the type of @instance of the target union. Must + * be 64-bit aligned. + * @union_size: Size of the target union in the type of @instance. Must be + * 64-bit aligned. + * @member_size: Size of the target member in the target union specified by + * @union_offset and @union_size. It is assumed that the offset of the target + * member is zero relative to @union_offset. Must be 64-bit aligned. + * + * You probably want to use PVR_IOCTL_UNION_PADDING_CHECK() instead of calling + * this function directly, since that macro abstracts away much of the setup, + * and also provides some static validation. See its docs for details. + * + * Return: + * * %true if every byte between the end of the used member of the union and + * the end of that union is zeroed, or + * * %false otherwise. + */ +static __always_inline bool +pvr_ioctl_union_padding_check(void *instance, size_t union_offset, + size_t union_size, size_t member_size) +{ + /* + * void pointer arithmetic is technically illegal - cast to a byte + * pointer so this addition works safely. + */ + void *padding_start = ((u8 *)instance) + union_offset + member_size; + size_t padding_size = union_size - member_size; + + return !memchr_inv(padding_start, 0, padding_size); +} + +/** + * PVR_STATIC_ASSERT_64BIT_ALIGNED() - Inline assertion for 64-bit alignment. + * @static_expr_: Target expression to evaluate. + * + * If @static_expr_ does not evaluate to a constant integer which would be a + * 64-bit aligned address (i.e. a multiple of 8), compilation will fail. + * + * Return: + * The value of @static_expr_. + */ +#define PVR_STATIC_ASSERT_64BIT_ALIGNED(static_expr_) \ + ({ \ + static_assert(((static_expr_) & (sizeof(u64) - 1)) == 0); \ + (static_expr_); \ + }) + +/** + * PVR_IOCTL_UNION_PADDING_CHECK() - Validate that the implicit padding between + * the end of a union member and the end of the union itself is zeroed. 
+ * @struct_instance_: An expression which evaluates to a pointer to a UAPI data + * struct. + * @union_: The name of the union member of @struct_instance_ to check. If the + * union member is nested within the type of @struct_instance_, this may + * contain the member access operator ("."). + * @member_: The name of the member of @union_ to assess. + * + * This is a wrapper around pvr_ioctl_union_padding_check() which performs + * alignment checks and simplifies things for the caller. + * + * Return: + * * %true if every byte in @struct_instance_ between the end of @member_ and + * the end of @union_ is zeroed, or + * * %false otherwise. + */ +#define PVR_IOCTL_UNION_PADDING_CHECK(struct_instance_, union_, member_) \ + ({ \ + typeof(struct_instance_) __instance = (struct_instance_); \ + size_t __union_offset = PVR_STATIC_ASSERT_64BIT_ALIGNED( \ + offsetof(typeof(*__instance), union_)); \ + size_t __union_size = PVR_STATIC_ASSERT_64BIT_ALIGNED( \ + sizeof(__instance->union_)); \ + size_t __member_size = PVR_STATIC_ASSERT_64BIT_ALIGNED( \ + sizeof(__instance->union_.member_)); \ + pvr_ioctl_union_padding_check(__instance, __union_offset, \ + __union_size, __member_size); \ + }) + +#define PVR_FW_PROCESSOR_TYPE_META 0 +#define PVR_FW_PROCESSOR_TYPE_MIPS 1 +#define PVR_FW_PROCESSOR_TYPE_RISCV 2 + +#endif /* PVR_DEVICE_H */ diff --git a/drivers/gpu/drm/imagination/pvr_device_info.c b/drivers/gpu/drm/imagination/pvr_device_info.c new file mode 100644 index 000000000000..d3301cde7d11 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_device_info.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_device_info.h" +#include "pvr_rogue_fwif_dev_info.h" + +#include <drm/drm_print.h> + +#include <linux/bits.h> +#include <linux/minmax.h> +#include <linux/stddef.h> +#include <linux/types.h> + +#define QUIRK_MAPPING(quirk) \ + [PVR_FW_HAS_BRN_##quirk] = offsetof(struct pvr_device, quirks.has_brn##quirk) + +static const uintptr_t quirks_mapping[] = { + QUIRK_MAPPING(44079), + QUIRK_MAPPING(47217), + QUIRK_MAPPING(48492), + QUIRK_MAPPING(48545), + QUIRK_MAPPING(49927), + QUIRK_MAPPING(50767), + QUIRK_MAPPING(51764), + QUIRK_MAPPING(62269), + QUIRK_MAPPING(63142), + QUIRK_MAPPING(63553), + QUIRK_MAPPING(66011), + QUIRK_MAPPING(71242), +}; + +#undef QUIRK_MAPPING + +#define ENHANCEMENT_MAPPING(enhancement) \ + [PVR_FW_HAS_ERN_##enhancement] = offsetof(struct pvr_device, \ + enhancements.has_ern##enhancement) + +static const uintptr_t enhancements_mapping[] = { + ENHANCEMENT_MAPPING(35421), + ENHANCEMENT_MAPPING(38020), + ENHANCEMENT_MAPPING(38748), + ENHANCEMENT_MAPPING(42064), + ENHANCEMENT_MAPPING(42290), + ENHANCEMENT_MAPPING(42606), + ENHANCEMENT_MAPPING(47025), + ENHANCEMENT_MAPPING(57596), +}; + +#undef ENHANCEMENT_MAPPING + +static void pvr_device_info_set_common(struct pvr_device *pvr_dev, const u64 *bitmask, + u32 bitmask_size, const uintptr_t *mapping, u32 mapping_max) +{ + const u32 mapping_max_size = (mapping_max + 63) >> 6; + const u32 nr_bits = min(bitmask_size * 64, mapping_max); + + /* Warn if any unsupported values in the bitmask. 
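+ * + * For example (illustrative numbers): with mapping_max = 12 supported bits, + * mapping_max_size is 1 and invalid_mask covers bits 12..63, so any of + * those bits being set in the final bitmask word triggers the warning.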
*/ + if (bitmask_size > mapping_max_size) { + if (mapping == quirks_mapping) + drm_warn(from_pvr_device(pvr_dev), "Unsupported quirks in firmware image"); + else + drm_warn(from_pvr_device(pvr_dev), + "Unsupported enhancements in firmware image"); + } else if (bitmask_size == mapping_max_size && (mapping_max & 63)) { + u64 invalid_mask = ~0ull << (mapping_max & 63); + + if (bitmask[bitmask_size - 1] & invalid_mask) { + if (mapping == quirks_mapping) + drm_warn(from_pvr_device(pvr_dev), + "Unsupported quirks in firmware image"); + else + drm_warn(from_pvr_device(pvr_dev), + "Unsupported enhancements in firmware image"); + } + } + + for (u32 i = 0; i < nr_bits; i++) { + if (bitmask[i >> 6] & BIT_ULL(i & 63)) + *(bool *)((u8 *)pvr_dev + mapping[i]) = true; + } +} + +/** + * pvr_device_info_set_quirks() - Set device quirks from device information in firmware + * @pvr_dev: Device pointer. + * @quirks: Pointer to quirks mask in device information. + * @quirks_size: Size of quirks mask, in u64s. + */ +void pvr_device_info_set_quirks(struct pvr_device *pvr_dev, const u64 *quirks, u32 quirks_size) +{ + BUILD_BUG_ON(ARRAY_SIZE(quirks_mapping) != PVR_FW_HAS_BRN_MAX); + + pvr_device_info_set_common(pvr_dev, quirks, quirks_size, quirks_mapping, + ARRAY_SIZE(quirks_mapping)); +} + +/** + * pvr_device_info_set_enhancements() - Set device enhancements from device information in firmware + * @pvr_dev: Device pointer. + * @enhancements: Pointer to enhancements mask in device information. + * @enhancements_size: Size of enhancements mask, in u64s. + */ +void pvr_device_info_set_enhancements(struct pvr_device *pvr_dev, const u64 *enhancements, + u32 enhancements_size) +{ + BUILD_BUG_ON(ARRAY_SIZE(enhancements_mapping) != PVR_FW_HAS_ERN_MAX); + + pvr_device_info_set_common(pvr_dev, enhancements, enhancements_size, + enhancements_mapping, ARRAY_SIZE(enhancements_mapping)); +} + +#define FEATURE_MAPPING(fw_feature, feature) \ + [PVR_FW_HAS_FEATURE_##fw_feature] = { \ + .flag_offset = offsetof(struct pvr_device, features.has_##feature), \ + .value_offset = 0 \ + } + +#define FEATURE_MAPPING_VALUE(fw_feature, feature) \ + [PVR_FW_HAS_FEATURE_##fw_feature] = { \ + .flag_offset = offsetof(struct pvr_device, features.has_##feature), \ + .value_offset = offsetof(struct pvr_device, features.feature) \ + } + +static const struct { + uintptr_t flag_offset; + uintptr_t value_offset; +} features_mapping[] = { + FEATURE_MAPPING(AXI_ACELITE, axi_acelite), + FEATURE_MAPPING_VALUE(CDM_CONTROL_STREAM_FORMAT, cdm_control_stream_format), + FEATURE_MAPPING(CLUSTER_GROUPING, cluster_grouping), + FEATURE_MAPPING_VALUE(COMMON_STORE_SIZE_IN_DWORDS, common_store_size_in_dwords), + FEATURE_MAPPING(COMPUTE, compute), + FEATURE_MAPPING(COMPUTE_MORTON_CAPABLE, compute_morton_capable), + FEATURE_MAPPING(COMPUTE_OVERLAP, compute_overlap), + FEATURE_MAPPING(COREID_PER_OS, coreid_per_os), + FEATURE_MAPPING(DYNAMIC_DUST_POWER, dynamic_dust_power), + FEATURE_MAPPING_VALUE(ECC_RAMS, ecc_rams), + FEATURE_MAPPING_VALUE(FBCDC, fbcdc), + FEATURE_MAPPING_VALUE(FBCDC_ALGORITHM, fbcdc_algorithm), + FEATURE_MAPPING_VALUE(FBCDC_ARCHITECTURE, fbcdc_architecture), + FEATURE_MAPPING_VALUE(FBC_MAX_DEFAULT_DESCRIPTORS, fbc_max_default_descriptors), + FEATURE_MAPPING_VALUE(FBC_MAX_LARGE_DESCRIPTORS, fbc_max_large_descriptors), + FEATURE_MAPPING(FB_CDC_V4, fb_cdc_v4), + FEATURE_MAPPING(GPU_MULTICORE_SUPPORT, gpu_multicore_support), + FEATURE_MAPPING(GPU_VIRTUALISATION, gpu_virtualisation), + FEATURE_MAPPING(GS_RTA_SUPPORT, gs_rta_support), + 
FEATURE_MAPPING(IRQ_PER_OS, irq_per_os), + FEATURE_MAPPING_VALUE(ISP_MAX_TILES_IN_FLIGHT, isp_max_tiles_in_flight), + FEATURE_MAPPING_VALUE(ISP_SAMPLES_PER_PIXEL, isp_samples_per_pixel), + FEATURE_MAPPING(ISP_ZLS_D24_S8_PACKING_OGL_MODE, isp_zls_d24_s8_packing_ogl_mode), + FEATURE_MAPPING_VALUE(LAYOUT_MARS, layout_mars), + FEATURE_MAPPING_VALUE(MAX_PARTITIONS, max_partitions), + FEATURE_MAPPING_VALUE(META, meta), + FEATURE_MAPPING_VALUE(META_COREMEM_SIZE, meta_coremem_size), + FEATURE_MAPPING(MIPS, mips), + FEATURE_MAPPING_VALUE(NUM_CLUSTERS, num_clusters), + FEATURE_MAPPING_VALUE(NUM_ISP_IPP_PIPES, num_isp_ipp_pipes), + FEATURE_MAPPING_VALUE(NUM_OSIDS, num_osids), + FEATURE_MAPPING_VALUE(NUM_RASTER_PIPES, num_raster_pipes), + FEATURE_MAPPING(PBE2_IN_XE, pbe2_in_xe), + FEATURE_MAPPING(PBVNC_COREID_REG, pbvnc_coreid_reg), + FEATURE_MAPPING(PERFBUS, perfbus), + FEATURE_MAPPING(PERF_COUNTER_BATCH, perf_counter_batch), + FEATURE_MAPPING_VALUE(PHYS_BUS_WIDTH, phys_bus_width), + FEATURE_MAPPING(RISCV_FW_PROCESSOR, riscv_fw_processor), + FEATURE_MAPPING(ROGUEXE, roguexe), + FEATURE_MAPPING(S7_TOP_INFRASTRUCTURE, s7_top_infrastructure), + FEATURE_MAPPING(SIMPLE_INTERNAL_PARAMETER_FORMAT, simple_internal_parameter_format), + FEATURE_MAPPING(SIMPLE_INTERNAL_PARAMETER_FORMAT_V2, simple_internal_parameter_format_v2), + FEATURE_MAPPING_VALUE(SIMPLE_PARAMETER_FORMAT_VERSION, simple_parameter_format_version), + FEATURE_MAPPING_VALUE(SLC_BANKS, slc_banks), + FEATURE_MAPPING_VALUE(SLC_CACHE_LINE_SIZE_BITS, slc_cache_line_size_bits), + FEATURE_MAPPING(SLC_SIZE_CONFIGURABLE, slc_size_configurable), + FEATURE_MAPPING_VALUE(SLC_SIZE_IN_KILOBYTES, slc_size_in_kilobytes), + FEATURE_MAPPING(SOC_TIMER, soc_timer), + FEATURE_MAPPING(SYS_BUS_SECURE_RESET, sys_bus_secure_reset), + FEATURE_MAPPING(TESSELLATION, tessellation), + FEATURE_MAPPING(TILE_REGION_PROTECTION, tile_region_protection), + FEATURE_MAPPING_VALUE(TILE_SIZE_X, tile_size_x), + FEATURE_MAPPING_VALUE(TILE_SIZE_Y, tile_size_y), + FEATURE_MAPPING(TLA, tla), + FEATURE_MAPPING(TPU_CEM_DATAMASTER_GLOBAL_REGISTERS, tpu_cem_datamaster_global_registers), + FEATURE_MAPPING(TPU_DM_GLOBAL_REGISTERS, tpu_dm_global_registers), + FEATURE_MAPPING(TPU_FILTERING_MODE_CONTROL, tpu_filtering_mode_control), + FEATURE_MAPPING_VALUE(USC_MIN_OUTPUT_REGISTERS_PER_PIX, usc_min_output_registers_per_pix), + FEATURE_MAPPING(VDM_DRAWINDIRECT, vdm_drawindirect), + FEATURE_MAPPING(VDM_OBJECT_LEVEL_LLS, vdm_object_level_lls), + FEATURE_MAPPING_VALUE(VIRTUAL_ADDRESS_SPACE_BITS, virtual_address_space_bits), + FEATURE_MAPPING(WATCHDOG_TIMER, watchdog_timer), + FEATURE_MAPPING(WORKGROUP_PROTECTION, workgroup_protection), + FEATURE_MAPPING_VALUE(XE_ARCHITECTURE, xe_architecture), + FEATURE_MAPPING(XE_MEMORY_HIERARCHY, xe_memory_hierarchy), + FEATURE_MAPPING(XE_TPU2, xe_tpu2), + FEATURE_MAPPING_VALUE(XPU_MAX_REGBANKS_ADDR_WIDTH, xpu_max_regbanks_addr_width), + FEATURE_MAPPING_VALUE(XPU_MAX_SLAVES, xpu_max_slaves), + FEATURE_MAPPING_VALUE(XPU_REGISTER_BROADCAST, xpu_register_broadcast), + FEATURE_MAPPING(XT_TOP_INFRASTRUCTURE, xt_top_infrastructure), + FEATURE_MAPPING(ZLS_SUBTILE, zls_subtile), +}; + +#undef FEATURE_MAPPING_VALUE +#undef FEATURE_MAPPING + +/** + * pvr_device_info_set_features() - Set device features from device information in firmware + * @pvr_dev: Device pointer. + * @features: Pointer to features mask in device information. + * @features_size: Size of features mask, in u64s. + * @feature_param_size: Size of feature parameters, in u64s. 
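+ * + * The firmware places the parameter values immediately after the @features + * bitmask (at @features + @features_size); each set bit whose mapping has a + * non-zero value offset consumes the next parameter in bit order. For + * example (illustrative): a bitmask with only COMPUTE and NUM_CLUSTERS set + * carries exactly one parameter, the cluster count, since COMPUTE is a + * flag-only feature.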
+ * + * Returns: + * * 0 on success, or + * * -%EINVAL on malformed stream. + */ +int pvr_device_info_set_features(struct pvr_device *pvr_dev, const u64 *features, u32 features_size, + u32 feature_param_size) +{ + const u32 mapping_max = ARRAY_SIZE(features_mapping); + const u32 mapping_max_size = (mapping_max + 63) >> 6; + const u32 nr_bits = min(features_size * 64, mapping_max); + const u64 *feature_params = features + features_size; + u32 param_idx = 0; + + BUILD_BUG_ON(ARRAY_SIZE(features_mapping) != PVR_FW_HAS_FEATURE_MAX); + + /* Verify no unsupported values in the bitmask. */ + if (features_size > mapping_max_size) { + drm_warn(from_pvr_device(pvr_dev), "Unsupported features in firmware image"); + } else if (features_size == mapping_max_size && + ((mapping_max & 63) != 0)) { + u64 invalid_mask = ~0ull << (mapping_max & 63); + + if (features[features_size - 1] & invalid_mask) + drm_warn(from_pvr_device(pvr_dev), + "Unsupported features in firmware image"); + } + + for (u32 i = 0; i < nr_bits; i++) { + if (features[i >> 6] & BIT_ULL(i & 63)) { + *(bool *)((u8 *)pvr_dev + features_mapping[i].flag_offset) = true; + + if (features_mapping[i].value_offset) { + if (param_idx >= feature_param_size) + return -EINVAL; + + *(u64 *)((u8 *)pvr_dev + features_mapping[i].value_offset) = + feature_params[param_idx]; + param_idx++; + } + } + } + + return 0; +} diff --git a/drivers/gpu/drm/imagination/pvr_device_info.h b/drivers/gpu/drm/imagination/pvr_device_info.h new file mode 100644 index 000000000000..f61fb988b553 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_device_info.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_DEVICE_INFO_H +#define PVR_DEVICE_INFO_H + +#include <linux/types.h> + +struct pvr_device; + +/* + * struct pvr_device_features - Hardware feature information + */ +struct pvr_device_features { + bool has_axi_acelite; + bool has_cdm_control_stream_format; + bool has_cluster_grouping; + bool has_common_store_size_in_dwords; + bool has_compute; + bool has_compute_morton_capable; + bool has_compute_overlap; + bool has_coreid_per_os; + bool has_dynamic_dust_power; + bool has_ecc_rams; + bool has_fb_cdc_v4; + bool has_fbc_max_default_descriptors; + bool has_fbc_max_large_descriptors; + bool has_fbcdc; + bool has_fbcdc_algorithm; + bool has_fbcdc_architecture; + bool has_gpu_multicore_support; + bool has_gpu_virtualisation; + bool has_gs_rta_support; + bool has_irq_per_os; + bool has_isp_max_tiles_in_flight; + bool has_isp_samples_per_pixel; + bool has_isp_zls_d24_s8_packing_ogl_mode; + bool has_layout_mars; + bool has_max_partitions; + bool has_meta; + bool has_meta_coremem_size; + bool has_mips; + bool has_num_clusters; + bool has_num_isp_ipp_pipes; + bool has_num_osids; + bool has_num_raster_pipes; + bool has_pbe2_in_xe; + bool has_pbvnc_coreid_reg; + bool has_perfbus; + bool has_perf_counter_batch; + bool has_phys_bus_width; + bool has_riscv_fw_processor; + bool has_roguexe; + bool has_s7_top_infrastructure; + bool has_simple_internal_parameter_format; + bool has_simple_internal_parameter_format_v2; + bool has_simple_parameter_format_version; + bool has_slc_banks; + bool has_slc_cache_line_size_bits; + bool has_slc_size_configurable; + bool has_slc_size_in_kilobytes; + bool has_soc_timer; + bool has_sys_bus_secure_reset; + bool has_tessellation; + bool has_tile_region_protection; + bool has_tile_size_x; + bool has_tile_size_y; + bool has_tla; + bool 
has_tpu_cem_datamaster_global_registers; + bool has_tpu_dm_global_registers; + bool has_tpu_filtering_mode_control; + bool has_usc_min_output_registers_per_pix; + bool has_vdm_drawindirect; + bool has_vdm_object_level_lls; + bool has_virtual_address_space_bits; + bool has_watchdog_timer; + bool has_workgroup_protection; + bool has_xe_architecture; + bool has_xe_memory_hierarchy; + bool has_xe_tpu2; + bool has_xpu_max_regbanks_addr_width; + bool has_xpu_max_slaves; + bool has_xpu_register_broadcast; + bool has_xt_top_infrastructure; + bool has_zls_subtile; + + u64 cdm_control_stream_format; + u64 common_store_size_in_dwords; + u64 ecc_rams; + u64 fbc_max_default_descriptors; + u64 fbc_max_large_descriptors; + u64 fbcdc; + u64 fbcdc_algorithm; + u64 fbcdc_architecture; + u64 isp_max_tiles_in_flight; + u64 isp_samples_per_pixel; + u64 layout_mars; + u64 max_partitions; + u64 meta; + u64 meta_coremem_size; + u64 num_clusters; + u64 num_isp_ipp_pipes; + u64 num_osids; + u64 num_raster_pipes; + u64 phys_bus_width; + u64 simple_parameter_format_version; + u64 slc_banks; + u64 slc_cache_line_size_bits; + u64 slc_size_in_kilobytes; + u64 tile_size_x; + u64 tile_size_y; + u64 usc_min_output_registers_per_pix; + u64 virtual_address_space_bits; + u64 xe_architecture; + u64 xpu_max_regbanks_addr_width; + u64 xpu_max_slaves; + u64 xpu_register_broadcast; +}; + +/* + * struct pvr_device_quirks - Hardware quirk information + */ +struct pvr_device_quirks { + bool has_brn44079; + bool has_brn47217; + bool has_brn48492; + bool has_brn48545; + bool has_brn49927; + bool has_brn50767; + bool has_brn51764; + bool has_brn62269; + bool has_brn63142; + bool has_brn63553; + bool has_brn66011; + bool has_brn71242; +}; + +/* + * struct pvr_device_enhancements - Hardware enhancement information + */ +struct pvr_device_enhancements { + bool has_ern35421; + bool has_ern38020; + bool has_ern38748; + bool has_ern42064; + bool has_ern42290; + bool has_ern42606; + bool has_ern47025; + bool has_ern57596; +}; + +void pvr_device_info_set_quirks(struct pvr_device *pvr_dev, const u64 *bitmask, + u32 bitmask_len); +void pvr_device_info_set_enhancements(struct pvr_device *pvr_dev, const u64 *bitmask, + u32 bitmask_len); +int pvr_device_info_set_features(struct pvr_device *pvr_dev, const u64 *features, u32 features_size, + u32 feature_param_size); + +/* + * Meta cores + * + * These are the values for the 'meta' feature when the feature is present + * (as per &struct pvr_device_features). + */ +#define PVR_META_MTP218 (1) +#define PVR_META_MTP219 (2) +#define PVR_META_LTP218 (3) +#define PVR_META_LTP217 (4) + +enum { + PVR_FEATURE_CDM_USER_MODE_QUEUE, + PVR_FEATURE_CLUSTER_GROUPING, + PVR_FEATURE_COMPUTE_MORTON_CAPABLE, + PVR_FEATURE_FB_CDC_V4, + PVR_FEATURE_GPU_MULTICORE_SUPPORT, + PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE, + PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP, + PVR_FEATURE_S7_TOP_INFRASTRUCTURE, + PVR_FEATURE_TESSELLATION, + PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS, + PVR_FEATURE_VDM_DRAWINDIRECT, + PVR_FEATURE_VDM_OBJECT_LEVEL_LLS, + PVR_FEATURE_ZLS_SUBTILE, +}; + +#endif /* PVR_DEVICE_INFO_H */ diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c new file mode 100644 index 000000000000..5c3b2d58d766 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -0,0 +1,1501 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd.
*/ + +#include "pvr_context.h" +#include "pvr_debugfs.h" +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_free_list.h" +#include "pvr_gem.h" +#include "pvr_hwrt.h" +#include "pvr_job.h" +#include "pvr_mmu.h" +#include "pvr_power.h" +#include "pvr_rogue_defs.h" +#include "pvr_rogue_fwif_client.h" +#include "pvr_rogue_fwif_shared.h" +#include "pvr_vm.h" + +#include <uapi/drm/pvr_drm.h> + +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> + +#include <linux/err.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/xarray.h> + +/** + * DOC: PowerVR (Series 6 and later) and IMG Graphics Driver + * + * This driver supports the following PowerVR/IMG graphics cores from Imagination Technologies: + * + * * AXE-1-16M (found in Texas Instruments AM62) + */ + +/** + * pvr_ioctl_create_bo() - IOCTL to create a GEM buffer object. + * @drm_dev: [IN] Target DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_create_bo_args. + * @file: [IN] DRM file-private data. + * + * Called from userspace with %DRM_IOCTL_PVR_CREATE_BO. + * + * Return: + * * 0 on success, + * * -%EINVAL if the value of &drm_pvr_ioctl_create_bo_args.size is zero + * or wider than &typedef size_t, + * * -%EINVAL if any bits in &drm_pvr_ioctl_create_bo_args.flags that are + * reserved or undefined are set, + * * -%EINVAL if any padding fields in &drm_pvr_ioctl_create_bo_args are not + * zero, + * * Any error encountered while creating the object (see + * pvr_gem_object_create()), or + * * Any error encountered while transferring ownership of the object into a + * userspace-accessible handle (see pvr_gem_object_into_handle()). + */ +static int +pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_create_bo_args *args = raw_args; + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct pvr_file *pvr_file = to_pvr_file(file); + + struct pvr_gem_object *pvr_obj; + size_t sanitized_size; + + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* All padding fields must be zeroed. */ + if (args->_padding_c != 0) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + /* + * On 64-bit platforms (our primary target), size_t is a u64. However, + * on other architectures we have to check for overflow when casting + * down to size_t from u64. + * + * We also disallow zero-sized allocations, and reserved (kernel-only) + * flags. + */ + if (args->size > SIZE_MAX || args->size == 0 || args->flags & + ~DRM_PVR_BO_FLAGS_MASK || args->size & (PVR_DEVICE_PAGE_SIZE - 1)) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + sanitized_size = (size_t)args->size; + + /* + * Create a buffer object and transfer ownership to a userspace- + * accessible handle. + */ + pvr_obj = pvr_gem_object_create(pvr_dev, sanitized_size, args->flags); + if (IS_ERR(pvr_obj)) { + err = PTR_ERR(pvr_obj); + goto err_drm_dev_exit; + } + + /* This function will not modify &args->handle unless it succeeds. 
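+ * On success the handle owns the reference we currently hold on the + * object, so that path must not put it; on failure the error path below + * releases our reference.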
*/ + err = pvr_gem_object_into_handle(pvr_obj, pvr_file, &args->handle); + if (err) + goto err_destroy_obj; + + drm_dev_exit(idx); + + return 0; + +err_destroy_obj: + /* + * GEM objects are refcounted, so there is no explicit destructor + * function. Instead, we release the singular reference we currently + * hold on the object and let GEM take care of the rest. + */ + pvr_gem_object_put(pvr_obj); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * pvr_ioctl_get_bo_mmap_offset() - IOCTL to generate a "fake" offset to be + * used when calling mmap() from userspace to map the given GEM buffer object + * @drm_dev: [IN] DRM device (unused). + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_get_bo_mmap_offset_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET. + * + * This IOCTL does *not* perform an mmap. See the docs on + * &struct drm_pvr_ioctl_get_bo_mmap_offset_args for details. + * + * Return: + * * 0 on success, + * * -%ENOENT if the handle does not reference a valid GEM buffer object, + * * -%EINVAL if any padding fields in &struct + * drm_pvr_ioctl_get_bo_mmap_offset_args are not zero, or + * * Any error returned by drm_gem_create_mmap_offset(). + */ +static int +pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_get_bo_mmap_offset_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_gem_object *pvr_obj; + struct drm_gem_object *gem_obj; + int idx; + int ret; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* All padding fields must be zeroed. */ + if (args->_padding_4 != 0) { + ret = -EINVAL; + goto err_drm_dev_exit; + } + + /* + * Obtain a kernel reference to the buffer object. This reference is + * counted and must be manually dropped before returning. If a buffer + * object cannot be found for the specified handle, return -%ENOENT (No + * such file or directory). + */ + pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle); + if (!pvr_obj) { + ret = -ENOENT; + goto err_drm_dev_exit; + } + + gem_obj = gem_from_pvr_gem(pvr_obj); + + /* + * Allocate a fake offset which can be used in userspace calls to mmap + * on the DRM device file. If this fails, return the error code. This + * operation is idempotent. + */ + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret != 0) { + /* Drop our reference to the buffer object. */ + drm_gem_object_put(gem_obj); + goto err_drm_dev_exit; + } + + /* + * Read out the fake offset allocated by the earlier call to + * drm_gem_create_mmap_offset. + */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + /* Drop our reference to the buffer object. 
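+ * This only releases the lookup reference taken by + * pvr_gem_object_from_handle() above; the offset written to args->offset + * stays valid for the lifetime of the GEM object.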
*/ + pvr_gem_object_put(pvr_obj); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return ret; +} + +static __always_inline u64 +pvr_fw_version_packed(u32 major, u32 minor) +{ + return ((u64)major << 32) | minor; +} + +static u32 +rogue_get_common_store_partition_space_size(struct pvr_device *pvr_dev) +{ + u32 max_partitions = 0; + u32 tile_size_x = 0; + u32 tile_size_y = 0; + + PVR_FEATURE_VALUE(pvr_dev, tile_size_x, &tile_size_x); + PVR_FEATURE_VALUE(pvr_dev, tile_size_y, &tile_size_y); + PVR_FEATURE_VALUE(pvr_dev, max_partitions, &max_partitions); + + if (tile_size_x == 16 && tile_size_y == 16) { + u32 usc_min_output_registers_per_pix = 0; + + PVR_FEATURE_VALUE(pvr_dev, usc_min_output_registers_per_pix, + &usc_min_output_registers_per_pix); + + return tile_size_x * tile_size_y * max_partitions * + usc_min_output_registers_per_pix; + } + + return max_partitions * 1024; +} + +static u32 +rogue_get_common_store_alloc_region_size(struct pvr_device *pvr_dev) +{ + u32 common_store_size_in_dwords = 512 * 4 * 4; + u32 alloc_region_size; + + PVR_FEATURE_VALUE(pvr_dev, common_store_size_in_dwords, &common_store_size_in_dwords); + + alloc_region_size = common_store_size_in_dwords - (256U * 4U) - + rogue_get_common_store_partition_space_size(pvr_dev); + + if (PVR_HAS_QUIRK(pvr_dev, 44079)) { + u32 common_store_split_point = (768U * 4U * 4U); + + return min(common_store_split_point - (256U * 4U), alloc_region_size); + } + + return alloc_region_size; +} + +static inline u32 +rogue_get_num_phantoms(struct pvr_device *pvr_dev) +{ + u32 num_clusters = 1; + + PVR_FEATURE_VALUE(pvr_dev, num_clusters, &num_clusters); + + return ROGUE_REQ_NUM_PHANTOMS(num_clusters); +} + +static inline u32 +rogue_get_max_coeffs(struct pvr_device *pvr_dev) +{ + u32 max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS; + u32 pending_allocation_shared_regs = 2U * 1024U; + u32 pending_allocation_coeff_regs = 0U; + u32 num_phantoms = rogue_get_num_phantoms(pvr_dev); + u32 tiles_in_flight = 0; + u32 max_coeff_pixel_portion; + + PVR_FEATURE_VALUE(pvr_dev, isp_max_tiles_in_flight, &tiles_in_flight); + max_coeff_pixel_portion = DIV_ROUND_UP(tiles_in_flight, num_phantoms); + max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS; + + /* + * Compute tasks on cores with BRN48492 and without compute overlap may lock + * up without two additional lines of coeffs. + */ + if (PVR_HAS_QUIRK(pvr_dev, 48492) && !PVR_HAS_FEATURE(pvr_dev, compute_overlap)) + pending_allocation_coeff_regs = 2U * 1024U; + + if (PVR_HAS_ENHANCEMENT(pvr_dev, 38748)) + pending_allocation_shared_regs = 0; + + if (PVR_HAS_ENHANCEMENT(pvr_dev, 38020)) + max_coeff_additional_portion += ROGUE_MAX_COMPUTE_SHARED_REGISTERS; + + return rogue_get_common_store_alloc_region_size(pvr_dev) + pending_allocation_coeff_regs - + (max_coeff_pixel_portion + max_coeff_additional_portion + + pending_allocation_shared_regs); +} + +static inline u32 +rogue_get_cdm_max_local_mem_size_regs(struct pvr_device *pvr_dev) +{ + u32 available_coeffs_in_dwords = rogue_get_max_coeffs(pvr_dev); + + if (PVR_HAS_QUIRK(pvr_dev, 48492) && PVR_HAS_FEATURE(pvr_dev, roguexe) && + !PVR_HAS_FEATURE(pvr_dev, compute_overlap)) { + /* Driver must not use the 2 reserved lines. */ + available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2; + } + + /* + * The maximum amount of local memory available to a kernel is the minimum + * of the total number of coefficient registers available and the max common + * store allocation size which can be made by the CDM. 
+ * + * If any coeff lines are reserved for tessellation or pixel then we need to + * subtract those too. + */ + return min(available_coeffs_in_dwords, (u32)ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS); +} + +/** + * pvr_dev_query_gpu_info_get() + * @pvr_dev: Device pointer. + * @args: [IN] Device query arguments containing a pointer to a userspace + * struct drm_pvr_dev_query_gpu_info. + * + * If the query object pointer is NULL, the size field is updated with the + * expected size of the query object. + * + * Returns: + * * 0 on success, or if size is requested using a NULL pointer, or + * * -%E2BIG if the indicated length of the allocation is less than is + * required to contain the copied data, or + * * -%EFAULT if local memory could not be copied to userspace. + */ +static int +pvr_dev_query_gpu_info_get(struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_gpu_info gpu_info = {0}; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_gpu_info); + return 0; + } + + gpu_info.gpu_id = + pvr_gpu_id_to_packed_bvnc(&pvr_dev->gpu_id); + gpu_info.num_phantoms = rogue_get_num_phantoms(pvr_dev); + + err = PVR_UOBJ_SET(args->pointer, args->size, gpu_info); + if (err < 0) + return err; + + if (args->size > sizeof(gpu_info)) + args->size = sizeof(gpu_info); + return 0; +} + +/** + * pvr_dev_query_runtime_info_get() + * @pvr_dev: Device pointer. + * @args: [IN] Device query arguments containing a pointer to a userspace + * struct drm_pvr_dev_query_runtime_info. + * + * If the query object pointer is NULL, the size field is updated with the + * expected size of the query object. + * + * Returns: + * * 0 on success, or if size is requested using a NULL pointer, or + * * -%E2BIG if the indicated length of the allocation is less than is + * required to contain the copied data, or + * * -%EFAULT if local memory could not be copied to userspace. + */ +static int +pvr_dev_query_runtime_info_get(struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_runtime_info runtime_info = {0}; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_runtime_info); + return 0; + } + + runtime_info.free_list_min_pages = + pvr_get_free_list_min_pages(pvr_dev); + runtime_info.free_list_max_pages = + ROGUE_PM_MAX_FREELIST_SIZE / ROGUE_PM_PAGE_SIZE; + runtime_info.common_store_alloc_region_size = + rogue_get_common_store_alloc_region_size(pvr_dev); + runtime_info.common_store_partition_space_size = + rogue_get_common_store_partition_space_size(pvr_dev); + runtime_info.max_coeffs = rogue_get_max_coeffs(pvr_dev); + runtime_info.cdm_max_local_mem_size_regs = + rogue_get_cdm_max_local_mem_size_regs(pvr_dev); + + err = PVR_UOBJ_SET(args->pointer, args->size, runtime_info); + if (err < 0) + return err; + + if (args->size > sizeof(runtime_info)) + args->size = sizeof(runtime_info); + return 0; +} + +/** + * pvr_dev_query_quirks_get() - Unpack array of quirks at the address given + * in a struct drm_pvr_dev_query_quirks, or gets the amount of space required + * for it. + * @pvr_dev: Device pointer. + * @args: [IN] Device query arguments containing a pointer to a userspace + * struct drm_pvr_dev_query_quirks. + * + * If the query object pointer is NULL, the size field is updated with the + * expected size of the query object. + * If the userspace pointer in the query object is NULL, or the count is + * short, no data is copied.
+ * The count field will be updated to that copied, or if either pointer is + * NULL, that which would have been copied. + * The size field in the query object will be updated to the size copied. + * + * Returns: + * * 0 on success, or if size/count is requested using a NULL pointer, or + * * -%EINVAL if args contained non-zero reserved fields, or + * * -%E2BIG if the indicated length of the allocation is less than is + * required to contain the copied data, or + * * -%EFAULT if local memory could not be copied to userspace. + */ +static int +pvr_dev_query_quirks_get(struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + /* + * @FIXME - hardcoding of numbers here is intended as an + * intermediate step so the UAPI can be fixed, but requires a + * refactor in the future to store them in a more appropriate + * location. + */ + static const u32 umd_quirks_musthave[] = { + 47217, + 49927, + 62269, + }; + static const u32 umd_quirks[] = { + 48545, + 51764, + }; + struct drm_pvr_dev_query_quirks query; + u32 out[ARRAY_SIZE(umd_quirks_musthave) + ARRAY_SIZE(umd_quirks)]; + size_t out_musthave_count = 0; + size_t out_count = 0; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_quirks); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + + if (err < 0) + return err; + if (query._padding_c) + return -EINVAL; + + for (int i = 0; i < ARRAY_SIZE(umd_quirks_musthave); i++) { + if (pvr_device_has_uapi_quirk(pvr_dev, umd_quirks_musthave[i])) { + out[out_count++] = umd_quirks_musthave[i]; + out_musthave_count++; + } + } + + for (int i = 0; i < ARRAY_SIZE(umd_quirks); i++) { + if (pvr_device_has_uapi_quirk(pvr_dev, umd_quirks[i])) + out[out_count++] = umd_quirks[i]; + } + + if (!query.quirks) + goto copy_out; + if (query.count < out_count) + return -E2BIG; + + if (copy_to_user(u64_to_user_ptr(query.quirks), out, + out_count * sizeof(u32))) { + return -EFAULT; + } + + query.musthave_count = out_musthave_count; + +copy_out: + query.count = out_count; + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +/** + * pvr_dev_query_enhancements_get() - Unpack array of enhancements at the + * address given in a struct drm_pvr_dev_query_enhancements, or gets the amount + * of space required for it. + * @pvr_dev: Device pointer. + * @args: [IN] Device query arguments containing a pointer to a userspace + * struct drm_pvr_dev_query_enhancements. + * + * If the query object pointer is NULL, the size field is updated with the + * expected size of the query object. + * If the userspace pointer in the query object is NULL, or the count is + * short, no data is copied. + * The count field will be updated to that copied, or if either pointer is + * NULL, that which would have been copied. + * The size field in the query object will be updated to the size copied. + * + * Returns: + * * 0 on success, or if size/count is requested using a NULL pointer, or + * * -%EINVAL if args contained non-zero reserved fields, or + * * -%E2BIG if the indicated length of the allocation is less than is + * required to contain the copied data, or + * * -%EFAULT if local memory could not be copied to userspace.
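+ * + * A typical userspace sequence (an illustrative sketch, not driver code; + * error checking omitted) issues the query twice, once to learn the count + * and once to fetch the array: + * + * .. code-block:: c + * + *	struct drm_pvr_dev_query_enhancements query = {0}; + *	struct drm_pvr_ioctl_dev_query_args args = { + *		.type = DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET, + *		.size = sizeof(query), + *		.pointer = (__u64)(uintptr_t)&query, + *	}; + * + *	ioctl(fd, DRM_IOCTL_PVR_DEV_QUERY, &args); // sets query.count + *	query.enhancements = (__u64)(uintptr_t)calloc(query.count, sizeof(__u32)); + *	ioctl(fd, DRM_IOCTL_PVR_DEV_QUERY, &args); // fills the array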
+ */ +static int +pvr_dev_query_enhancements_get(struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + /* + * @FIXME - hardcoding of numbers here is intended as an + * intermediate step so the UAPI can be fixed, but requires a + * refactor in the future to store them in a more appropriate + * location. + */ + static const u32 umd_enhancements[] = { + 35421, + 42064, + }; + struct drm_pvr_dev_query_enhancements query; + u32 out[ARRAY_SIZE(umd_enhancements)]; + size_t out_idx = 0; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_enhancements); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + + if (err < 0) + return err; + if (query._padding_a) + return -EINVAL; + if (query._padding_c) + return -EINVAL; + + for (int i = 0; i < ARRAY_SIZE(umd_enhancements); i++) { + if (pvr_device_has_uapi_enhancement(pvr_dev, umd_enhancements[i])) + out[out_idx++] = umd_enhancements[i]; + } + + if (!query.enhancements) + goto copy_out; + if (query.count < out_idx) + return -E2BIG; + + if (copy_to_user(u64_to_user_ptr(query.enhancements), out, + out_idx * sizeof(u32))) { + return -EFAULT; + } + +copy_out: + query.count = out_idx; + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +/** + * pvr_ioctl_dev_query() - IOCTL to copy information about a device + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_dev_query_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_DEV_QUERY. + * If the given receiving struct pointer is NULL, or the indicated size is too + * small, the expected size of the struct type will be returned in the size + * argument field. + * + * Return: + * * 0 on success or when fetching the size with args->pointer == NULL, or + * * -%E2BIG if the indicated size of the receiving struct is less than is + * required to contain the copied data, or + * * -%EINVAL if the indicated struct type is unknown, or + * * -%ENOMEM if local memory could not be allocated, or + * * -%EFAULT if local memory could not be copied to userspace. + */ +static int +pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct drm_pvr_ioctl_dev_query_args *args = raw_args; + int idx; + int ret = -EINVAL; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + switch ((enum drm_pvr_dev_query)args->type) { + case DRM_PVR_DEV_QUERY_GPU_INFO_GET: + ret = pvr_dev_query_gpu_info_get(pvr_dev, args); + break; + + case DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET: + ret = pvr_dev_query_runtime_info_get(pvr_dev, args); + break; + + case DRM_PVR_DEV_QUERY_QUIRKS_GET: + ret = pvr_dev_query_quirks_get(pvr_dev, args); + break; + + case DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET: + ret = pvr_dev_query_enhancements_get(pvr_dev, args); + break; + + case DRM_PVR_DEV_QUERY_HEAP_INFO_GET: + ret = pvr_heap_info_get(pvr_dev, args); + break; + + case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET: + ret = pvr_static_data_areas_get(pvr_dev, args); + break; + } + + drm_dev_exit(idx); + + return ret; +} + +/** + * pvr_ioctl_create_context() - IOCTL to create a context + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_create_context_args. + * @file: [IN] DRM file private data.
+ * + * Called from userspace with %DRM_IOCTL_PVR_CREATE_CONTEXT. + * + * Return: + * * 0 on success, or + * * -%EINVAL if provided arguments are invalid, or + * * -%EFAULT if arguments can't be copied from userspace, or + * * Any error returned by pvr_create_render_context(). + */ +static int +pvr_ioctl_create_context(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_create_context_args *args = raw_args; + struct pvr_file *pvr_file = file->driver_priv; + int idx; + int ret; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + ret = pvr_context_create(pvr_file, args); + + drm_dev_exit(idx); + + return ret; +} + +/** + * pvr_ioctl_destroy_context() - IOCTL to destroy a context + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_destroy_context_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_DESTROY_CONTEXT. + * + * Return: + * * 0 on success, or + * * -%EINVAL if context not in context list. + */ +static int +pvr_ioctl_destroy_context(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_destroy_context_args *args = raw_args; + struct pvr_file *pvr_file = file->driver_priv; + + if (args->_padding_4) + return -EINVAL; + + return pvr_context_destroy(pvr_file, args->handle); +} + +/** + * pvr_ioctl_create_free_list() - IOCTL to create a free list + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_create_free_list_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_CREATE_FREE_LIST. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_free_list_create(). + */ +static int +pvr_ioctl_create_free_list(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_create_free_list_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_free_list *free_list; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + free_list = pvr_free_list_create(pvr_file, args); + if (IS_ERR(free_list)) { + err = PTR_ERR(free_list); + goto err_drm_dev_exit; + } + + /* Allocate object handle for userspace. */ + err = xa_alloc(&pvr_file->free_list_handles, + &args->handle, + free_list, + xa_limit_32b, + GFP_KERNEL); + if (err < 0) + goto err_cleanup; + + drm_dev_exit(idx); + + return 0; + +err_cleanup: + pvr_free_list_put(free_list); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * pvr_ioctl_destroy_free_list() - IOCTL to destroy a free list + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_destroy_free_list_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_DESTROY_FREE_LIST. + * + * Return: + * * 0 on success, or + * * -%EINVAL if free list not in object list. 
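+ * + * The handle lookup and removal below are a single atomic xa_erase() + * operation, so a stale or already-destroyed handle simply yields %NULL and + * -%EINVAL.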
+ */ +static int +pvr_ioctl_destroy_free_list(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_destroy_free_list_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_free_list *free_list; + + if (args->_padding_4) + return -EINVAL; + + free_list = xa_erase(&pvr_file->free_list_handles, args->handle); + if (!free_list) + return -EINVAL; + + pvr_free_list_put(free_list); + return 0; +} + +/** + * pvr_ioctl_create_hwrt_dataset() - IOCTL to create a HWRT dataset + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_create_hwrt_dataset_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_CREATE_HWRT_DATASET. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_hwrt_dataset_create(). + */ +static int +pvr_ioctl_create_hwrt_dataset(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_create_hwrt_dataset_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_hwrt_dataset *hwrt; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + hwrt = pvr_hwrt_dataset_create(pvr_file, args); + if (IS_ERR(hwrt)) { + err = PTR_ERR(hwrt); + goto err_drm_dev_exit; + } + + /* Allocate object handle for userspace. */ + err = xa_alloc(&pvr_file->hwrt_handles, + &args->handle, + hwrt, + xa_limit_32b, + GFP_KERNEL); + if (err < 0) + goto err_cleanup; + + drm_dev_exit(idx); + + return 0; + +err_cleanup: + pvr_hwrt_dataset_put(hwrt); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * pvr_ioctl_destroy_hwrt_dataset() - IOCTL to destroy a HWRT dataset + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_destroy_hwrt_dataset_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_DESTROY_HWRT_DATASET. + * + * Return: + * * 0 on success, or + * * -%EINVAL if HWRT dataset not in object list. + */ +static int +pvr_ioctl_destroy_hwrt_dataset(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_destroy_hwrt_dataset_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_hwrt_dataset *hwrt; + + if (args->_padding_4) + return -EINVAL; + + hwrt = xa_erase(&pvr_file->hwrt_handles, args->handle); + if (!hwrt) + return -EINVAL; + + pvr_hwrt_dataset_put(hwrt); + return 0; +} + +/** + * pvr_ioctl_create_vm_context() - IOCTL to create a VM context + * @drm_dev: [IN] DRM device. + * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_create_vm_context_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_CREATE_VM_CONTEXT. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_vm_create_context(). 
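+ * + * The handle returned on success is allocated with xa_alloc() from an + * xarray initialized with %XA_FLAGS_ALLOC1, so a valid handle is never + * zero.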
+ */ +static int +pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_create_vm_context_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + if (args->_padding_4) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + vm_ctx = pvr_vm_create_context(pvr_file->pvr_dev, true); + if (IS_ERR(vm_ctx)) { + err = PTR_ERR(vm_ctx); + goto err_drm_dev_exit; + } + + /* Allocate object handle for userspace. */ + err = xa_alloc(&pvr_file->vm_ctx_handles, + &args->handle, + vm_ctx, + xa_limit_32b, + GFP_KERNEL); + if (err < 0) + goto err_cleanup; + + drm_dev_exit(idx); + + return 0; + +err_cleanup: + pvr_vm_context_put(vm_ctx); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * pvr_ioctl_destroy_vm_context() - IOCTL to destroy a VM context + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_destroy_vm_context_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_DESTROY_VM_CONTEXT. + * + * Return: + * * 0 on success, or + * * -%EINVAL if object not in object list. + */ +static int +pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_destroy_vm_context_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + + if (args->_padding_4) + return -EINVAL; + + vm_ctx = xa_erase(&pvr_file->vm_ctx_handles, args->handle); + if (!vm_ctx) + return -EINVAL; + + pvr_vm_context_put(vm_ctx); + return 0; +} + +/** + * pvr_ioctl_vm_map() - IOCTL to map buffer to GPU address space. + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_vm_map_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_VM_MAP. + * + * Return: + * * 0 on success, + * * -%EINVAL if &drm_pvr_ioctl_vm_map_args.flags is not zero, + * * -%EINVAL if the bounds specified by &drm_pvr_ioctl_vm_map_args.offset + * and &drm_pvr_ioctl_vm_map_args.size are not valid or do not fall + * within the buffer object specified by + * &drm_pvr_ioctl_vm_map_args.handle, + * * -%EINVAL if the bounds specified by + * &drm_pvr_ioctl_vm_map_args.device_addr and + * &drm_pvr_ioctl_vm_map_args.size do not form a valid device-virtual + * address range which falls entirely within a single heap, or + * * -%ENOENT if &drm_pvr_ioctl_vm_map_args.handle does not refer to a + * valid PowerVR buffer object. + */ +static int +pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct drm_pvr_ioctl_vm_map_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + + struct pvr_gem_object *pvr_obj; + size_t pvr_obj_size; + + u64 offset_plus_size; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* Initial validation of args.
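+ * check_add_overflow() below also guards the args->offset + args->size + * sum, so the later bounds check against the object size cannot be + * defeated by wraparound.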
*/ + if (args->_padding_14) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + if (args->flags != 0 || + check_add_overflow(args->offset, args->size, &offset_plus_size) || + !pvr_find_heap_containing(pvr_dev, args->device_addr, args->size)) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle); + if (!vm_ctx) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle); + if (!pvr_obj) { + err = -ENOENT; + goto err_put_vm_context; + } + + pvr_obj_size = pvr_gem_object_size(pvr_obj); + + /* + * Validate offset and size args. The alignment of these will be + * checked when mapping; for now just check that they're within valid + * bounds + */ + if (args->offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) { + err = -EINVAL; + goto err_put_pvr_object; + } + + err = pvr_vm_map(vm_ctx, pvr_obj, args->offset, + args->device_addr, args->size); + if (err) + goto err_put_pvr_object; + + /* + * In order to set up the mapping, we needed a reference to &pvr_obj. + * However, pvr_vm_map() obtains and stores its own reference, so we + * must release ours before returning. + */ + +err_put_pvr_object: + pvr_gem_object_put(pvr_obj); + +err_put_vm_context: + pvr_vm_context_put(vm_ctx); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * pvr_ioctl_vm_unmap() - IOCTL to unmap buffer from GPU address space. + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_vm_unmap_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_VM_UNMAP. + * + * Return: + * * 0 on success, + * * -%EINVAL if &drm_pvr_ioctl_vm_unmap_args.device_addr is not a valid + * device page-aligned device-virtual address, or + * * -%ENOENT if there is currently no PowerVR buffer object mapped at + * &drm_pvr_ioctl_vm_unmap_args.device_addr. + */ +static int +pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_vm_unmap_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + int err; + + /* Initial validation of args. */ + if (args->_padding_4) + return -EINVAL; + + vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle); + if (!vm_ctx) + return -EINVAL; + + err = pvr_vm_unmap(vm_ctx, args->device_addr, args->size); + + pvr_vm_context_put(vm_ctx); + + return err; +} + +/** + * pvr_ioctl_submit_jobs() - IOCTL to submit jobs to the GPU + * @drm_dev: [IN] DRM device. + * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type + * &struct drm_pvr_ioctl_submit_jobs_args. + * @file: [IN] DRM file private data. + * + * Called from userspace with %DRM_IOCTL_PVR_SUBMIT_JOBS. + * + * Return: + * * 0 on success, or + * * -%EINVAL if arguments are invalid.
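+ * + * Job arrays are described by &struct drm_pvr_obj_array, which carries a + * user pointer, an element count and a per-element stride; the + * pvr_get_uobj*()/pvr_set_uobj*() helpers below use that stride to accept + * both older (smaller) and newer (larger) userspace struct layouts.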
+ */ +static int +pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args, + struct drm_file *file) +{ + struct drm_pvr_ioctl_submit_jobs_args *args = raw_args; + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct pvr_file *pvr_file = to_pvr_file(file); + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + err = pvr_submit_jobs(pvr_dev, pvr_file, args); + + drm_dev_exit(idx); + + return err; +} + +int +pvr_get_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, void *out) +{ + if (usr_stride < min_stride) + return -EINVAL; + + return copy_struct_from_user(out, obj_size, u64_to_user_ptr(usr_ptr), usr_stride); +} + +int +pvr_set_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, const void *in) +{ + if (usr_stride < min_stride) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_stride, obj_size))) + return -EFAULT; + + if (usr_stride > obj_size && + clear_user(u64_to_user_ptr(usr_ptr + obj_size), usr_stride - obj_size)) { + return -EFAULT; + } + + return 0; +} + +int +pvr_get_uobj_array(const struct drm_pvr_obj_array *in, u32 min_stride, u32 obj_size, void **out) +{ + int ret = 0; + void *out_alloc; + + if (in->stride < min_stride) + return -EINVAL; + + if (!in->count) + return 0; + + out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); + if (!out_alloc) + return -ENOMEM; + + if (obj_size == in->stride) { + if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), + (unsigned long)obj_size * in->count)) + ret = -EFAULT; + } else { + void __user *in_ptr = u64_to_user_ptr(in->array); + void *out_ptr = out_alloc; + + for (u32 i = 0; i < in->count; i++) { + ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); + if (ret) + break; + + out_ptr += obj_size; + in_ptr += in->stride; + } + } + + if (ret) { + kvfree(out_alloc); + return ret; + } + + *out = out_alloc; + return 0; +} + +int +pvr_set_uobj_array(const struct drm_pvr_obj_array *out, u32 min_stride, u32 obj_size, + const void *in) +{ + if (out->stride < min_stride) + return -EINVAL; + + if (!out->count) + return 0; + + if (obj_size == out->stride) { + if (copy_to_user(u64_to_user_ptr(out->array), in, + (unsigned long)obj_size * out->count)) + return -EFAULT; + } else { + u32 cpy_elem_size = min_t(u32, out->stride, obj_size); + void __user *out_ptr = u64_to_user_ptr(out->array); + const void *in_ptr = in; + + for (u32 i = 0; i < out->count; i++) { + if (copy_to_user(out_ptr, in_ptr, cpy_elem_size)) + return -EFAULT; + + out_ptr += out->stride; + in_ptr += obj_size; + } + + if (out->stride > obj_size && + clear_user(u64_to_user_ptr(out->array + obj_size), + out->stride - obj_size)) { + return -EFAULT; + } + } + + return 0; +} + +#define DRM_PVR_IOCTL(_name, _func, _flags) \ + DRM_IOCTL_DEF_DRV(PVR_##_name, pvr_ioctl_##_func, _flags) + +/* clang-format off */ + +static const struct drm_ioctl_desc pvr_drm_driver_ioctls[] = { + DRM_PVR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(CREATE_BO, create_bo, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(GET_BO_MMAP_OFFSET, get_bo_mmap_offset, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(CREATE_VM_CONTEXT, create_vm_context, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(DESTROY_VM_CONTEXT, destroy_vm_context, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(VM_MAP, vm_map, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(VM_UNMAP, vm_unmap, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(CREATE_CONTEXT, create_context, DRM_RENDER_ALLOW), + DRM_PVR_IOCTL(DESTROY_CONTEXT, destroy_context, DRM_RENDER_ALLOW), +
+	DRM_PVR_IOCTL(CREATE_FREE_LIST, create_free_list, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_FREE_LIST, destroy_free_list, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_HWRT_DATASET, create_hwrt_dataset, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_HWRT_DATASET, destroy_hwrt_dataset, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(SUBMIT_JOBS, submit_jobs, DRM_RENDER_ALLOW),
+};
+
+/* clang-format on */
+
+#undef DRM_PVR_IOCTL
+
+/**
+ * pvr_drm_driver_open() - Driver callback when a new &struct drm_file is opened
+ * @drm_dev: [IN] DRM device.
+ * @file: [IN] DRM file private data.
+ *
+ * Allocates powervr-specific file private data (&struct pvr_file).
+ *
+ * Registered in &pvr_drm_driver.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%ENOMEM if the allocation of a &struct pvr_file fails, or
+ *  * Any error returned by pvr_memory_context_init().
+ */
+static int
+pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
+{
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct pvr_file *pvr_file;
+
+	pvr_file = kzalloc(sizeof(*pvr_file), GFP_KERNEL);
+	if (!pvr_file)
+		return -ENOMEM;
+
+	/*
+	 * Store reference to base DRM file private data for use by
+	 * from_pvr_file.
+	 */
+	pvr_file->file = file;
+
+	/*
+	 * Store reference to powervr-specific outer device struct in file
+	 * private data for convenient access.
+	 */
+	pvr_file->pvr_dev = pvr_dev;
+
+	xa_init_flags(&pvr_file->ctx_handles, XA_FLAGS_ALLOC1);
+	xa_init_flags(&pvr_file->free_list_handles, XA_FLAGS_ALLOC1);
+	xa_init_flags(&pvr_file->hwrt_handles, XA_FLAGS_ALLOC1);
+	xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
+
+	/*
+	 * Store reference to powervr-specific file private data in DRM file
+	 * private data.
+	 */
+	file->driver_priv = pvr_file;
+
+	return 0;
+}
+
+/**
+ * pvr_drm_driver_postclose() - One of the driver callbacks when a &struct
+ * drm_file is closed.
+ * @drm_dev: [IN] DRM device (unused).
+ * @file: [IN] DRM file private data.
+ *
+ * Frees powervr-specific file private data (&struct pvr_file).
+ *
+ * Registered in &pvr_drm_driver.
+ */
+static void
+pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
+			 struct drm_file *file)
+{
+	struct pvr_file *pvr_file = to_pvr_file(file);
+
+	/* Kill remaining contexts. */
+	pvr_destroy_contexts_for_file(pvr_file);
+
+	/* Drop references on any remaining objects.
*/ + pvr_destroy_free_lists_for_file(pvr_file); + pvr_destroy_hwrt_datasets_for_file(pvr_file); + pvr_destroy_vm_contexts_for_file(pvr_file); + + kfree(pvr_file); + file->driver_priv = NULL; +} + +DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops); + +static struct drm_driver pvr_drm_driver = { + .driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER | + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + .open = pvr_drm_driver_open, + .postclose = pvr_drm_driver_postclose, + .ioctls = pvr_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(pvr_drm_driver_ioctls), + .fops = &pvr_drm_driver_fops, +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = pvr_debugfs_init, +#endif + + .name = PVR_DRIVER_NAME, + .desc = PVR_DRIVER_DESC, + .date = PVR_DRIVER_DATE, + .major = PVR_DRIVER_MAJOR, + .minor = PVR_DRIVER_MINOR, + .patchlevel = PVR_DRIVER_PATCHLEVEL, + + .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, + .gem_create_object = pvr_gem_create_object, +}; + +static int +pvr_probe(struct platform_device *plat_dev) +{ + struct pvr_device *pvr_dev; + struct drm_device *drm_dev; + int err; + + pvr_dev = devm_drm_dev_alloc(&plat_dev->dev, &pvr_drm_driver, + struct pvr_device, base); + if (IS_ERR(pvr_dev)) + return PTR_ERR(pvr_dev); + + drm_dev = &pvr_dev->base; + + platform_set_drvdata(plat_dev, drm_dev); + + init_rwsem(&pvr_dev->reset_sem); + + pvr_context_device_init(pvr_dev); + + err = pvr_queue_device_init(pvr_dev); + if (err) + goto err_context_fini; + + devm_pm_runtime_enable(&plat_dev->dev); + pm_runtime_mark_last_busy(&plat_dev->dev); + + pm_runtime_set_autosuspend_delay(&plat_dev->dev, 50); + pm_runtime_use_autosuspend(&plat_dev->dev); + pvr_watchdog_init(pvr_dev); + + err = pvr_device_init(pvr_dev); + if (err) + goto err_watchdog_fini; + + err = drm_dev_register(drm_dev, 0); + if (err) + goto err_device_fini; + + xa_init_flags(&pvr_dev->free_list_ids, XA_FLAGS_ALLOC1); + xa_init_flags(&pvr_dev->job_ids, XA_FLAGS_ALLOC1); + + return 0; + +err_device_fini: + pvr_device_fini(pvr_dev); + +err_watchdog_fini: + pvr_watchdog_fini(pvr_dev); + + pvr_queue_device_fini(pvr_dev); + +err_context_fini: + pvr_context_device_fini(pvr_dev); + + return err; +} + +static int +pvr_remove(struct platform_device *plat_dev) +{ + struct drm_device *drm_dev = platform_get_drvdata(plat_dev); + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + + WARN_ON(!xa_empty(&pvr_dev->job_ids)); + WARN_ON(!xa_empty(&pvr_dev->free_list_ids)); + + xa_destroy(&pvr_dev->job_ids); + xa_destroy(&pvr_dev->free_list_ids); + + pm_runtime_suspend(drm_dev->dev); + pvr_device_fini(pvr_dev); + drm_dev_unplug(drm_dev); + pvr_watchdog_fini(pvr_dev); + pvr_queue_device_fini(pvr_dev); + pvr_context_device_fini(pvr_dev); + + return 0; +} + +static const struct of_device_id dt_match[] = { + { .compatible = "img,img-axe", .data = NULL }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static const struct dev_pm_ops pvr_pm_ops = { + RUNTIME_PM_OPS(pvr_power_device_suspend, pvr_power_device_resume, pvr_power_device_idle) +}; + +static struct platform_driver pvr_driver = { + .probe = pvr_probe, + .remove = pvr_remove, + .driver = { + .name = PVR_DRIVER_NAME, + .pm = &pvr_pm_ops, + .of_match_table = dt_match, + }, +}; +module_platform_driver(pvr_driver); + +MODULE_AUTHOR("Imagination Technologies Ltd."); +MODULE_DESCRIPTION(PVR_DRIVER_DESC); +MODULE_LICENSE("Dual MIT/GPL"); +MODULE_IMPORT_NS(DMA_BUF); +MODULE_FIRMWARE("powervr/rogue_33.15.11.3_v1.fw"); diff --git a/drivers/gpu/drm/imagination/pvr_drv.h b/drivers/gpu/drm/imagination/pvr_drv.h new 
file mode 100644
index 000000000000..378fe477b759
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_drv.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DRV_H
+#define PVR_DRV_H
+
+#include <linux/compiler_attributes.h>
+#include <uapi/drm/pvr_drm.h>
+
+#define PVR_DRIVER_NAME "powervr"
+#define PVR_DRIVER_DESC "Imagination PowerVR (Series 6 and later) & IMG Graphics"
+#define PVR_DRIVER_DATE "20230904"
+
+/*
+ * Driver interface version:
+ *  - 1.0: Initial interface
+ */
+#define PVR_DRIVER_MAJOR 1
+#define PVR_DRIVER_MINOR 0
+#define PVR_DRIVER_PATCHLEVEL 0
+
+int pvr_get_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, void *out);
+int pvr_set_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, const void *in);
+int pvr_get_uobj_array(const struct drm_pvr_obj_array *in, u32 min_stride, u32 obj_size,
+		       void **out);
+int pvr_set_uobj_array(const struct drm_pvr_obj_array *out, u32 min_stride, u32 obj_size,
+		       const void *in);
+
+#define PVR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \
+	(offsetof(_typename, _last_mandatory_field) + \
+	 sizeof(((_typename *)NULL)->_last_mandatory_field))
+
+/* NOLINTBEGIN(bugprone-macro-parentheses) */
+#define PVR_UOBJ_DECL(_typename, _last_mandatory_field) \
+	, _typename : PVR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field)
+/* NOLINTEND(bugprone-macro-parentheses) */
+
+/**
+ * DOC: PVR user objects.
+ *
+ * Macros used to aid copying structured and array data to and from
+ * userspace. Objects can differ in size, provided the minimum size
+ * allowed is specified (using the last mandatory field in the struct).
+ * All types used with PVR_UOBJ_GET/SET macros must be listed here under
+ * PVR_UOBJ_MIN_SIZE, with the last mandatory struct field specified.
+ */
+
+/**
+ * PVR_UOBJ_MIN_SIZE() - Fetch the minimum copy size of a compatible type object.
+ * @_obj_name: The name of the object. Cannot be a typename - this is deduced.
+ *
+ * This cannot fail. Using the macro with an incompatible type will result in a
+ * compiler error.
+ *
+ * To add compatibility for a type, list it within the macro in an orderly
+ * fashion. The second argument is the name of the last mandatory field of the
+ * struct type, which is used to calculate the size. See also PVR_UOBJ_DECL().
+ *
+ * Return: The minimum copy size.
+ */
+#define PVR_UOBJ_MIN_SIZE(_obj_name) _Generic(_obj_name \
+	PVR_UOBJ_DECL(struct drm_pvr_job, hwrt) \
+	PVR_UOBJ_DECL(struct drm_pvr_sync_op, value) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_gpu_info, num_phantoms) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_runtime_info, cdm_max_local_mem_size_regs) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_quirks, _padding_c) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_enhancements, _padding_c) \
+	PVR_UOBJ_DECL(struct drm_pvr_heap, page_size_log2) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_heap_info, heaps) \
+	PVR_UOBJ_DECL(struct drm_pvr_static_data_area, offset) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_static_data_areas, static_data_areas) \
+	)
+
+/**
+ * PVR_UOBJ_GET() - Copies from _src_usr_ptr to &_dest_obj.
+ * @_dest_obj: The destination container object in kernel space.
+ * @_usr_size: The size of the source container in user space.
+ * @_src_usr_ptr: __u64 raw pointer to the source container in user space.
+ *
+ * Return: Error code. See pvr_get_uobj().
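+ *
+ * A minimal usage sketch (the local names and the source of 'usr_size' and
+ * 'usr_ptr' are illustrative only, not part of the driver):
+ *
+ *     struct drm_pvr_job job;
+ *     int err = PVR_UOBJ_GET(job, usr_size, usr_ptr);
+ *
+ * A smaller (older) userspace struct has its missing tail zeroed by
+ * copy_struct_from_user(); a larger (newer) one is accepted as long as all
+ * bytes past sizeof(job) are zero.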
+ */ +#define PVR_UOBJ_GET(_dest_obj, _usr_size, _src_usr_ptr) \ + pvr_get_uobj(_src_usr_ptr, _usr_size, \ + PVR_UOBJ_MIN_SIZE(_dest_obj), \ + sizeof(_dest_obj), &(_dest_obj)) + +/** + * PVR_UOBJ_SET() - Copies from &_src_obj to _dest_usr_ptr. + * @_dest_usr_ptr: __u64 raw pointer to the destination container in user space. + * @_usr_size: The size of the destination container in user space. + * @_src_obj: The source container object in kernel space. + * + * Return: Error code. See pvr_set_uobj(). + */ +#define PVR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ + pvr_set_uobj(_dest_usr_ptr, _usr_size, \ + PVR_UOBJ_MIN_SIZE(_src_obj), \ + sizeof(_src_obj), &(_src_obj)) + +/** + * PVR_UOBJ_GET_ARRAY() - Copies from @_src_drm_pvr_obj_array.array to + * alloced memory and returns a pointer in _dest_array. + * @_dest_array: The destination C array object in kernel space. + * @_src_drm_pvr_obj_array: The &struct drm_pvr_obj_array containing a __u64 raw + * pointer to the source C array in user space and the size of each array + * element in user space (the 'stride'). + * + * Return: Error code. See pvr_get_uobj_array(). + */ +#define PVR_UOBJ_GET_ARRAY(_dest_array, _src_drm_pvr_obj_array) \ + pvr_get_uobj_array(_src_drm_pvr_obj_array, \ + PVR_UOBJ_MIN_SIZE((_dest_array)[0]), \ + sizeof((_dest_array)[0]), (void **)&(_dest_array)) + +/** + * PVR_UOBJ_SET_ARRAY() - Copies from _src_array to @_dest_drm_pvr_obj_array.array. + * @_dest_drm_pvr_obj_array: The &struct drm_pvr_obj_array containing a __u64 raw + * pointer to the destination C array in user space and the size of each array + * element in user space (the 'stride'). + * @_src_array: The source C array object in kernel space. + * + * Return: Error code. See pvr_set_uobj_array(). + */ +#define PVR_UOBJ_SET_ARRAY(_dest_drm_pvr_obj_array, _src_array) \ + pvr_set_uobj_array(_dest_drm_pvr_obj_array, \ + PVR_UOBJ_MIN_SIZE((_src_array)[0]), \ + sizeof((_src_array)[0]), _src_array) + +#endif /* PVR_DRV_H */ diff --git a/drivers/gpu/drm/imagination/pvr_free_list.c b/drivers/gpu/drm/imagination/pvr_free_list.c new file mode 100644 index 000000000000..5e51bc980751 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_free_list.c @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_free_list.h" +#include "pvr_gem.h" +#include "pvr_hwrt.h" +#include "pvr_rogue_fwif.h" +#include "pvr_vm.h" + +#include <drm/drm_gem.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <uapi/drm/pvr_drm.h> + +#define FREE_LIST_ENTRY_SIZE sizeof(u32) + +#define FREE_LIST_ALIGNMENT \ + ((ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE / FREE_LIST_ENTRY_SIZE) - 1) + +#define FREE_LIST_MIN_PAGES 50 +#define FREE_LIST_MIN_PAGES_BRN66011 40 +#define FREE_LIST_MIN_PAGES_ROGUEXE 25 + +/** + * pvr_get_free_list_min_pages() - Get minimum free list size for this device + * @pvr_dev: Device pointer. + * + * Returns: + * * Minimum free list size, in PM physical pages. 
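+ *   (%FREE_LIST_MIN_PAGES_ROGUEXE (25) on RogueXE cores, raised to
+ *   %FREE_LIST_MIN_PAGES_BRN66011 (40) when quirk BRN 66011 is present,
+ *   and %FREE_LIST_MIN_PAGES (50) on all other cores).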
+ */
+u32
+pvr_get_free_list_min_pages(struct pvr_device *pvr_dev)
+{
+	u32 value;
+
+	if (PVR_HAS_FEATURE(pvr_dev, roguexe)) {
+		if (PVR_HAS_QUIRK(pvr_dev, 66011))
+			value = FREE_LIST_MIN_PAGES_BRN66011;
+		else
+			value = FREE_LIST_MIN_PAGES_ROGUEXE;
+	} else {
+		value = FREE_LIST_MIN_PAGES;
+	}
+
+	return value;
+}
+
+static int
+free_list_create_kernel_structure(struct pvr_file *pvr_file,
+				  struct drm_pvr_ioctl_create_free_list_args *args,
+				  struct pvr_free_list *free_list)
+{
+	struct pvr_gem_object *free_list_obj;
+	struct pvr_vm_context *vm_ctx;
+	u64 free_list_size;
+	int err;
+
+	if (args->grow_threshold > 100 ||
+	    args->initial_num_pages > args->max_num_pages ||
+	    args->grow_num_pages > args->max_num_pages ||
+	    args->max_num_pages == 0 ||
+	    (args->initial_num_pages < args->max_num_pages && !args->grow_num_pages) ||
+	    (args->initial_num_pages == args->max_num_pages && args->grow_num_pages))
+		return -EINVAL;
+
+	if ((args->initial_num_pages & FREE_LIST_ALIGNMENT) ||
+	    (args->max_num_pages & FREE_LIST_ALIGNMENT) ||
+	    (args->grow_num_pages & FREE_LIST_ALIGNMENT))
+		return -EINVAL;
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	free_list_obj = pvr_vm_find_gem_object(vm_ctx, args->free_list_gpu_addr,
+					       NULL, &free_list_size);
+	if (!free_list_obj) {
+		err = -EINVAL;
+		goto err_put_vm_context;
+	}
+
+	if ((free_list_obj->flags & DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS) ||
+	    !(free_list_obj->flags & DRM_PVR_BO_PM_FW_PROTECT) ||
+	    free_list_size < (args->max_num_pages * FREE_LIST_ENTRY_SIZE)) {
+		err = -EINVAL;
+		goto err_put_free_list_obj;
+	}
+
+	free_list->pvr_dev = pvr_file->pvr_dev;
+	free_list->current_pages = 0;
+	free_list->max_pages = args->max_num_pages;
+	free_list->grow_pages = args->grow_num_pages;
+	free_list->grow_threshold = args->grow_threshold;
+	free_list->obj = free_list_obj;
+	free_list->free_list_gpu_addr = args->free_list_gpu_addr;
+	free_list->initial_num_pages = args->initial_num_pages;
+
+	pvr_vm_context_put(vm_ctx);
+
+	return 0;
+
+err_put_free_list_obj:
+	pvr_gem_object_put(free_list_obj);
+
+err_put_vm_context:
+	pvr_vm_context_put(vm_ctx);
+
+	return err;
+}
+
+static void
+free_list_destroy_kernel_structure(struct pvr_free_list *free_list)
+{
+	WARN_ON(!list_empty(&free_list->hwrt_list));
+
+	pvr_gem_object_put(free_list->obj);
+}
+
+/**
+ * calculate_free_list_ready_pages_locked() - Function to work out the number
+ * of free list pages to reserve for growing within the FW without having to
+ * wait for the host to progress a grow request
+ * @free_list: Pointer to free list.
+ * @pages: Total pages currently in free list.
+ *
+ * If the threshold or grow size works out to less than the alignment size
+ * (4 pages on Rogue), then the feature is not used.
+ *
+ * Caller must hold &free_list->lock.
+ *
+ * Return: number of pages to reserve.
+ */
+static u32
+calculate_free_list_ready_pages_locked(struct pvr_free_list *free_list, u32 pages)
+{
+	u32 ready_pages;
+
+	lockdep_assert_held(&free_list->lock);
+
+	ready_pages = ((pages * free_list->grow_threshold) / 100);
+
+	/* The number of pages must not exceed the grow size. */
+	ready_pages = min(ready_pages, free_list->grow_pages);
+
+	/*
+	 * The number of pages must be a multiple of the free list align size.
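+	 * For example (illustrative numbers): with 256 pages in the list, a
+	 * 10% grow threshold and a 32-page grow size, the 25 candidate pages
+	 * are capped to min(25, 32) = 25, then rounded down to 24, the
+	 * nearest multiple of the 4-page alignment.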
+ */ + ready_pages &= ~FREE_LIST_ALIGNMENT; + + return ready_pages; +} + +static u32 +calculate_free_list_ready_pages(struct pvr_free_list *free_list, u32 pages) +{ + u32 ret; + + mutex_lock(&free_list->lock); + + ret = calculate_free_list_ready_pages_locked(free_list, pages); + + mutex_unlock(&free_list->lock); + + return ret; +} + +static void +free_list_fw_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_freelist *fw_data = cpu_ptr; + struct pvr_free_list *free_list = priv; + u32 ready_pages; + + /* Fill out FW structure */ + ready_pages = calculate_free_list_ready_pages(free_list, + free_list->initial_num_pages); + + fw_data->max_pages = free_list->max_pages; + fw_data->current_pages = free_list->initial_num_pages - ready_pages; + fw_data->grow_pages = free_list->grow_pages; + fw_data->ready_pages = ready_pages; + fw_data->freelist_id = free_list->fw_id; + fw_data->grow_pending = false; + fw_data->current_stack_top = fw_data->current_pages - 1; + fw_data->freelist_dev_addr = free_list->free_list_gpu_addr; + fw_data->current_dev_addr = (fw_data->freelist_dev_addr + + ((fw_data->max_pages - fw_data->current_pages) * + FREE_LIST_ENTRY_SIZE)) & + ~((u64)ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE - 1); +} + +static int +free_list_create_fw_structure(struct pvr_file *pvr_file, + struct drm_pvr_ioctl_create_free_list_args *args, + struct pvr_free_list *free_list) +{ + struct pvr_device *pvr_dev = pvr_file->pvr_dev; + + /* + * Create and map the FW structure so we can initialise it. This is not + * accessed on the CPU side post-initialisation so the mapping lifetime + * is only for this function. + */ + free_list->fw_data = pvr_fw_object_create_and_map(pvr_dev, sizeof(*free_list->fw_data), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + free_list_fw_init, free_list, + &free_list->fw_obj); + if (IS_ERR(free_list->fw_data)) + return PTR_ERR(free_list->fw_data); + + return 0; +} + +static void +free_list_destroy_fw_structure(struct pvr_free_list *free_list) +{ + pvr_fw_object_unmap_and_destroy(free_list->fw_obj); +} + +static int +pvr_free_list_insert_pages_locked(struct pvr_free_list *free_list, + struct sg_table *sgt, u32 offset, u32 num_pages) +{ + struct sg_dma_page_iter dma_iter; + u32 *page_list; + + lockdep_assert_held(&free_list->lock); + + page_list = pvr_gem_object_vmap(free_list->obj); + if (IS_ERR(page_list)) + return PTR_ERR(page_list); + + offset /= FREE_LIST_ENTRY_SIZE; + /* clang-format off */ + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { + dma_addr_t dma_addr = sg_page_iter_dma_address(&dma_iter); + u64 dma_pfn = dma_addr >> + ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT; + u32 dma_addr_offset; + + BUILD_BUG_ON(ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE > PAGE_SIZE); + + for (dma_addr_offset = 0; dma_addr_offset < PAGE_SIZE; + dma_addr_offset += ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE) { + WARN_ON_ONCE(dma_pfn >> 32); + + page_list[offset++] = (u32)dma_pfn; + dma_pfn++; + + num_pages--; + if (!num_pages) + break; + } + + if (!num_pages) + break; + } + /* clang-format on */ + + /* Make sure our free_list update is flushed. 
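+	 * The FW consumes the page list directly from this buffer, so the
+	 * new entries must be visible to it before the grow response or
+	 * free list registration that follows.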
+	 */
+	wmb();
+
+	pvr_gem_object_vunmap(free_list->obj);
+
+	return 0;
+}
+
+static int
+pvr_free_list_insert_node_locked(struct pvr_free_list_node *free_list_node)
+{
+	struct pvr_free_list *free_list = free_list_node->free_list;
+	struct sg_table *sgt;
+	u32 start_page;
+	u32 offset;
+	int err;
+
+	lockdep_assert_held(&free_list->lock);
+
+	start_page = free_list->max_pages - free_list->current_pages -
+		     free_list_node->num_pages;
+	offset = (start_page * FREE_LIST_ENTRY_SIZE) &
+		 ~((u64)ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE - 1);
+
+	sgt = drm_gem_shmem_get_pages_sgt(&free_list_node->mem_obj->base);
+	if (WARN_ON(IS_ERR(sgt)))
+		return PTR_ERR(sgt);
+
+	err = pvr_free_list_insert_pages_locked(free_list, sgt,
+						offset, free_list_node->num_pages);
+	if (!err)
+		free_list->current_pages += free_list_node->num_pages;
+
+	return err;
+}
+
+static int
+pvr_free_list_grow(struct pvr_free_list *free_list, u32 num_pages)
+{
+	struct pvr_device *pvr_dev = free_list->pvr_dev;
+	struct pvr_free_list_node *free_list_node;
+	int err;
+
+	mutex_lock(&free_list->lock);
+
+	if (num_pages & FREE_LIST_ALIGNMENT) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
+	free_list_node = kzalloc(sizeof(*free_list_node), GFP_KERNEL);
+	if (!free_list_node) {
+		err = -ENOMEM;
+		goto err_unlock;
+	}
+
+	free_list_node->num_pages = num_pages;
+	free_list_node->free_list = free_list;
+
+	free_list_node->mem_obj = pvr_gem_object_create(pvr_dev,
+							num_pages <<
+							ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT,
+							PVR_BO_FW_FLAGS_DEVICE_CACHED);
+	if (IS_ERR(free_list_node->mem_obj)) {
+		err = PTR_ERR(free_list_node->mem_obj);
+		goto err_free;
+	}
+
+	err = pvr_free_list_insert_node_locked(free_list_node);
+	if (err)
+		goto err_destroy_gem_object;
+
+	list_add_tail(&free_list_node->node, &free_list->mem_block_list);
+
+	/*
+	 * Reserve a number of ready pages to allow the FW to process OOM
+	 * quickly and asynchronously request a grow.
+	 */
+	free_list->ready_pages =
+		calculate_free_list_ready_pages_locked(free_list,
+						       free_list->current_pages);
+	free_list->current_pages -= free_list->ready_pages;
+
+	mutex_unlock(&free_list->lock);
+
+	return 0;
+
+err_destroy_gem_object:
+	pvr_gem_object_put(free_list_node->mem_obj);
+
+err_free:
+	kfree(free_list_node);
+
+err_unlock:
+	mutex_unlock(&free_list->lock);
+
+	return err;
+}
+
+void pvr_free_list_process_grow_req(struct pvr_device *pvr_dev,
+				    struct rogue_fwif_fwccb_cmd_freelist_gs_data *req)
+{
+	struct pvr_free_list *free_list = pvr_free_list_lookup_id(pvr_dev, req->freelist_id);
+	struct rogue_fwif_kccb_cmd resp_cmd = {
+		.cmd_type = ROGUE_FWIF_KCCB_CMD_FREELIST_GROW_UPDATE,
+	};
+	struct rogue_fwif_freelist_gs_data *resp = &resp_cmd.cmd_data.free_list_gs_data;
+	u32 grow_pages = 0;
+
+	/* If we don't have a freelist registered for this ID, we can't do much. */
+	if (WARN_ON(!free_list))
+		return;
+
+	/* Since the FW made the request, it has already consumed the ready
+	 * pages; update the host struct accordingly.
+	 */
+	free_list->current_pages += free_list->ready_pages;
+	free_list->ready_pages = 0;
+
+	/* If the grow succeeds, update the grow_pages argument. */
+	if (!pvr_free_list_grow(free_list, free_list->grow_pages))
+		grow_pages = free_list->grow_pages;
+
+	/* Now prepare the response and send it back to the FW.
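+	 * delta_pages carries the number of pages actually added (zero if
+	 * the grow failed) and new_pages the new total, including the
+	 * re-reserved ready pages.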
+	 */
+	pvr_fw_object_get_fw_addr(free_list->fw_obj, &resp->freelist_fw_addr);
+	resp->delta_pages = grow_pages;
+	resp->new_pages = free_list->current_pages + free_list->ready_pages;
+	resp->ready_pages = free_list->ready_pages;
+	pvr_free_list_put(free_list);
+
+	WARN_ON(pvr_kccb_send_cmd(pvr_dev, &resp_cmd, NULL));
+}
+
+static void
+pvr_free_list_free_node(struct pvr_free_list_node *free_list_node)
+{
+	pvr_gem_object_put(free_list_node->mem_obj);
+
+	kfree(free_list_node);
+}
+
+/**
+ * pvr_free_list_create() - Create a new free list and return an object pointer
+ * @pvr_file: Pointer to pvr_file structure.
+ * @args: Creation arguments from userspace.
+ *
+ * Return:
+ *  * Pointer to new free_list, or
+ *  * ERR_PTR(-%ENOMEM) on out of memory.
+ */
+struct pvr_free_list *
+pvr_free_list_create(struct pvr_file *pvr_file,
+		     struct drm_pvr_ioctl_create_free_list_args *args)
+{
+	struct pvr_free_list *free_list;
+	int err;
+
+	/* Create and fill out the kernel structure */
+	free_list = kzalloc(sizeof(*free_list), GFP_KERNEL);
+
+	if (!free_list)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&free_list->ref_count);
+	INIT_LIST_HEAD(&free_list->mem_block_list);
+	INIT_LIST_HEAD(&free_list->hwrt_list);
+	mutex_init(&free_list->lock);
+
+	err = free_list_create_kernel_structure(pvr_file, args, free_list);
+	if (err < 0)
+		goto err_free;
+
+	/* Allocate global object ID for firmware. */
+	err = xa_alloc(&pvr_file->pvr_dev->free_list_ids,
+		       &free_list->fw_id,
+		       free_list,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err)
+		goto err_destroy_kernel_structure;
+
+	err = free_list_create_fw_structure(pvr_file, args, free_list);
+	if (err < 0)
+		goto err_free_fw_id;
+
+	err = pvr_free_list_grow(free_list, args->initial_num_pages);
+	if (err < 0)
+		goto err_fw_struct_cleanup;
+
+	return free_list;
+
+err_fw_struct_cleanup:
+	WARN_ON(pvr_fw_structure_cleanup(free_list->pvr_dev,
+					 ROGUE_FWIF_CLEANUP_FREELIST,
+					 free_list->fw_obj, 0));
+
+err_free_fw_id:
+	xa_erase(&free_list->pvr_dev->free_list_ids, free_list->fw_id);
+
+err_destroy_kernel_structure:
+	free_list_destroy_kernel_structure(free_list);
+
+err_free:
+	mutex_destroy(&free_list->lock);
+	kfree(free_list);
+
+	return ERR_PTR(err);
+}
+
+static void
+pvr_free_list_release(struct kref *ref_count)
+{
+	struct pvr_free_list *free_list =
+		container_of(ref_count, struct pvr_free_list, ref_count);
+	struct list_head *pos, *n;
+	int err;
+
+	xa_erase(&free_list->pvr_dev->free_list_ids, free_list->fw_id);
+
+	err = pvr_fw_structure_cleanup(free_list->pvr_dev,
+				       ROGUE_FWIF_CLEANUP_FREELIST,
+				       free_list->fw_obj, 0);
+	if (err == -EBUSY) {
+		/* Flush the FWCCB to process any HWR or freelist reconstruction
+		 * request that might keep the freelist busy, and try again.
+		 */
+		pvr_fwccb_process(free_list->pvr_dev);
+		err = pvr_fw_structure_cleanup(free_list->pvr_dev,
+					       ROGUE_FWIF_CLEANUP_FREELIST,
+					       free_list->fw_obj, 0);
+	}
+
+	WARN_ON(err);
+
+	/* clang-format off */
+	list_for_each_safe(pos, n, &free_list->mem_block_list) {
+		struct pvr_free_list_node *free_list_node =
+			container_of(pos, struct pvr_free_list_node, node);
+
+		list_del(pos);
+		pvr_free_list_free_node(free_list_node);
+	}
+	/* clang-format on */
+
+	free_list_destroy_kernel_structure(free_list);
+	free_list_destroy_fw_structure(free_list);
+	mutex_destroy(&free_list->lock);
+	kfree(free_list);
+}
+
+/**
+ * pvr_destroy_free_lists_for_file() - Destroy any free lists associated with
+ * the given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ * + * Removes all free lists associated with @pvr_file from the device free_list + * list and drops initial references. Free lists will then be destroyed once + * all outstanding references are dropped. + */ +void pvr_destroy_free_lists_for_file(struct pvr_file *pvr_file) +{ + struct pvr_free_list *free_list; + unsigned long handle; + + xa_for_each(&pvr_file->free_list_handles, handle, free_list) { + (void)free_list; + pvr_free_list_put(xa_erase(&pvr_file->free_list_handles, handle)); + } +} + +/** + * pvr_free_list_put() - Release reference on free list + * @free_list: Pointer to list to release reference on + */ +void +pvr_free_list_put(struct pvr_free_list *free_list) +{ + if (free_list) + kref_put(&free_list->ref_count, pvr_free_list_release); +} + +void pvr_free_list_add_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data) +{ + mutex_lock(&free_list->lock); + + list_add_tail(&hwrt_data->freelist_node, &free_list->hwrt_list); + + mutex_unlock(&free_list->lock); +} + +void pvr_free_list_remove_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data) +{ + mutex_lock(&free_list->lock); + + list_del(&hwrt_data->freelist_node); + + mutex_unlock(&free_list->lock); +} + +static void +pvr_free_list_reconstruct(struct pvr_device *pvr_dev, u32 freelist_id) +{ + struct pvr_free_list *free_list = pvr_free_list_lookup_id(pvr_dev, freelist_id); + struct pvr_free_list_node *free_list_node; + struct rogue_fwif_freelist *fw_data; + struct pvr_hwrt_data *hwrt_data; + + if (!free_list) + return; + + mutex_lock(&free_list->lock); + + /* Rebuild the free list based on the memory block list. */ + free_list->current_pages = 0; + + list_for_each_entry(free_list_node, &free_list->mem_block_list, node) + WARN_ON(pvr_free_list_insert_node_locked(free_list_node)); + + /* + * Remove the ready pages, which are reserved to allow the FW to process OOM quickly and + * asynchronously request a grow. + */ + free_list->current_pages -= free_list->ready_pages; + + fw_data = free_list->fw_data; + fw_data->current_stack_top = fw_data->current_pages - 1; + fw_data->allocated_page_count = 0; + fw_data->allocated_mmu_page_count = 0; + + /* Reset the state of any associated HWRTs. 
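+	 * Each one is marked as having been through hardware recovery (HWR)
+	 * and has its "last geometry" flag cleared, so the FW will not trust
+	 * stale render-target state after reconstruction.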
*/ + list_for_each_entry(hwrt_data, &free_list->hwrt_list, freelist_node) { + struct rogue_fwif_hwrtdata *hwrt_fw_data = pvr_fw_object_vmap(hwrt_data->fw_obj); + + if (!WARN_ON(IS_ERR(hwrt_fw_data))) { + hwrt_fw_data->state = ROGUE_FWIF_RTDATA_STATE_HWR; + hwrt_fw_data->hwrt_data_flags &= ~HWRTDATA_HAS_LAST_GEOM; + } + + pvr_fw_object_vunmap(hwrt_data->fw_obj); + } + + mutex_unlock(&free_list->lock); + + pvr_free_list_put(free_list); +} + +void +pvr_free_list_process_reconstruct_req(struct pvr_device *pvr_dev, + struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data *req) +{ + struct rogue_fwif_kccb_cmd resp_cmd = { + .cmd_type = ROGUE_FWIF_KCCB_CMD_FREELISTS_RECONSTRUCTION_UPDATE, + }; + struct rogue_fwif_freelists_reconstruction_data *resp = + &resp_cmd.cmd_data.free_lists_reconstruction_data; + + for (u32 i = 0; i < req->freelist_count; i++) + pvr_free_list_reconstruct(pvr_dev, req->freelist_ids[i]); + + resp->freelist_count = req->freelist_count; + memcpy(resp->freelist_ids, req->freelist_ids, + req->freelist_count * sizeof(resp->freelist_ids[0])); + + WARN_ON(pvr_kccb_send_cmd(pvr_dev, &resp_cmd, NULL)); +} diff --git a/drivers/gpu/drm/imagination/pvr_free_list.h b/drivers/gpu/drm/imagination/pvr_free_list.h new file mode 100644 index 000000000000..bfb4f5fc622c --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_free_list.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FREE_LIST_H +#define PVR_FREE_LIST_H + +#include <linux/compiler_attributes.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/xarray.h> +#include <uapi/drm/pvr_drm.h> + +#include "pvr_device.h" + +/* Forward declaration from pvr_gem.h. */ +struct pvr_fw_object; + +/* Forward declaration from pvr_gem.h. */ +struct pvr_gem_object; + +/* Forward declaration from pvr_hwrt.h. */ +struct pvr_hwrt_data; + +/** + * struct pvr_free_list_node - structure representing an allocation in the free + * list + */ +struct pvr_free_list_node { + /** @node: List node for &pvr_free_list.mem_block_list. */ + struct list_head node; + + /** @free_list: Pointer to owning free list. */ + struct pvr_free_list *free_list; + + /** @num_pages: Number of pages in this node. */ + u32 num_pages; + + /** @mem_obj: GEM object representing the pages in this node. */ + struct pvr_gem_object *mem_obj; +}; + +/** + * struct pvr_free_list - structure representing a free list + */ +struct pvr_free_list { + /** @ref_count: Reference count of object. */ + struct kref ref_count; + + /** @pvr_dev: Pointer to device that owns this object. */ + struct pvr_device *pvr_dev; + + /** @obj: GEM object representing the free list. */ + struct pvr_gem_object *obj; + + /** @fw_obj: FW object representing the FW-side structure. */ + struct pvr_fw_object *fw_obj; + + /** @fw_data: Pointer to CPU mapping of the FW-side structure. */ + struct rogue_fwif_freelist *fw_data; + + /** + * @lock: Mutex protecting modification of the free list. Must be held when accessing any + * of the members below. + */ + struct mutex lock; + + /** @fw_id: Firmware ID for this object. */ + u32 fw_id; + + /** @current_pages: Current number of pages in free list. */ + u32 current_pages; + + /** @max_pages: Maximum number of pages in free list. */ + u32 max_pages; + + /** @grow_pages: Pages to grow free list by per request. 
+	 */
+	u32 grow_pages;
+
+	/**
+	 * @grow_threshold: Percentage of FL memory used that should trigger a
+	 * new grow request.
+	 */
+	u32 grow_threshold;
+
+	/**
+	 * @ready_pages: Number of pages reserved for FW to use while a grow
+	 * request is being processed.
+	 */
+	u32 ready_pages;
+
+	/** @mem_block_list: List of memory blocks in this free list. */
+	struct list_head mem_block_list;
+
+	/** @hwrt_list: List of HWRTs using this free list. */
+	struct list_head hwrt_list;
+
+	/** @initial_num_pages: Initial number of pages in free list. */
+	u32 initial_num_pages;
+
+	/** @free_list_gpu_addr: Address of free list in GPU address space. */
+	u64 free_list_gpu_addr;
+};
+
+struct pvr_free_list *
+pvr_free_list_create(struct pvr_file *pvr_file,
+		     struct drm_pvr_ioctl_create_free_list_args *args);
+
+void
+pvr_destroy_free_lists_for_file(struct pvr_file *pvr_file);
+
+u32
+pvr_get_free_list_min_pages(struct pvr_device *pvr_dev);
+
+static __always_inline struct pvr_free_list *
+pvr_free_list_get(struct pvr_free_list *free_list)
+{
+	if (free_list)
+		kref_get(&free_list->ref_count);
+
+	return free_list;
+}
+
+/**
+ * pvr_free_list_lookup() - Lookup free list pointer from handle and file
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on free list object. Call pvr_free_list_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, is not a free list, or
+ *    does not belong to @pvr_file)
+ */
+static __always_inline struct pvr_free_list *
+pvr_free_list_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_free_list *free_list;
+
+	xa_lock(&pvr_file->free_list_handles);
+	free_list = pvr_free_list_get(xa_load(&pvr_file->free_list_handles, handle));
+	xa_unlock(&pvr_file->free_list_handles);
+
+	return free_list;
+}
+
+/**
+ * pvr_free_list_lookup_id() - Lookup free list pointer from FW ID
+ * @pvr_dev: Device pointer.
+ * @id: FW object ID.
+ *
+ * Takes reference on free list object. Call pvr_free_list_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a free list)
+ */
+static __always_inline struct pvr_free_list *
+pvr_free_list_lookup_id(struct pvr_device *pvr_dev, u32 id)
+{
+	struct pvr_free_list *free_list;
+
+	xa_lock(&pvr_dev->free_list_ids);
+
+	/* Free lists are removed from the free_list_ids set in the free list
+	 * release path, meaning the ref_count has already reached zero by the
+	 * time they are removed. We need to make sure we're not trying to
+	 * acquire a free list that's being destroyed.
+ */ + free_list = xa_load(&pvr_dev->free_list_ids, id); + if (free_list && !kref_get_unless_zero(&free_list->ref_count)) + free_list = NULL; + xa_unlock(&pvr_dev->free_list_ids); + + return free_list; +} + +void +pvr_free_list_put(struct pvr_free_list *free_list); + +void +pvr_free_list_add_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data); +void +pvr_free_list_remove_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data); + +void pvr_free_list_process_grow_req(struct pvr_device *pvr_dev, + struct rogue_fwif_fwccb_cmd_freelist_gs_data *req); + +void +pvr_free_list_process_reconstruct_req(struct pvr_device *pvr_dev, + struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data *req); + +#endif /* PVR_FREE_LIST_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c new file mode 100644 index 000000000000..3debc9870a82 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw.c @@ -0,0 +1,1489 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_ccb.h" +#include "pvr_device.h" +#include "pvr_device_info.h" +#include "pvr_fw.h" +#include "pvr_fw_info.h" +#include "pvr_fw_startstop.h" +#include "pvr_fw_trace.h" +#include "pvr_gem.h" +#include "pvr_power.h" +#include "pvr_rogue_fwif_dev_info.h" +#include "pvr_rogue_heap_config.h" +#include "pvr_vm.h" + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <drm/drm_mm.h> +#include <linux/clk.h> +#include <linux/firmware.h> +#include <linux/math.h> +#include <linux/minmax.h> +#include <linux/sizes.h> + +#define FW_MAX_SUPPORTED_MAJOR_VERSION 1 + +#define FW_BOOT_TIMEOUT_USEC 5000000 + +/* Config heap occupies top 192k of the firmware heap. */ +#define PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY SZ_64K +#define PVR_ROGUE_FW_CONFIG_HEAP_SIZE (3 * PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY) + +/* Main firmware allocations should come from the remainder of the heap. */ +#define PVR_ROGUE_FW_MAIN_HEAP_BASE ROGUE_FW_HEAP_BASE + +/* Offsets from start of configuration area of FW heap. 
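+ * With the 64K granularity above, this places CONNECTION_CTL at +0, OSINIT
+ * at +64K and SYSINIT at +128K within the 192K config area at the top of
+ * the heap.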
*/ +#define PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET 0 +#define PVR_ROGUE_FWIF_OSINIT_OFFSET \ + (PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET + PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY) +#define PVR_ROGUE_FWIF_SYSINIT_OFFSET \ + (PVR_ROGUE_FWIF_OSINIT_OFFSET + PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY) + +#define PVR_ROGUE_FAULT_PAGE_SIZE SZ_4K + +#define PVR_SYNC_OBJ_SIZE sizeof(u32) + +const struct pvr_fw_layout_entry * +pvr_fw_find_layout_entry(struct pvr_device *pvr_dev, enum pvr_fw_section_id id) +{ + const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries; + u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num; + u32 entry; + + for (entry = 0; entry < num_layout_entries; entry++) { + if (layout_entries[entry].id == id) + return &layout_entries[entry]; + } + + return NULL; +} + +static const struct pvr_fw_layout_entry * +pvr_fw_find_private_data(struct pvr_device *pvr_dev) +{ + const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries; + u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num; + u32 entry; + + for (entry = 0; entry < num_layout_entries; entry++) { + if (layout_entries[entry].id == META_PRIVATE_DATA || + layout_entries[entry].id == MIPS_PRIVATE_DATA || + layout_entries[entry].id == RISCV_PRIVATE_DATA) + return &layout_entries[entry]; + } + + return NULL; +} + +#define DEV_INFO_MASK_SIZE(x) DIV_ROUND_UP(x, 64) + +/** + * pvr_fw_validate() - Parse firmware header and check compatibility + * @pvr_dev: Device pointer. + * + * Returns: + * * 0 on success, or + * * -EINVAL if firmware is incompatible. + */ +static int +pvr_fw_validate(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + const struct firmware *firmware = pvr_dev->fw_dev.firmware; + const struct pvr_fw_layout_entry *layout_entries; + const struct pvr_fw_info_header *header; + const u8 *fw = firmware->data; + u32 fw_offset = firmware->size - SZ_4K; + u32 layout_table_size; + u32 entry; + + if (firmware->size < SZ_4K || (firmware->size % FW_BLOCK_SIZE)) + return -EINVAL; + + header = (const struct pvr_fw_info_header *)&fw[fw_offset]; + + if (header->info_version != PVR_FW_INFO_VERSION) { + drm_err(drm_dev, "Unsupported fw info version %u\n", + header->info_version); + return -EINVAL; + } + + if (header->header_len != sizeof(struct pvr_fw_info_header) || + header->layout_entry_size != sizeof(struct pvr_fw_layout_entry) || + header->layout_entry_num > PVR_FW_INFO_MAX_NUM_ENTRIES) { + drm_err(drm_dev, "FW info format mismatch\n"); + return -EINVAL; + } + + if (!(header->flags & PVR_FW_FLAGS_OPEN_SOURCE) || + header->fw_version_major > FW_MAX_SUPPORTED_MAJOR_VERSION || + header->fw_version_major == 0) { + drm_err(drm_dev, "Unsupported FW version %u.%u (build: %u%s)\n", + header->fw_version_major, header->fw_version_minor, + header->fw_version_build, + (header->flags & PVR_FW_FLAGS_OPEN_SOURCE) ? 
" OS" : ""); + return -EINVAL; + } + + if (pvr_gpu_id_to_packed_bvnc(&pvr_dev->gpu_id) != header->bvnc) { + struct pvr_gpu_id fw_gpu_id; + + packed_bvnc_to_pvr_gpu_id(header->bvnc, &fw_gpu_id); + drm_err(drm_dev, "FW built for incorrect GPU ID %i.%i.%i.%i (expected %i.%i.%i.%i)\n", + fw_gpu_id.b, fw_gpu_id.v, fw_gpu_id.n, fw_gpu_id.c, + pvr_dev->gpu_id.b, pvr_dev->gpu_id.v, pvr_dev->gpu_id.n, pvr_dev->gpu_id.c); + return -EINVAL; + } + + fw_offset += header->header_len; + layout_table_size = + header->layout_entry_size * header->layout_entry_num; + if ((fw_offset + layout_table_size) > firmware->size) + return -EINVAL; + + layout_entries = (const struct pvr_fw_layout_entry *)&fw[fw_offset]; + for (entry = 0; entry < header->layout_entry_num; entry++) { + u32 start_addr = layout_entries[entry].base_addr; + u32 end_addr = start_addr + layout_entries[entry].alloc_size; + + if (start_addr >= end_addr) + return -EINVAL; + } + + fw_offset = (firmware->size - SZ_4K) - header->device_info_size; + + drm_info(drm_dev, "FW version v%u.%u (build %u OS)\n", header->fw_version_major, + header->fw_version_minor, header->fw_version_build); + + pvr_dev->fw_version.major = header->fw_version_major; + pvr_dev->fw_version.minor = header->fw_version_minor; + + pvr_dev->fw_dev.header = header; + pvr_dev->fw_dev.layout_entries = layout_entries; + + return 0; +} + +static int +pvr_fw_get_device_info(struct pvr_device *pvr_dev) +{ + const struct firmware *firmware = pvr_dev->fw_dev.firmware; + struct pvr_fw_device_info_header *header; + const u8 *fw = firmware->data; + const u64 *dev_info; + u32 fw_offset; + + fw_offset = (firmware->size - SZ_4K) - pvr_dev->fw_dev.header->device_info_size; + + header = (struct pvr_fw_device_info_header *)&fw[fw_offset]; + dev_info = (u64 *)(header + 1); + + pvr_device_info_set_quirks(pvr_dev, dev_info, header->brn_mask_size); + dev_info += header->brn_mask_size; + + pvr_device_info_set_enhancements(pvr_dev, dev_info, header->ern_mask_size); + dev_info += header->ern_mask_size; + + return pvr_device_info_set_features(pvr_dev, dev_info, header->feature_mask_size, + header->feature_param_size); +} + +static void +layout_get_sizes(struct pvr_device *pvr_dev) +{ + const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries; + u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num; + struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; + + fw_mem->code_alloc_size = 0; + fw_mem->data_alloc_size = 0; + fw_mem->core_code_alloc_size = 0; + fw_mem->core_data_alloc_size = 0; + + /* Extract section sizes from FW layout table. 
*/ + for (u32 entry = 0; entry < num_layout_entries; entry++) { + switch (layout_entries[entry].type) { + case FW_CODE: + fw_mem->code_alloc_size += layout_entries[entry].alloc_size; + break; + case FW_DATA: + fw_mem->data_alloc_size += layout_entries[entry].alloc_size; + break; + case FW_COREMEM_CODE: + fw_mem->core_code_alloc_size += + layout_entries[entry].alloc_size; + break; + case FW_COREMEM_DATA: + fw_mem->core_data_alloc_size += + layout_entries[entry].alloc_size; + break; + case NONE: + break; + } + } +} + +int +pvr_fw_find_mmu_segment(struct pvr_device *pvr_dev, u32 addr, u32 size, void *fw_code_ptr, + void *fw_data_ptr, void *fw_core_code_ptr, void *fw_core_data_ptr, + void **host_addr_out) +{ + const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries; + u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num; + u32 end_addr = addr + size; + int entry = 0; + + /* Ensure requested range is not zero, and size is not causing addr to overflow. */ + if (end_addr <= addr) + return -EINVAL; + + for (entry = 0; entry < num_layout_entries; entry++) { + u32 entry_start_addr = layout_entries[entry].base_addr; + u32 entry_end_addr = entry_start_addr + layout_entries[entry].alloc_size; + + if (addr >= entry_start_addr && addr < entry_end_addr && + end_addr > entry_start_addr && end_addr <= entry_end_addr) { + switch (layout_entries[entry].type) { + case FW_CODE: + *host_addr_out = fw_code_ptr; + break; + + case FW_DATA: + *host_addr_out = fw_data_ptr; + break; + + case FW_COREMEM_CODE: + *host_addr_out = fw_core_code_ptr; + break; + + case FW_COREMEM_DATA: + *host_addr_out = fw_core_data_ptr; + break; + + default: + return -EINVAL; + } + /* Direct Mem write to mapped memory */ + addr -= layout_entries[entry].base_addr; + addr += layout_entries[entry].alloc_offset; + + /* + * Add offset to pointer to FW allocation only if that + * allocation is available + */ + *(u8 **)host_addr_out += addr; + return 0; + } + } + + return -EINVAL; +} + +static int +pvr_fw_create_fwif_connection_ctl(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + + fw_dev->fwif_connection_ctl = + pvr_fw_object_create_and_map_offset(pvr_dev, + fw_dev->fw_heap_info.config_offset + + PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET, + sizeof(*fw_dev->fwif_connection_ctl), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, + &fw_dev->mem.fwif_connection_ctl_obj); + if (IS_ERR(fw_dev->fwif_connection_ctl)) { + drm_err(drm_dev, + "Unable to allocate FWIF connection control memory\n"); + return PTR_ERR(fw_dev->fwif_connection_ctl); + } + + return 0; +} + +static void +pvr_fw_fini_fwif_connection_ctl(struct pvr_device *pvr_dev) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + + pvr_fw_object_unmap_and_destroy(fw_dev->mem.fwif_connection_ctl_obj); +} + +static void +fw_osinit_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_osinit *fwif_osinit = cpu_ptr; + struct pvr_device *pvr_dev = priv; + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mem *fw_mem = &fw_dev->mem; + + fwif_osinit->kernel_ccbctl_fw_addr = pvr_dev->kccb.ccb.ctrl_fw_addr; + fwif_osinit->kernel_ccb_fw_addr = pvr_dev->kccb.ccb.ccb_fw_addr; + pvr_fw_object_get_fw_addr(pvr_dev->kccb.rtn_obj, + &fwif_osinit->kernel_ccb_rtn_slots_fw_addr); + + fwif_osinit->firmware_ccbctl_fw_addr = pvr_dev->fwccb.ctrl_fw_addr; + fwif_osinit->firmware_ccb_fw_addr = pvr_dev->fwccb.ccb_fw_addr; + + fwif_osinit->work_est_firmware_ccbctl_fw_addr = 0; + 
fwif_osinit->work_est_firmware_ccb_fw_addr = 0; + + pvr_fw_object_get_fw_addr(fw_mem->hwrinfobuf_obj, + &fwif_osinit->rogue_fwif_hwr_info_buf_ctl_fw_addr); + pvr_fw_object_get_fw_addr(fw_mem->osdata_obj, &fwif_osinit->fw_os_data_fw_addr); + + fwif_osinit->hwr_debug_dump_limit = 0; + + rogue_fwif_compchecks_bvnc_init(&fwif_osinit->rogue_comp_checks.hw_bvnc); + rogue_fwif_compchecks_bvnc_init(&fwif_osinit->rogue_comp_checks.fw_bvnc); +} + +static void +fw_osdata_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_osdata *fwif_osdata = cpu_ptr; + struct pvr_device *pvr_dev = priv; + struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; + + pvr_fw_object_get_fw_addr(fw_mem->power_sync_obj, &fwif_osdata->power_sync_fw_addr); +} + +static void +fw_fault_page_init(void *cpu_ptr, void *priv) +{ + u32 *fault_page = cpu_ptr; + + for (int i = 0; i < PVR_ROGUE_FAULT_PAGE_SIZE / sizeof(*fault_page); i++) + fault_page[i] = 0xdeadbee0; +} + +static void +fw_sysinit_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_sysinit *fwif_sysinit = cpu_ptr; + struct pvr_device *pvr_dev = priv; + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mem *fw_mem = &fw_dev->mem; + dma_addr_t fault_dma_addr = 0; + u32 clock_speed_hz = clk_get_rate(pvr_dev->core_clk); + + WARN_ON(!clock_speed_hz); + + WARN_ON(pvr_fw_object_get_dma_addr(fw_mem->fault_page_obj, 0, &fault_dma_addr)); + fwif_sysinit->fault_phys_addr = (u64)fault_dma_addr; + + fwif_sysinit->pds_exec_base = ROGUE_PDSCODEDATA_HEAP_BASE; + fwif_sysinit->usc_exec_base = ROGUE_USCCODE_HEAP_BASE; + + pvr_fw_object_get_fw_addr(fw_mem->runtime_cfg_obj, &fwif_sysinit->runtime_cfg_fw_addr); + pvr_fw_object_get_fw_addr(fw_dev->fw_trace.tracebuf_ctrl_obj, + &fwif_sysinit->trace_buf_ctl_fw_addr); + pvr_fw_object_get_fw_addr(fw_mem->sysdata_obj, &fwif_sysinit->fw_sys_data_fw_addr); + pvr_fw_object_get_fw_addr(fw_mem->gpu_util_fwcb_obj, + &fwif_sysinit->gpu_util_fw_cb_ctl_fw_addr); + if (fw_mem->core_data_obj) { + pvr_fw_object_get_fw_addr(fw_mem->core_data_obj, + &fwif_sysinit->coremem_data_store.fw_addr); + } + + /* Currently unsupported. */ + fwif_sysinit->counter_dump_ctl.buffer_fw_addr = 0; + fwif_sysinit->counter_dump_ctl.size_in_dwords = 0; + + /* Skip alignment checks. 
*/ + fwif_sysinit->align_checks = 0; + + fwif_sysinit->filter_flags = 0; + fwif_sysinit->hw_perf_filter = 0; + fwif_sysinit->firmware_perf = FW_PERF_CONF_NONE; + fwif_sysinit->initial_core_clock_speed = clock_speed_hz; + fwif_sysinit->active_pm_latency_ms = 0; + fwif_sysinit->gpio_validation_mode = ROGUE_FWIF_GPIO_VAL_OFF; + fwif_sysinit->firmware_started = false; + fwif_sysinit->marker_val = 1; + + memset(&fwif_sysinit->bvnc_km_feature_flags, 0, + sizeof(fwif_sysinit->bvnc_km_feature_flags)); +} + +#define ROGUE_FWIF_SLC_MIN_SIZE_FOR_DM_OVERLAP_KB 4 + +static void +fw_sysdata_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_sysdata *fwif_sysdata = cpu_ptr; + struct pvr_device *pvr_dev = priv; + u32 slc_size_in_kilobytes = 0; + u32 config_flags = 0; + + WARN_ON(PVR_FEATURE_VALUE(pvr_dev, slc_size_in_kilobytes, &slc_size_in_kilobytes)); + + if (slc_size_in_kilobytes < ROGUE_FWIF_SLC_MIN_SIZE_FOR_DM_OVERLAP_KB) + config_flags |= ROGUE_FWIF_INICFG_DISABLE_DM_OVERLAP; + + fwif_sysdata->config_flags = config_flags; +} + +static void +fw_runtime_cfg_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_runtime_cfg *runtime_cfg = cpu_ptr; + struct pvr_device *pvr_dev = priv; + u32 clock_speed_hz = clk_get_rate(pvr_dev->core_clk); + + WARN_ON(!clock_speed_hz); + + runtime_cfg->core_clock_speed = clock_speed_hz; + runtime_cfg->active_pm_latency_ms = 0; + runtime_cfg->active_pm_latency_persistant = true; + WARN_ON(PVR_FEATURE_VALUE(pvr_dev, num_clusters, + &runtime_cfg->default_dusts_num_init) != 0); +} + +static void +fw_gpu_util_fwcb_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_gpu_util_fwcb *gpu_util_fwcb = cpu_ptr; + + gpu_util_fwcb->last_word = PVR_FWIF_GPU_UTIL_STATE_IDLE; +} + +static int +pvr_fw_create_structures(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mem *fw_mem = &fw_dev->mem; + int err; + + fw_dev->power_sync = pvr_fw_object_create_and_map(pvr_dev, sizeof(*fw_dev->power_sync), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->power_sync_obj); + if (IS_ERR(fw_dev->power_sync)) { + drm_err(drm_dev, "Unable to allocate FW power_sync structure\n"); + return PTR_ERR(fw_dev->power_sync); + } + + fw_dev->hwrinfobuf = pvr_fw_object_create_and_map(pvr_dev, sizeof(*fw_dev->hwrinfobuf), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->hwrinfobuf_obj); + if (IS_ERR(fw_dev->hwrinfobuf)) { + drm_err(drm_dev, + "Unable to allocate FW hwrinfobuf structure\n"); + err = PTR_ERR(fw_dev->hwrinfobuf); + goto err_release_power_sync; + } + + err = pvr_fw_object_create(pvr_dev, PVR_SYNC_OBJ_SIZE, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->mmucache_sync_obj); + if (err) { + drm_err(drm_dev, + "Unable to allocate MMU cache sync object\n"); + goto err_release_hwrinfobuf; + } + + fw_dev->fwif_sysdata = pvr_fw_object_create_and_map(pvr_dev, + sizeof(*fw_dev->fwif_sysdata), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_sysdata_init, pvr_dev, + &fw_mem->sysdata_obj); + if (IS_ERR(fw_dev->fwif_sysdata)) { + drm_err(drm_dev, "Unable to allocate FW SYSDATA structure\n"); + err = PTR_ERR(fw_dev->fwif_sysdata); + goto err_release_mmucache_sync_obj; + } + + err = pvr_fw_object_create(pvr_dev, PVR_ROGUE_FAULT_PAGE_SIZE, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_fault_page_init, NULL, &fw_mem->fault_page_obj); + if (err) { + drm_err(drm_dev, "Unable to allocate FW fault page\n"); + goto err_release_sysdata; + } + + err = pvr_fw_object_create(pvr_dev, sizeof(struct 
rogue_fwif_gpu_util_fwcb), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_gpu_util_fwcb_init, pvr_dev, &fw_mem->gpu_util_fwcb_obj); + if (err) { + drm_err(drm_dev, "Unable to allocate GPU util FWCB\n"); + goto err_release_fault_page; + } + + err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_runtime_cfg), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_runtime_cfg_init, pvr_dev, &fw_mem->runtime_cfg_obj); + if (err) { + drm_err(drm_dev, "Unable to allocate FW runtime config\n"); + goto err_release_gpu_util_fwcb; + } + + err = pvr_fw_trace_init(pvr_dev); + if (err) + goto err_release_runtime_cfg; + + fw_dev->fwif_osdata = pvr_fw_object_create_and_map(pvr_dev, + sizeof(*fw_dev->fwif_osdata), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_osdata_init, pvr_dev, + &fw_mem->osdata_obj); + if (IS_ERR(fw_dev->fwif_osdata)) { + drm_err(drm_dev, "Unable to allocate FW OSDATA structure\n"); + err = PTR_ERR(fw_dev->fwif_osdata); + goto err_fw_trace_fini; + } + + fw_dev->fwif_osinit = + pvr_fw_object_create_and_map_offset(pvr_dev, + fw_dev->fw_heap_info.config_offset + + PVR_ROGUE_FWIF_OSINIT_OFFSET, + sizeof(*fw_dev->fwif_osinit), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_osinit_init, pvr_dev, &fw_mem->osinit_obj); + if (IS_ERR(fw_dev->fwif_osinit)) { + drm_err(drm_dev, "Unable to allocate FW OSINIT structure\n"); + err = PTR_ERR(fw_dev->fwif_osinit); + goto err_release_osdata; + } + + fw_dev->fwif_sysinit = + pvr_fw_object_create_and_map_offset(pvr_dev, + fw_dev->fw_heap_info.config_offset + + PVR_ROGUE_FWIF_SYSINIT_OFFSET, + sizeof(*fw_dev->fwif_sysinit), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_sysinit_init, pvr_dev, &fw_mem->sysinit_obj); + if (IS_ERR(fw_dev->fwif_sysinit)) { + drm_err(drm_dev, "Unable to allocate FW SYSINIT structure\n"); + err = PTR_ERR(fw_dev->fwif_sysinit); + goto err_release_osinit; + } + + return 0; + +err_release_osinit: + pvr_fw_object_unmap_and_destroy(fw_mem->osinit_obj); + +err_release_osdata: + pvr_fw_object_unmap_and_destroy(fw_mem->osdata_obj); + +err_fw_trace_fini: + pvr_fw_trace_fini(pvr_dev); + +err_release_runtime_cfg: + pvr_fw_object_destroy(fw_mem->runtime_cfg_obj); + +err_release_gpu_util_fwcb: + pvr_fw_object_destroy(fw_mem->gpu_util_fwcb_obj); + +err_release_fault_page: + pvr_fw_object_destroy(fw_mem->fault_page_obj); + +err_release_sysdata: + pvr_fw_object_unmap_and_destroy(fw_mem->sysdata_obj); + +err_release_mmucache_sync_obj: + pvr_fw_object_destroy(fw_mem->mmucache_sync_obj); + +err_release_hwrinfobuf: + pvr_fw_object_unmap_and_destroy(fw_mem->hwrinfobuf_obj); + +err_release_power_sync: + pvr_fw_object_unmap_and_destroy(fw_mem->power_sync_obj); + + return err; +} + +static void +pvr_fw_destroy_structures(struct pvr_device *pvr_dev) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mem *fw_mem = &fw_dev->mem; + + pvr_fw_trace_fini(pvr_dev); + pvr_fw_object_destroy(fw_mem->runtime_cfg_obj); + pvr_fw_object_destroy(fw_mem->gpu_util_fwcb_obj); + pvr_fw_object_destroy(fw_mem->fault_page_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->sysdata_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->sysinit_obj); + + pvr_fw_object_destroy(fw_mem->mmucache_sync_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->hwrinfobuf_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->power_sync_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->osdata_obj); + pvr_fw_object_unmap_and_destroy(fw_mem->osinit_obj); +} + +/** + * pvr_fw_process() - Process firmware image, allocate FW memory and create boot + * arguments + * @pvr_dev: Device pointer. 
+ * + * Returns: + * * 0 on success, or + * * Any error returned by pvr_fw_object_create_and_map_offset(), or + * * Any error returned by pvr_fw_object_create_and_map(). + */ +static int +pvr_fw_process(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; + const u8 *fw = pvr_dev->fw_dev.firmware->data; + const struct pvr_fw_layout_entry *private_data; + u8 *fw_code_ptr; + u8 *fw_data_ptr; + u8 *fw_core_code_ptr; + u8 *fw_core_data_ptr; + int err; + + layout_get_sizes(pvr_dev); + + private_data = pvr_fw_find_private_data(pvr_dev); + if (!private_data) + return -EINVAL; + + /* Allocate and map memory for firmware sections. */ + + /* + * Code allocation must be at the start of the firmware heap, otherwise + * firmware processor will be unable to boot. + * + * This has the useful side-effect that for every other object in the + * driver, a firmware address of 0 is invalid. + */ + fw_code_ptr = pvr_fw_object_create_and_map_offset(pvr_dev, 0, fw_mem->code_alloc_size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->code_obj); + if (IS_ERR(fw_code_ptr)) { + drm_err(drm_dev, "Unable to allocate FW code memory\n"); + return PTR_ERR(fw_code_ptr); + } + + if (pvr_dev->fw_dev.defs->has_fixed_data_addr()) { + u32 base_addr = private_data->base_addr & pvr_dev->fw_dev.fw_heap_info.offset_mask; + + fw_data_ptr = + pvr_fw_object_create_and_map_offset(pvr_dev, base_addr, + fw_mem->data_alloc_size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->data_obj); + } else { + fw_data_ptr = pvr_fw_object_create_and_map(pvr_dev, fw_mem->data_alloc_size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &fw_mem->data_obj); + } + if (IS_ERR(fw_data_ptr)) { + drm_err(drm_dev, "Unable to allocate FW data memory\n"); + err = PTR_ERR(fw_data_ptr); + goto err_free_fw_code_obj; + } + + /* Core code and data sections are optional. 
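+	 * Their alloc sizes are zero when the FW image carries no coremem
+	 * entries in its layout table (see layout_get_sizes()).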
+	 */
+	if (fw_mem->core_code_alloc_size) {
+		fw_core_code_ptr =
+			pvr_fw_object_create_and_map(pvr_dev, fw_mem->core_code_alloc_size,
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						     NULL, NULL, &fw_mem->core_code_obj);
+		if (IS_ERR(fw_core_code_ptr)) {
+			drm_err(drm_dev,
+				"Unable to allocate FW core code memory\n");
+			err = PTR_ERR(fw_core_code_ptr);
+			goto err_free_fw_data_obj;
+		}
+	} else {
+		fw_core_code_ptr = NULL;
+	}
+
+	if (fw_mem->core_data_alloc_size) {
+		fw_core_data_ptr =
+			pvr_fw_object_create_and_map(pvr_dev, fw_mem->core_data_alloc_size,
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						     NULL, NULL, &fw_mem->core_data_obj);
+		if (IS_ERR(fw_core_data_ptr)) {
+			drm_err(drm_dev,
+				"Unable to allocate FW core data memory\n");
+			err = PTR_ERR(fw_core_data_ptr);
+			goto err_free_fw_core_code_obj;
+		}
+	} else {
+		fw_core_data_ptr = NULL;
+	}
+
+	fw_mem->code = kzalloc(fw_mem->code_alloc_size, GFP_KERNEL);
+	fw_mem->data = kzalloc(fw_mem->data_alloc_size, GFP_KERNEL);
+	if (fw_mem->core_code_alloc_size)
+		fw_mem->core_code = kzalloc(fw_mem->core_code_alloc_size, GFP_KERNEL);
+	if (fw_mem->core_data_alloc_size)
+		fw_mem->core_data = kzalloc(fw_mem->core_data_alloc_size, GFP_KERNEL);
+
+	if (!fw_mem->code || !fw_mem->data ||
+	    (!fw_mem->core_code && fw_mem->core_code_alloc_size) ||
+	    (!fw_mem->core_data && fw_mem->core_data_alloc_size)) {
+		err = -ENOMEM;
+		goto err_free_kdata;
+	}
+
+	err = pvr_dev->fw_dev.defs->fw_process(pvr_dev, fw,
+					       fw_mem->code, fw_mem->data, fw_mem->core_code,
+					       fw_mem->core_data, fw_mem->core_code_alloc_size);
+
+	if (err)
+		goto err_free_kdata;
+
+	memcpy(fw_code_ptr, fw_mem->code, fw_mem->code_alloc_size);
+	memcpy(fw_data_ptr, fw_mem->data, fw_mem->data_alloc_size);
+	if (fw_mem->core_code)
+		memcpy(fw_core_code_ptr, fw_mem->core_code, fw_mem->core_code_alloc_size);
+	if (fw_mem->core_data)
+		memcpy(fw_core_data_ptr, fw_mem->core_data, fw_mem->core_data_alloc_size);
+
+	/* We're finished with the firmware section memory on the CPU, unmap.
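+	 * The kernel copies in fw_mem->code/data/core_* are deliberately
+	 * kept: pvr_fw_reinit_code_data() uses them to restore the sections
+	 * after a reset.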
*/
+ if (fw_core_data_ptr) {
+ pvr_fw_object_vunmap(fw_mem->core_data_obj);
+ fw_core_data_ptr = NULL;
+ }
+ if (fw_core_code_ptr) {
+ pvr_fw_object_vunmap(fw_mem->core_code_obj);
+ fw_core_code_ptr = NULL;
+ }
+ pvr_fw_object_vunmap(fw_mem->data_obj);
+ fw_data_ptr = NULL;
+ pvr_fw_object_vunmap(fw_mem->code_obj);
+ fw_code_ptr = NULL;
+
+ err = pvr_fw_create_fwif_connection_ctl(pvr_dev);
+ if (err)
+ goto err_free_kdata;
+
+ return 0;
+
+err_free_kdata:
+ kfree(fw_mem->core_data);
+ kfree(fw_mem->core_code);
+ kfree(fw_mem->data);
+ kfree(fw_mem->code);
+
+err_free_fw_core_data_obj:
+ if (fw_core_data_ptr)
+ pvr_fw_object_vunmap(fw_mem->core_data_obj);
+ if (fw_mem->core_data_obj)
+ pvr_fw_object_destroy(fw_mem->core_data_obj);
+
+err_free_fw_core_code_obj:
+ if (fw_core_code_ptr)
+ pvr_fw_object_vunmap(fw_mem->core_code_obj);
+ if (fw_mem->core_code_obj)
+ pvr_fw_object_destroy(fw_mem->core_code_obj);
+
+err_free_fw_data_obj:
+ if (fw_data_ptr)
+ pvr_fw_object_vunmap(fw_mem->data_obj);
+ pvr_fw_object_destroy(fw_mem->data_obj);
+
+err_free_fw_code_obj:
+ if (fw_code_ptr)
+ pvr_fw_object_vunmap(fw_mem->code_obj);
+ pvr_fw_object_destroy(fw_mem->code_obj);
+
+ return err;
+}
+
+static int
+pvr_copy_to_fw(struct pvr_fw_object *dest_obj, u8 *src_ptr, u32 size)
+{
+ u8 *dest_ptr = pvr_fw_object_vmap(dest_obj);
+
+ if (IS_ERR(dest_ptr))
+ return PTR_ERR(dest_ptr);
+
+ memcpy(dest_ptr, src_ptr, size);
+
+ pvr_fw_object_vunmap(dest_obj);
+
+ return 0;
+}
+
+static int
+pvr_fw_reinit_code_data(struct pvr_device *pvr_dev)
+{
+ struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+ int err;
+
+ err = pvr_copy_to_fw(fw_mem->code_obj, fw_mem->code, fw_mem->code_alloc_size);
+ if (err)
+ return err;
+
+ err = pvr_copy_to_fw(fw_mem->data_obj, fw_mem->data, fw_mem->data_alloc_size);
+ if (err)
+ return err;
+
+ if (fw_mem->core_code) {
+ err = pvr_copy_to_fw(fw_mem->core_code_obj, fw_mem->core_code,
+ fw_mem->core_code_alloc_size);
+ if (err)
+ return err;
+ }
+
+ if (fw_mem->core_data) {
+ err = pvr_copy_to_fw(fw_mem->core_data_obj, fw_mem->core_data,
+ fw_mem->core_data_alloc_size);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+pvr_fw_cleanup(struct pvr_device *pvr_dev)
+{
+ struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+
+ pvr_fw_fini_fwif_connection_ctl(pvr_dev);
+
+ kfree(fw_mem->core_data);
+ kfree(fw_mem->core_code);
+ kfree(fw_mem->data);
+ kfree(fw_mem->code);
+
+ if (fw_mem->core_code_obj)
+ pvr_fw_object_destroy(fw_mem->core_code_obj);
+ if (fw_mem->core_data_obj)
+ pvr_fw_object_destroy(fw_mem->core_data_obj);
+ pvr_fw_object_destroy(fw_mem->code_obj);
+ pvr_fw_object_destroy(fw_mem->data_obj);
+}
+
+/**
+ * pvr_wait_for_fw_boot() - Wait for firmware to finish booting
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * -%ETIMEDOUT if firmware fails to boot within timeout.
+ */
+int
+pvr_wait_for_fw_boot(struct pvr_device *pvr_dev)
+{
+ ktime_t deadline = ktime_add_us(ktime_get(), FW_BOOT_TIMEOUT_USEC);
+ struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+ while (ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) {
+ if (READ_ONCE(fw_dev->fwif_sysinit->firmware_started))
+ return 0;
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * pvr_fw_heap_info_init() - Calculate size and masks for FW heap
+ * @pvr_dev: Target PowerVR device.
+ * @log2_size: Log2 of raw heap size.
+ * @reserved_size: Size of reserved area of heap, in bytes. May be zero.
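+ *
+ * As an illustrative example (values assumed for this sketch, matching the
+ * META heap shift used later in this series): with @log2_size = 25 and
+ * @reserved_size = 0, this yields raw_size = 32M, offset_mask = 0x1ffffff,
+ * config_offset = 32M - PVR_ROGUE_FW_CONFIG_HEAP_SIZE and
+ * size = 32M - PVR_ROGUE_FW_CONFIG_HEAP_SIZE.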
+ */ +void +pvr_fw_heap_info_init(struct pvr_device *pvr_dev, u32 log2_size, u32 reserved_size) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + + fw_dev->fw_heap_info.gpu_addr = PVR_ROGUE_FW_MAIN_HEAP_BASE; + fw_dev->fw_heap_info.log2_size = log2_size; + fw_dev->fw_heap_info.reserved_size = reserved_size; + fw_dev->fw_heap_info.raw_size = 1 << fw_dev->fw_heap_info.log2_size; + fw_dev->fw_heap_info.offset_mask = fw_dev->fw_heap_info.raw_size - 1; + fw_dev->fw_heap_info.config_offset = fw_dev->fw_heap_info.raw_size - + PVR_ROGUE_FW_CONFIG_HEAP_SIZE; + fw_dev->fw_heap_info.size = fw_dev->fw_heap_info.raw_size - + (PVR_ROGUE_FW_CONFIG_HEAP_SIZE + reserved_size); +} + +/** + * pvr_fw_validate_init_device_info() - Validate firmware and initialise device information + * @pvr_dev: Target PowerVR device. + * + * This function must be called before querying device information. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if firmware validation fails. + */ +int +pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev) +{ + int err; + + err = pvr_fw_validate(pvr_dev); + if (err) + return err; + + return pvr_fw_get_device_info(pvr_dev); +} + +/** + * pvr_fw_init() - Initialise and boot firmware + * @pvr_dev: Target PowerVR device + * + * On successful completion of the function the PowerVR device will be + * initialised and ready to use. + * + * Returns: + * * 0 on success, + * * -%EINVAL on invalid firmware image, + * * -%ENOMEM on out of memory, or + * * -%ETIMEDOUT if firmware processor fails to boot or on register poll timeout. + */ +int +pvr_fw_init(struct pvr_device *pvr_dev) +{ + u32 kccb_size_log2 = ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT; + u32 kccb_rtn_size = (1 << kccb_size_log2) * sizeof(*pvr_dev->kccb.rtn); + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + int err; + + if (fw_dev->processor_type == PVR_FW_PROCESSOR_TYPE_META) + fw_dev->defs = &pvr_fw_defs_meta; + else if (fw_dev->processor_type == PVR_FW_PROCESSOR_TYPE_MIPS) + fw_dev->defs = &pvr_fw_defs_mips; + else + return -EINVAL; + + err = fw_dev->defs->init(pvr_dev); + if (err) + return err; + + drm_mm_init(&fw_dev->fw_mm, ROGUE_FW_HEAP_BASE, fw_dev->fw_heap_info.raw_size); + fw_dev->fw_mm_base = ROGUE_FW_HEAP_BASE; + spin_lock_init(&fw_dev->fw_mm_lock); + + INIT_LIST_HEAD(&fw_dev->fw_objs.list); + err = drmm_mutex_init(from_pvr_device(pvr_dev), &fw_dev->fw_objs.lock); + if (err) + goto err_mm_takedown; + + err = pvr_fw_process(pvr_dev); + if (err) + goto err_mm_takedown; + + /* Initialise KCCB and FWCCB. */ + err = pvr_kccb_init(pvr_dev); + if (err) + goto err_fw_cleanup; + + err = pvr_fwccb_init(pvr_dev); + if (err) + goto err_kccb_fini; + + /* Allocate memory for KCCB return slots. 
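+ * One return slot exists per KCCB command slot, so the buffer holds
+ * (1 << kccb_size_log2) entries of sizeof(*pvr_dev->kccb.rtn) bytes each,
+ * matching kccb_rtn_size above.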
*/ + pvr_dev->kccb.rtn = pvr_fw_object_create_and_map(pvr_dev, kccb_rtn_size, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &pvr_dev->kccb.rtn_obj); + if (IS_ERR(pvr_dev->kccb.rtn)) { + err = PTR_ERR(pvr_dev->kccb.rtn); + goto err_fwccb_fini; + } + + err = pvr_fw_create_structures(pvr_dev); + if (err) + goto err_kccb_rtn_release; + + err = pvr_fw_start(pvr_dev); + if (err) + goto err_destroy_structures; + + err = pvr_wait_for_fw_boot(pvr_dev); + if (err) { + drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n"); + goto err_fw_stop; + } + + fw_dev->booted = true; + + return 0; + +err_fw_stop: + pvr_fw_stop(pvr_dev); + +err_destroy_structures: + pvr_fw_destroy_structures(pvr_dev); + +err_kccb_rtn_release: + pvr_fw_object_unmap_and_destroy(pvr_dev->kccb.rtn_obj); + +err_fwccb_fini: + pvr_ccb_fini(&pvr_dev->fwccb); + +err_kccb_fini: + pvr_kccb_fini(pvr_dev); + +err_fw_cleanup: + pvr_fw_cleanup(pvr_dev); + +err_mm_takedown: + drm_mm_takedown(&fw_dev->fw_mm); + + if (fw_dev->defs->fini) + fw_dev->defs->fini(pvr_dev); + + return err; +} + +/** + * pvr_fw_fini() - Shutdown firmware processor and free associated memory + * @pvr_dev: Target PowerVR device + */ +void +pvr_fw_fini(struct pvr_device *pvr_dev) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + + fw_dev->booted = false; + + pvr_fw_destroy_structures(pvr_dev); + pvr_fw_object_unmap_and_destroy(pvr_dev->kccb.rtn_obj); + + /* + * Ensure FWCCB worker has finished executing before destroying FWCCB. The IRQ handler has + * been unregistered at this point so no new work should be being submitted. + */ + pvr_ccb_fini(&pvr_dev->fwccb); + pvr_kccb_fini(pvr_dev); + pvr_fw_cleanup(pvr_dev); + + mutex_lock(&pvr_dev->fw_dev.fw_objs.lock); + WARN_ON(!list_empty(&pvr_dev->fw_dev.fw_objs.list)); + mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock); + + drm_mm_takedown(&fw_dev->fw_mm); + + if (fw_dev->defs->fini) + fw_dev->defs->fini(pvr_dev); +} + +/** + * pvr_fw_mts_schedule() - Schedule work via an MTS kick + * @pvr_dev: Target PowerVR device + * @val: Kick mask. Should be a combination of %ROGUE_CR_MTS_SCHEDULE_* + */ +void +pvr_fw_mts_schedule(struct pvr_device *pvr_dev, u32 val) +{ + /* Ensure memory is flushed before kicking MTS. */ + wmb(); + + pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_SCHEDULE, val); + + /* Ensure the MTS kick goes through before continuing. */ + mb(); +} + +/** + * pvr_fw_structure_cleanup() - Send FW cleanup request for an object + * @pvr_dev: Target PowerVR device. + * @type: Type of object to cleanup. Must be one of &enum rogue_fwif_cleanup_type. + * @fw_obj: Pointer to FW object containing object to cleanup. + * @offset: Offset within FW object of object to cleanup. + * + * Returns: + * * 0 on success, + * * -EBUSY if object is busy, + * * -ETIMEDOUT on timeout, or + * * -EIO if device is lost. 
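+ *
+ * A minimal usage sketch (illustrative only; "free_list" is an assumed
+ * caller-side structure holding a FW object, not something defined in this
+ * file):
+ *
+ *	err = pvr_fw_structure_cleanup(pvr_dev, ROGUE_FWIF_CLEANUP_FREELIST,
+ *				       free_list->fw_obj, 0);
+ *
+ * where -EBUSY means the firmware still holds references to the object and
+ * the caller may retry later.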
+ */ +int +pvr_fw_structure_cleanup(struct pvr_device *pvr_dev, u32 type, struct pvr_fw_object *fw_obj, + u32 offset) +{ + struct rogue_fwif_kccb_cmd cmd; + int slot_nr; + int idx; + int err; + u32 rtn; + + struct rogue_fwif_cleanup_request *cleanup_req = &cmd.cmd_data.cleanup_data; + + down_read(&pvr_dev->reset_sem); + + if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx)) { + err = -EIO; + goto err_up_read; + } + + cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_CLEANUP; + cmd.kccb_flags = 0; + cleanup_req->cleanup_type = type; + + switch (type) { + case ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT: + pvr_fw_object_get_fw_addr_offset(fw_obj, offset, + &cleanup_req->cleanup_data.context_fw_addr); + break; + case ROGUE_FWIF_CLEANUP_HWRTDATA: + pvr_fw_object_get_fw_addr_offset(fw_obj, offset, + &cleanup_req->cleanup_data.hwrt_data_fw_addr); + break; + case ROGUE_FWIF_CLEANUP_FREELIST: + pvr_fw_object_get_fw_addr_offset(fw_obj, offset, + &cleanup_req->cleanup_data.freelist_fw_addr); + break; + default: + err = -EINVAL; + goto err_drm_dev_exit; + } + + err = pvr_kccb_send_cmd(pvr_dev, &cmd, &slot_nr); + if (err) + goto err_drm_dev_exit; + + err = pvr_kccb_wait_for_completion(pvr_dev, slot_nr, HZ, &rtn); + if (err) + goto err_drm_dev_exit; + + if (rtn & ROGUE_FWIF_KCCB_RTN_SLOT_CLEANUP_BUSY) + err = -EBUSY; + +err_drm_dev_exit: + drm_dev_exit(idx); + +err_up_read: + up_read(&pvr_dev->reset_sem); + + return err; +} + +/** + * pvr_fw_object_fw_map() - Map a FW object in firmware address space + * @pvr_dev: Device pointer. + * @fw_obj: FW object to map. + * @dev_addr: Desired address in device space, if a specific address is + * required. 0 otherwise. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if @fw_obj is already mapped but has no references, or + * * Any error returned by DRM. + */ +static int +pvr_fw_object_fw_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj, u64 dev_addr) +{ + struct pvr_gem_object *pvr_obj = fw_obj->gem; + struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj); + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + + int err; + + spin_lock(&fw_dev->fw_mm_lock); + + if (drm_mm_node_allocated(&fw_obj->fw_mm_node)) { + err = -EINVAL; + goto err_unlock; + } + + if (!dev_addr) { + /* + * Allocate from the main heap only (firmware heap minus + * config space). + */ + err = drm_mm_insert_node_in_range(&fw_dev->fw_mm, &fw_obj->fw_mm_node, + gem_obj->size, 0, 0, + fw_dev->fw_heap_info.gpu_addr, + fw_dev->fw_heap_info.gpu_addr + + fw_dev->fw_heap_info.size, 0); + if (err) + goto err_unlock; + } else { + fw_obj->fw_mm_node.start = dev_addr; + fw_obj->fw_mm_node.size = gem_obj->size; + err = drm_mm_reserve_node(&fw_dev->fw_mm, &fw_obj->fw_mm_node); + if (err) + goto err_unlock; + } + + spin_unlock(&fw_dev->fw_mm_lock); + + /* Map object on GPU. */ + err = fw_dev->defs->vm_map(pvr_dev, fw_obj); + if (err) + goto err_remove_node; + + fw_obj->fw_addr_offset = (u32)(fw_obj->fw_mm_node.start - fw_dev->fw_mm_base); + + return 0; + +err_remove_node: + spin_lock(&fw_dev->fw_mm_lock); + drm_mm_remove_node(&fw_obj->fw_mm_node); + +err_unlock: + spin_unlock(&fw_dev->fw_mm_lock); + + return err; +} + +/** + * pvr_fw_object_fw_unmap() - Unmap a previously mapped FW object + * @fw_obj: FW object to unmap. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if object is not currently mapped. 
+ */
+static int
+pvr_fw_object_fw_unmap(struct pvr_fw_object *fw_obj)
+{
+ struct pvr_gem_object *pvr_obj = fw_obj->gem;
+ struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+ struct pvr_device *pvr_dev = to_pvr_device(gem_obj->dev);
+ struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+ fw_dev->defs->vm_unmap(pvr_dev, fw_obj);
+
+ spin_lock(&fw_dev->fw_mm_lock);
+
+ if (!drm_mm_node_allocated(&fw_obj->fw_mm_node)) {
+ spin_unlock(&fw_dev->fw_mm_lock);
+ return -EINVAL;
+ }
+
+ drm_mm_remove_node(&fw_obj->fw_mm_node);
+
+ spin_unlock(&fw_dev->fw_mm_lock);
+
+ return 0;
+}
+
+static void *
+pvr_fw_object_create_and_map_common(struct pvr_device *pvr_dev, size_t size,
+ u64 flags, u64 dev_addr,
+ void (*init)(void *cpu_ptr, void *priv),
+ void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+ struct pvr_fw_object *fw_obj;
+ void *cpu_ptr;
+ int err;
+
+ /* %DRM_PVR_BO_PM_FW_PROTECT is implicit for FW objects. */
+ flags |= DRM_PVR_BO_PM_FW_PROTECT;
+
+ fw_obj = kzalloc(sizeof(*fw_obj), GFP_KERNEL);
+ if (!fw_obj)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&fw_obj->node);
+ fw_obj->init = init;
+ fw_obj->init_priv = init_priv;
+
+ fw_obj->gem = pvr_gem_object_create(pvr_dev, size, flags);
+ if (IS_ERR(fw_obj->gem)) {
+ err = PTR_ERR(fw_obj->gem);
+ fw_obj->gem = NULL;
+ goto err_put_object;
+ }
+
+ err = pvr_fw_object_fw_map(pvr_dev, fw_obj, dev_addr);
+ if (err)
+ goto err_put_object;
+
+ cpu_ptr = pvr_fw_object_vmap(fw_obj);
+ if (IS_ERR(cpu_ptr)) {
+ err = PTR_ERR(cpu_ptr);
+ goto err_put_object;
+ }
+
+ *fw_obj_out = fw_obj;
+
+ if (fw_obj->init)
+ fw_obj->init(cpu_ptr, fw_obj->init_priv);
+
+ mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+ list_add_tail(&fw_obj->node, &pvr_dev->fw_dev.fw_objs.list);
+ mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+ return cpu_ptr;
+
+err_put_object:
+ pvr_fw_object_destroy(fw_obj);
+
+ return ERR_PTR(err);
+}
+
+/**
+ * pvr_fw_object_create() - Create a FW object and map to firmware
+ * @pvr_dev: PowerVR device pointer.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any error returned by pvr_fw_object_create_and_map_common().
+ */
+int
+pvr_fw_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags,
+ void (*init)(void *cpu_ptr, void *priv), void *init_priv,
+ struct pvr_fw_object **fw_obj_out)
+{
+ void *cpu_ptr;
+
+ cpu_ptr = pvr_fw_object_create_and_map_common(pvr_dev, size, flags, 0, init, init_priv,
+ fw_obj_out);
+ if (IS_ERR(cpu_ptr))
+ return PTR_ERR(cpu_ptr);
+
+ pvr_fw_object_vunmap(*fw_obj_out);
+
+ return 0;
+}
+
+/**
+ * pvr_fw_object_create_and_map() - Create a FW object and map to firmware and CPU
+ * @pvr_dev: PowerVR device pointer.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Caller is responsible for calling pvr_fw_object_vunmap() to release the CPU
+ * mapping.
+ *
+ * Returns:
+ * * Pointer to CPU mapping of newly created object, or
+ * * Any error returned by pvr_fw_object_create(), or
+ * * Any error returned by pvr_fw_object_vmap().
+ */
+void *
+pvr_fw_object_create_and_map(struct pvr_device *pvr_dev, size_t size, u64 flags,
+ void (*init)(void *cpu_ptr, void *priv),
+ void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+ return pvr_fw_object_create_and_map_common(pvr_dev, size, flags, 0, init, init_priv,
+ fw_obj_out);
+}
+
+/**
+ * pvr_fw_object_create_and_map_offset() - Create a FW object and map to
+ * firmware at the provided offset and to the CPU.
+ * @pvr_dev: PowerVR device pointer.
+ * @dev_offset: Base address of desired FW mapping, offset from start of FW heap.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Caller is responsible for calling pvr_fw_object_vunmap() to release the CPU
+ * mapping.
+ *
+ * Returns:
+ * * Pointer to CPU mapping of newly created object, or
+ * * Any error returned by pvr_fw_object_create(), or
+ * * Any error returned by pvr_fw_object_vmap().
+ */
+void *
+pvr_fw_object_create_and_map_offset(struct pvr_device *pvr_dev,
+ u32 dev_offset, size_t size, u64 flags,
+ void (*init)(void *cpu_ptr, void *priv),
+ void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+ u64 dev_addr = pvr_dev->fw_dev.fw_mm_base + dev_offset;
+
+ return pvr_fw_object_create_and_map_common(pvr_dev, size, flags, dev_addr, init, init_priv,
+ fw_obj_out);
+}
+
+/**
+ * pvr_fw_object_destroy() - Destroy a pvr_fw_object
+ * @fw_obj: Pointer to object to destroy.
+ */
+void pvr_fw_object_destroy(struct pvr_fw_object *fw_obj)
+{
+ struct pvr_gem_object *pvr_obj = fw_obj->gem;
+ struct drm_gem_object *gem_obj;
+ struct pvr_device *pvr_dev;
+
+ if (!pvr_obj) {
+ /* Object creation failed before the GEM object existed. */
+ kfree(fw_obj);
+ return;
+ }
+
+ gem_obj = gem_from_pvr_gem(pvr_obj);
+ pvr_dev = to_pvr_device(gem_obj->dev);
+
+ mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+ list_del(&fw_obj->node);
+ mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+ if (drm_mm_node_allocated(&fw_obj->fw_mm_node)) {
+ /* If we can't unmap, leak the memory. */
+ if (WARN_ON(pvr_fw_object_fw_unmap(fw_obj)))
+ return;
+ }
+
+ pvr_gem_object_put(fw_obj->gem);
+
+ kfree(fw_obj);
+}
+
+/**
+ * pvr_fw_object_get_fw_addr_offset() - Return address of object in firmware address space, with
+ * given offset.
+ * @fw_obj: Pointer to object.
+ * @offset: Desired offset from start of object.
+ * @fw_addr_out: Location to store address to.
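+ *
+ * For example (an illustrative sketch; "ctx_obj" is an assumed caller-side
+ * FW object, not something defined in this file), a caller needing the FW
+ * address of a structure held in a FW object can write:
+ *
+ *	u32 fw_addr;
+ *
+ *	pvr_fw_object_get_fw_addr_offset(ctx_obj, 0, &fw_addr);
+ *
+ * This mirrors how the cleanup requests above resolve context, HWRT data and
+ * freelist addresses.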
+ */
+void pvr_fw_object_get_fw_addr_offset(struct pvr_fw_object *fw_obj, u32 offset, u32 *fw_addr_out)
+{
+ struct pvr_gem_object *pvr_obj = fw_obj->gem;
+ struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(pvr_obj)->dev);
+
+ *fw_addr_out = pvr_dev->fw_dev.defs->get_fw_addr_with_offset(fw_obj, offset);
+}
+
+/**
+ * pvr_fw_hard_reset() - Re-initialise the FW code and data segments, and reset all global FW
+ * structures
+ * @pvr_dev: Device pointer
+ *
+ * If this function returns an error then the caller must regard the device as lost.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any error returned by pvr_fw_reinit_code_data().
+ */
+int
+pvr_fw_hard_reset(struct pvr_device *pvr_dev)
+{
+ struct list_head *pos;
+
+ /* Reset all FW objects */
+ mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+
+ list_for_each(pos, &pvr_dev->fw_dev.fw_objs.list) {
+ struct pvr_fw_object *fw_obj = container_of(pos, struct pvr_fw_object, node);
+ void *cpu_ptr = pvr_fw_object_vmap(fw_obj);
+
+ if (WARN_ON(IS_ERR(cpu_ptr)))
+ continue;
+
+ if (!(fw_obj->gem->flags & PVR_BO_FW_NO_CLEAR_ON_RESET)) {
+ memset(cpu_ptr, 0, pvr_gem_object_size(fw_obj->gem));
+
+ if (fw_obj->init)
+ fw_obj->init(cpu_ptr, fw_obj->init_priv);
+ }
+
+ pvr_fw_object_vunmap(fw_obj);
+ }
+
+ mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+ return pvr_fw_reinit_code_data(pvr_dev);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_fw.h b/drivers/gpu/drm/imagination/pvr_fw.h new file mode 100644 index 000000000000..b7966bd574a9 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw.h @@ -0,0 +1,509 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_H
+#define PVR_FW_H
+
+#include "pvr_fw_info.h"
+#include "pvr_fw_trace.h"
+#include "pvr_gem.h"
+
+#include <drm/drm_mm.h>
+
+#include <linux/types.h>
+
+/* Forward declarations from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+/* Forward declaration from "pvr_vm.h". */
+struct pvr_vm_context;
+
+#define ROGUE_FWIF_FWCCB_NUMCMDS_LOG2 5
+
+#define ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT 7
+
+/**
+ * struct pvr_fw_object - container for firmware memory allocations
+ */
+struct pvr_fw_object {
+ /** @ref_count: FW object reference counter. */
+ struct kref ref_count;
+
+ /** @gem: GEM object backing the FW object. */
+ struct pvr_gem_object *gem;
+
+ /**
+ * @fw_mm_node: Node representing mapping in FW address space. Insertion
+ * and removal are protected by &struct pvr_fw_device->fw_mm_lock.
+ */
+ struct drm_mm_node fw_mm_node;
+
+ /**
+ * @fw_addr_offset: Virtual address offset of firmware mapping. Only
+ * valid while the object is mapped into the firmware address
+ * space, i.e. while @fw_mm_node is allocated.
+ */
+ u32 fw_addr_offset;
+
+ /**
+ * @init: Initialisation callback. Will be called on object creation and FW hard reset.
+ * Object will have been zeroed before this is called.
+ */
+ void (*init)(void *cpu_ptr, void *priv);
+
+ /** @init_priv: Private data for initialisation callback. */
+ void *init_priv;
+
+ /** @node: Node for firmware object list. */
+ struct list_head node;
+};
+
+/**
+ * struct pvr_fw_defs - FW processor function table and static definitions
+ */
+struct pvr_fw_defs {
+ /**
+ * @init:
+ *
+ * FW processor specific initialisation.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function must call pvr_fw_heap_info_init() to initialise the firmware heap for this
+ * FW processor.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any appropriate error on failure.
+ */
+ int (*init)(struct pvr_device *pvr_dev);
+
+ /**
+ * @fini:
+ *
+ * FW processor specific finalisation.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function is optional.
+ */
+ void (*fini)(struct pvr_device *pvr_dev);
+
+ /**
+ * @fw_process:
+ *
+ * Load and process firmware image.
+ * @pvr_dev: Target PowerVR device.
+ * @fw: Pointer to firmware image.
+ * @fw_code_ptr: Pointer to firmware code section.
+ * @fw_data_ptr: Pointer to firmware data section.
+ * @fw_core_code_ptr: Pointer to firmware core code section. May be %NULL.
+ * @fw_core_data_ptr: Pointer to firmware core data section. May be %NULL.
+ * @core_code_alloc_size: Total allocation size of core code section.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any appropriate error on failure.
+ */
+ int (*fw_process)(struct pvr_device *pvr_dev, const u8 *fw,
+ u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr,
+ u8 *fw_core_data_ptr, u32 core_code_alloc_size);
+
+ /**
+ * @vm_map:
+ *
+ * Map FW object into FW processor address space.
+ * @pvr_dev: Target PowerVR device.
+ * @fw_obj: FW object to map.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any appropriate error on failure.
+ */
+ int (*vm_map)(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+
+ /**
+ * @vm_unmap:
+ *
+ * Unmap FW object from FW processor address space.
+ * @pvr_dev: Target PowerVR device.
+ * @fw_obj: FW object to unmap.
+ *
+ * This function is mandatory.
+ */
+ void (*vm_unmap)(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+
+ /**
+ * @get_fw_addr_with_offset:
+ *
+ * Called to get address of object in firmware address space, with offset.
+ * @fw_obj: Pointer to object.
+ * @offset: Desired offset from start of object.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * Address in firmware address space.
+ */
+ u32 (*get_fw_addr_with_offset)(struct pvr_fw_object *fw_obj, u32 offset);
+
+ /**
+ * @wrapper_init:
+ *
+ * Called to initialise FW wrapper.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * 0 on success, or
+ * * Any appropriate error on failure.
+ */
+ int (*wrapper_init)(struct pvr_device *pvr_dev);
+
+ /**
+ * @has_fixed_data_addr:
+ *
+ * Called to check if firmware fixed data must be loaded at the address given by the
+ * firmware layout table.
+ *
+ * This function is mandatory.
+ *
+ * Returns:
+ * * %true if firmware fixed data must be loaded at the address given by the firmware
+ * layout table.
+ * * %false otherwise.
+ */
+ bool (*has_fixed_data_addr)(void);
+
+ /**
+ * @irq: FW Interrupt information.
+ *
+ * These are processor dependent, and should be initialized by the
+ * processor backend in pvr_fw_defs::init().
+ */
+ struct {
+ /** @enable_reg: FW interrupt enable register. */
+ u32 enable_reg;
+
+ /** @status_reg: FW interrupt status register. */
+ u32 status_reg;
+
+ /**
+ * @clear_reg: FW interrupt clear register.
+ *
+ * If @status_reg == @clear_reg, we clear by writing a bit to zero,
+ * otherwise we clear by writing a bit to one.
+ */
+ u32 clear_reg;
+
+ /** @event_mask: Bitmask of events to listen for. */
+ u32 event_mask;
+
+ /** @clear_mask: Value to write to the clear_reg in order to clear FW IRQs. */
+ u32 clear_mask;
+ } irq;
+};
+
+/**
+ * struct pvr_fw_mem - FW memory allocations
+ */
+struct pvr_fw_mem {
+ /** @code_obj: Object representing firmware code.
*/
+ struct pvr_fw_object *code_obj;
+
+ /** @data_obj: Object representing firmware data. */
+ struct pvr_fw_object *data_obj;
+
+ /**
+ * @core_code_obj: Object representing firmware core code. May be
+ * %NULL if firmware does not contain this section.
+ */
+ struct pvr_fw_object *core_code_obj;
+
+ /**
+ * @core_data_obj: Object representing firmware core data. May be
+ * %NULL if firmware does not contain this section.
+ */
+ struct pvr_fw_object *core_data_obj;
+
+ /** @code: Driver-side copy of firmware code. */
+ u8 *code;
+
+ /** @data: Driver-side copy of firmware data. */
+ u8 *data;
+
+ /**
+ * @core_code: Driver-side copy of firmware core code. May be %NULL if firmware does not
+ * contain this section.
+ */
+ u8 *core_code;
+
+ /**
+ * @core_data: Driver-side copy of firmware core data. May be %NULL if firmware does not
+ * contain this section.
+ */
+ u8 *core_data;
+
+ /** @code_alloc_size: Allocation size of firmware code section. */
+ u32 code_alloc_size;
+
+ /** @data_alloc_size: Allocation size of firmware data section. */
+ u32 data_alloc_size;
+
+ /** @core_code_alloc_size: Allocation size of firmware core code section. */
+ u32 core_code_alloc_size;
+
+ /** @core_data_alloc_size: Allocation size of firmware core data section. */
+ u32 core_data_alloc_size;
+
+ /**
+ * @fwif_connection_ctl_obj: Object representing FWIF connection control
+ * structure.
+ */
+ struct pvr_fw_object *fwif_connection_ctl_obj;
+
+ /** @osinit_obj: Object representing FW OSINIT structure. */
+ struct pvr_fw_object *osinit_obj;
+
+ /** @sysinit_obj: Object representing FW SYSINIT structure. */
+ struct pvr_fw_object *sysinit_obj;
+
+ /** @osdata_obj: Object representing FW OSDATA structure. */
+ struct pvr_fw_object *osdata_obj;
+
+ /** @hwrinfobuf_obj: Object representing FW hwrinfobuf structure. */
+ struct pvr_fw_object *hwrinfobuf_obj;
+
+ /** @sysdata_obj: Object representing FW SYSDATA structure. */
+ struct pvr_fw_object *sysdata_obj;
+
+ /** @power_sync_obj: Object representing power sync state. */
+ struct pvr_fw_object *power_sync_obj;
+
+ /** @fault_page_obj: Object representing FW fault page. */
+ struct pvr_fw_object *fault_page_obj;
+
+ /** @gpu_util_fwcb_obj: Object representing FW GPU utilisation control structure. */
+ struct pvr_fw_object *gpu_util_fwcb_obj;
+
+ /** @runtime_cfg_obj: Object representing FW runtime config structure. */
+ struct pvr_fw_object *runtime_cfg_obj;
+
+ /** @mmucache_sync_obj: Object used as the sync parameter in an MMU cache operation. */
+ struct pvr_fw_object *mmucache_sync_obj;
+};
+
+/**
+ * struct pvr_fw_device - Firmware state for a PowerVR device
+ */
+struct pvr_fw_device {
+ /** @firmware: Handle to the firmware loaded into the device. */
+ const struct firmware *firmware;
+
+ /** @header: Pointer to firmware header. */
+ const struct pvr_fw_info_header *header;
+
+ /** @layout_entries: Pointer to firmware layout. */
+ const struct pvr_fw_layout_entry *layout_entries;
+
+ /** @mem: Structure containing objects representing firmware memory allocations. */
+ struct pvr_fw_mem mem;
+
+ /** @booted: %true if the firmware has been booted, %false otherwise. */
+ bool booted;
+
+ /**
+ * @processor_type: FW processor type for this device. Must be one of
+ * %PVR_FW_PROCESSOR_TYPE_*.
+ */
+ u16 processor_type;
+
+ /** @defs: Function table for the FW processor used by this device. */
+ const struct pvr_fw_defs *defs;
+
+ /** @processor_data: Pointer to data specific to FW processor. */
+ union {
+ /** @mips_data: Pointer to MIPS-specific data.
*/ + struct pvr_fw_mips_data *mips_data; + } processor_data; + + /** @fw_heap_info: Firmware heap information. */ + struct { + /** @gpu_addr: Base address of firmware heap in GPU address space. */ + u64 gpu_addr; + + /** @size: Size of main area of heap. */ + u32 size; + + /** @offset_mask: Mask for offsets within FW heap. */ + u32 offset_mask; + + /** @raw_size: Raw size of heap, including reserved areas. */ + u32 raw_size; + + /** @log2_size: Log2 of raw size of heap. */ + u32 log2_size; + + /** @config_offset: Offset of config area within heap. */ + u32 config_offset; + + /** @reserved_size: Size of reserved area in heap. */ + u32 reserved_size; + } fw_heap_info; + + /** @fw_mm: Firmware address space allocator. */ + struct drm_mm fw_mm; + + /** @fw_mm_lock: Lock protecting access to &fw_mm. */ + spinlock_t fw_mm_lock; + + /** @fw_mm_base: Base address of address space managed by @fw_mm. */ + u64 fw_mm_base; + + /** + * @fwif_connection_ctl: Pointer to CPU mapping of FWIF connection + * control structure. + */ + struct rogue_fwif_connection_ctl *fwif_connection_ctl; + + /** @fwif_sysinit: Pointer to CPU mapping of FW SYSINIT structure. */ + struct rogue_fwif_sysinit *fwif_sysinit; + + /** @fwif_sysdata: Pointer to CPU mapping of FW SYSDATA structure. */ + struct rogue_fwif_sysdata *fwif_sysdata; + + /** @fwif_osinit: Pointer to CPU mapping of FW OSINIT structure. */ + struct rogue_fwif_osinit *fwif_osinit; + + /** @fwif_osdata: Pointer to CPU mapping of FW OSDATA structure. */ + struct rogue_fwif_osdata *fwif_osdata; + + /** @power_sync: Pointer to CPU mapping of power sync state. */ + u32 *power_sync; + + /** @hwrinfobuf: Pointer to CPU mapping of FW HWR info buffer. */ + struct rogue_fwif_hwrinfobuf *hwrinfobuf; + + /** @fw_trace: Device firmware trace buffer state. */ + struct pvr_fw_trace fw_trace; + + /** @fw_objs: Structure tracking FW objects. */ + struct { + /** @list: Head of FW object list. */ + struct list_head list; + + /** @lock: Lock protecting access to FW object list. 
*/ + struct mutex lock; + } fw_objs; +}; + +#define pvr_fw_irq_read_reg(pvr_dev, name) \ + pvr_cr_read32((pvr_dev), (pvr_dev)->fw_dev.defs->irq.name ## _reg) + +#define pvr_fw_irq_write_reg(pvr_dev, name, value) \ + pvr_cr_write32((pvr_dev), (pvr_dev)->fw_dev.defs->irq.name ## _reg, value) + +#define pvr_fw_irq_pending(pvr_dev) \ + (pvr_fw_irq_read_reg(pvr_dev, status) & (pvr_dev)->fw_dev.defs->irq.event_mask) + +#define pvr_fw_irq_clear(pvr_dev) \ + pvr_fw_irq_write_reg(pvr_dev, clear, (pvr_dev)->fw_dev.defs->irq.clear_mask) + +#define pvr_fw_irq_enable(pvr_dev) \ + pvr_fw_irq_write_reg(pvr_dev, enable, (pvr_dev)->fw_dev.defs->irq.event_mask) + +#define pvr_fw_irq_disable(pvr_dev) \ + pvr_fw_irq_write_reg(pvr_dev, enable, 0) + +extern const struct pvr_fw_defs pvr_fw_defs_meta; +extern const struct pvr_fw_defs pvr_fw_defs_mips; + +int pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev); +int pvr_fw_init(struct pvr_device *pvr_dev); +void pvr_fw_fini(struct pvr_device *pvr_dev); + +int pvr_wait_for_fw_boot(struct pvr_device *pvr_dev); + +int +pvr_fw_hard_reset(struct pvr_device *pvr_dev); + +void pvr_fw_mts_schedule(struct pvr_device *pvr_dev, u32 val); + +void +pvr_fw_heap_info_init(struct pvr_device *pvr_dev, u32 log2_size, u32 reserved_size); + +const struct pvr_fw_layout_entry * +pvr_fw_find_layout_entry(struct pvr_device *pvr_dev, enum pvr_fw_section_id id); +int +pvr_fw_find_mmu_segment(struct pvr_device *pvr_dev, u32 addr, u32 size, void *fw_code_ptr, + void *fw_data_ptr, void *fw_core_code_ptr, void *fw_core_data_ptr, + void **host_addr_out); + +int +pvr_fw_structure_cleanup(struct pvr_device *pvr_dev, u32 type, struct pvr_fw_object *fw_obj, + u32 offset); + +int pvr_fw_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags, + void (*init)(void *cpu_ptr, void *priv), void *init_priv, + struct pvr_fw_object **pvr_obj_out); + +void *pvr_fw_object_create_and_map(struct pvr_device *pvr_dev, size_t size, u64 flags, + void (*init)(void *cpu_ptr, void *priv), + void *init_priv, struct pvr_fw_object **pvr_obj_out); + +void * +pvr_fw_object_create_and_map_offset(struct pvr_device *pvr_dev, u32 dev_offset, size_t size, + u64 flags, void (*init)(void *cpu_ptr, void *priv), + void *init_priv, struct pvr_fw_object **pvr_obj_out); + +static __always_inline void * +pvr_fw_object_vmap(struct pvr_fw_object *fw_obj) +{ + return pvr_gem_object_vmap(fw_obj->gem); +} + +static __always_inline void +pvr_fw_object_vunmap(struct pvr_fw_object *fw_obj) +{ + pvr_gem_object_vunmap(fw_obj->gem); +} + +void pvr_fw_object_destroy(struct pvr_fw_object *fw_obj); + +static __always_inline void +pvr_fw_object_unmap_and_destroy(struct pvr_fw_object *fw_obj) +{ + pvr_fw_object_vunmap(fw_obj); + pvr_fw_object_destroy(fw_obj); +} + +/** + * pvr_fw_object_get_dma_addr() - Get DMA address for given offset in firmware + * object. + * @fw_obj: Pointer to object to lookup address in. + * @offset: Offset within object to lookup address at. + * @dma_addr_out: Pointer to location to store DMA address. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if object is not currently backed, or if @offset is out of valid + * range for this object. 
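+ *
+ * An illustrative use, mirroring the MIPS boot path later in this patch
+ * ("stack_entry" and "boot_data" are names taken from that code):
+ *
+ *	dma_addr_t dma_addr;
+ *
+ *	if (!pvr_fw_object_get_dma_addr(fw_obj, stack_entry->alloc_offset, &dma_addr))
+ *		boot_data->stack_phys_addr = dma_addr;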
+ */ +static __always_inline int +pvr_fw_object_get_dma_addr(struct pvr_fw_object *fw_obj, u32 offset, dma_addr_t *dma_addr_out) +{ + return pvr_gem_get_dma_addr(fw_obj->gem, offset, dma_addr_out); +} + +void pvr_fw_object_get_fw_addr_offset(struct pvr_fw_object *fw_obj, u32 offset, u32 *fw_addr_out); + +static __always_inline void +pvr_fw_object_get_fw_addr(struct pvr_fw_object *fw_obj, u32 *fw_addr_out) +{ + pvr_fw_object_get_fw_addr_offset(fw_obj, 0, fw_addr_out); +} + +#endif /* PVR_FW_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw_info.h b/drivers/gpu/drm/imagination/pvr_fw_info.h new file mode 100644 index 000000000000..c3639440610e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_info.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FW_INFO_H +#define PVR_FW_INFO_H + +#include <linux/bits.h> +#include <linux/sizes.h> +#include <linux/types.h> + +/* + * Firmware binary block unit in bytes. + * Raw data stored in FW binary will be aligned to this size. + */ +#define FW_BLOCK_SIZE SZ_4K + +/* Maximum number of entries in firmware layout table. */ +#define PVR_FW_INFO_MAX_NUM_ENTRIES 8 + +enum pvr_fw_section_id { + META_CODE = 0, + META_PRIVATE_DATA, + META_COREMEM_CODE, + META_COREMEM_DATA, + MIPS_CODE, + MIPS_EXCEPTIONS_CODE, + MIPS_BOOT_CODE, + MIPS_PRIVATE_DATA, + MIPS_BOOT_DATA, + MIPS_STACK, + RISCV_UNCACHED_CODE, + RISCV_CACHED_CODE, + RISCV_PRIVATE_DATA, + RISCV_COREMEM_CODE, + RISCV_COREMEM_DATA, +}; + +enum pvr_fw_section_type { + NONE = 0, + FW_CODE, + FW_DATA, + FW_COREMEM_CODE, + FW_COREMEM_DATA, +}; + +/* + * FW binary format with FW info attached: + * + * Contents Offset + * +-----------------+ + * | | 0 + * | | + * | Original binary | + * | file | + * | (.ldr/.elf) | + * | | + * | | + * +-----------------+ + * | Device info | FILE_SIZE - 4K - device_info_size + * +-----------------+ + * | FW info header | FILE_SIZE - 4K + * +-----------------+ + * | | + * | FW layout table | + * | | + * +-----------------+ + * FILE_SIZE + */ + +#define PVR_FW_INFO_VERSION 3 + +#define PVR_FW_FLAGS_OPEN_SOURCE BIT(0) + +/** struct pvr_fw_info_header - Firmware header */ +struct pvr_fw_info_header { + /** @info_version: FW info header version. */ + u32 info_version; + /** @header_len: Header length. */ + u32 header_len; + /** @layout_entry_num: Number of entries in the layout table. */ + u32 layout_entry_num; + /** @layout_entry_size: Size of an entry in the layout table. */ + u32 layout_entry_size; + /** @bvnc: GPU ID supported by firmware. */ + aligned_u64 bvnc; + /** @fw_page_size: Page size of processor on which firmware executes. */ + u32 fw_page_size; + /** @flags: Compatibility flags. */ + u32 flags; + /** @fw_version_major: Firmware major version number. */ + u16 fw_version_major; + /** @fw_version_minor: Firmware minor version number. */ + u16 fw_version_minor; + /** @fw_version_build: Firmware build number. */ + u32 fw_version_build; + /** @device_info_size: Size of device info structure. */ + u32 device_info_size; + /** @padding: Padding. */ + u32 padding; +}; + +/** + * struct pvr_fw_layout_entry - Entry in firmware layout table, describing a + * section of the firmware image + */ +struct pvr_fw_layout_entry { + /** @id: Section ID. */ + enum pvr_fw_section_id id; + /** @type: Section type. */ + enum pvr_fw_section_type type; + /** @base_addr: Base address of section in FW address space. */ + u32 base_addr; + /** @max_size: Maximum size of section, in bytes. 
*/ + u32 max_size; + /** @alloc_size: Allocation size of section, in bytes. */ + u32 alloc_size; + /** @alloc_offset: Allocation offset of section. */ + u32 alloc_offset; +}; + +/** + * struct pvr_fw_device_info_header - Device information header. + */ +struct pvr_fw_device_info_header { + /** @brn_mask_size: BRN mask size (in u64s). */ + u64 brn_mask_size; + /** @ern_mask_size: ERN mask size (in u64s). */ + u64 ern_mask_size; + /** @feature_mask_size: Feature mask size (in u64s). */ + u64 feature_mask_size; + /** @feature_param_size: Feature parameter size (in u64s). */ + u64 feature_param_size; +}; + +#endif /* PVR_FW_INFO_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c new file mode 100644 index 000000000000..c39beb70c317 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_fw_info.h" +#include "pvr_fw_meta.h" +#include "pvr_gem.h" +#include "pvr_rogue_cr_defs.h" +#include "pvr_rogue_meta.h" +#include "pvr_vm.h" + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/ktime.h> +#include <linux/types.h> + +#define ROGUE_FW_HEAP_META_SHIFT 25 /* 32 MB */ + +#define POLL_TIMEOUT_USEC 1000000 + +/** + * pvr_meta_cr_read32() - Read a META register via the Slave Port + * @pvr_dev: Device pointer. + * @reg_addr: Address of register to read. + * @reg_value_out: Pointer to location to store register value. + * + * Returns: + * * 0 on success, or + * * Any error returned by pvr_cr_poll_reg32(). + */ +int +pvr_meta_cr_read32(struct pvr_device *pvr_dev, u32 reg_addr, u32 *reg_value_out) +{ + int err; + + /* Wait for Slave Port to be Ready. */ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL1, + ROGUE_CR_META_SP_MSLVCTRL1_READY_EN | + ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN, + ROGUE_CR_META_SP_MSLVCTRL1_READY_EN | + ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN, + POLL_TIMEOUT_USEC); + if (err) + return err; + + /* Issue a Read. */ + pvr_cr_write32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL0, + reg_addr | ROGUE_CR_META_SP_MSLVCTRL0_RD_EN); + (void)pvr_cr_read32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL0); /* Fence write. */ + + /* Wait for Slave Port to be Ready. */ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL1, + ROGUE_CR_META_SP_MSLVCTRL1_READY_EN | + ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN, + ROGUE_CR_META_SP_MSLVCTRL1_READY_EN | + ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN, + POLL_TIMEOUT_USEC); + if (err) + return err; + + *reg_value_out = pvr_cr_read32(pvr_dev, ROGUE_CR_META_SP_MSLVDATAX); + + return 0; +} + +static int +pvr_meta_wrapper_init(struct pvr_device *pvr_dev) +{ + u64 garten_config; + + /* Configure META to Master boot. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_META_BOOT, ROGUE_CR_META_BOOT_MODE_EN); + + /* Set Garten IDLE to META idle and Set the Garten Wrapper BIF Fence address. */ + + /* Garten IDLE bit controlled by META. */ + garten_config = ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META; + + /* The fence addr is set during the fw init sequence. */ + + /* Set PC = 0 for fences. */ + garten_config &= + ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_CLRMSK; + garten_config |= + (u64)MMU_CONTEXT_MAPPING_FWPRIV + << ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_SHIFT; + + /* Set SLC DM=META. 
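+ * The fence data-master field is pointed at META's BIF DM ID so that
+ * fence accesses are attributed to the firmware processor.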
*/ + garten_config |= ((u64)ROGUE_FW_SEGMMU_META_BIFDM_ID) + << ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_SHIFT; + + pvr_cr_write64(pvr_dev, ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG, garten_config); + + return 0; +} + +static __always_inline void +add_boot_arg(u32 **boot_conf, u32 param, u32 data) +{ + *(*boot_conf)++ = param; + *(*boot_conf)++ = data; +} + +static int +meta_ldr_cmd_loadmem(struct drm_device *drm_dev, const u8 *fw, + struct rogue_meta_ldr_l1_data_blk *l1_data, u32 coremem_size, u8 *fw_code_ptr, + u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr, const u32 fw_size) +{ + struct rogue_meta_ldr_l2_data_blk *l2_block = + (struct rogue_meta_ldr_l2_data_blk *)(fw + + l1_data->cmd_data[1]); + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + u32 offset = l1_data->cmd_data[0]; + u32 data_size; + void *write_addr; + int err; + + /* Verify header is within bounds. */ + if (((u8 *)l2_block - fw) >= fw_size || ((u8 *)(l2_block + 1) - fw) >= fw_size) + return -EINVAL; + + data_size = l2_block->length - 6 /* L2 Tag length and checksum */; + + /* Verify data is within bounds. */ + if (((u8 *)l2_block->block_data - fw) >= fw_size || + ((((u8 *)l2_block->block_data) + data_size) - fw) >= fw_size) + return -EINVAL; + + if (!ROGUE_META_IS_COREMEM_CODE(offset, coremem_size) && + !ROGUE_META_IS_COREMEM_DATA(offset, coremem_size)) { + /* Global range is aliased to local range */ + offset &= ~META_MEM_GLOBAL_RANGE_BIT; + } + + err = pvr_fw_find_mmu_segment(pvr_dev, offset, data_size, fw_code_ptr, fw_data_ptr, + fw_core_code_ptr, fw_core_data_ptr, &write_addr); + if (err) { + drm_err(drm_dev, + "Addr 0x%x (size: %d) not found in any firmware segment", + offset, data_size); + return err; + } + + memcpy(write_addr, l2_block->block_data, data_size); + + return 0; +} + +static int +meta_ldr_cmd_zeromem(struct drm_device *drm_dev, + struct rogue_meta_ldr_l1_data_blk *l1_data, u32 coremem_size, + u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr) +{ + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + u32 offset = l1_data->cmd_data[0]; + u32 byte_count = l1_data->cmd_data[1]; + void *write_addr; + int err; + + if (ROGUE_META_IS_COREMEM_DATA(offset, coremem_size)) { + /* cannot zero coremem directly */ + return 0; + } + + /* Global range is aliased to local range */ + offset &= ~META_MEM_GLOBAL_RANGE_BIT; + + err = pvr_fw_find_mmu_segment(pvr_dev, offset, byte_count, fw_code_ptr, fw_data_ptr, + fw_core_code_ptr, fw_core_data_ptr, &write_addr); + if (err) { + drm_err(drm_dev, + "Addr 0x%x (size: %d) not found in any firmware segment", + offset, byte_count); + return err; + } + + memset(write_addr, 0, byte_count); + + return 0; +} + +static int +meta_ldr_cmd_config(struct drm_device *drm_dev, const u8 *fw, + struct rogue_meta_ldr_l1_data_blk *l1_data, + const u32 fw_size, u32 **boot_conf_ptr) +{ + struct rogue_meta_ldr_l2_data_blk *l2_block = + (struct rogue_meta_ldr_l2_data_blk *)(fw + + l1_data->cmd_data[0]); + struct rogue_meta_ldr_cfg_blk *config_command; + u32 l2_block_size; + u32 curr_block_size = 0; + u32 *boot_conf = boot_conf_ptr ? *boot_conf_ptr : NULL; + + /* Verify block header is within bounds. 
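+ * Both the start and the end of the L2 block must lie inside the firmware
+ * image; these offsets come straight from the (untrusted) firmware file.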
*/ + if (((u8 *)l2_block - fw) >= fw_size || ((u8 *)(l2_block + 1) - fw) >= fw_size) + return -EINVAL; + + l2_block_size = l2_block->length - 6 /* L2 Tag length and checksum */; + config_command = (struct rogue_meta_ldr_cfg_blk *)l2_block->block_data; + + if (((u8 *)config_command - fw) >= fw_size || + ((((u8 *)config_command) + l2_block_size) - fw) >= fw_size) + return -EINVAL; + + while (l2_block_size >= 12) { + if (config_command->type != ROGUE_META_LDR_CFG_WRITE) + return -EINVAL; + + /* + * Only write to bootloader if we got a valid pointer to the FW + * code allocation. + */ + if (boot_conf) { + u32 register_offset = config_command->block_data[0]; + u32 register_value = config_command->block_data[1]; + + /* Do register write */ + add_boot_arg(&boot_conf, register_offset, + register_value); + } + + curr_block_size = 12; + l2_block_size -= curr_block_size; + config_command = (struct rogue_meta_ldr_cfg_blk + *)((uintptr_t)config_command + + curr_block_size); + } + + if (boot_conf_ptr) + *boot_conf_ptr = boot_conf; + + return 0; +} + +/** + * process_ldr_command_stream() - Process LDR firmware image and populate + * firmware sections + * @pvr_dev: Device pointer. + * @fw: Pointer to firmware image. + * @fw_code_ptr: Pointer to FW code section. + * @fw_data_ptr: Pointer to FW data section. + * @fw_core_code_ptr: Pointer to FW coremem code section. + * @fw_core_data_ptr: Pointer to FW coremem data section. + * @boot_conf_ptr: Pointer to boot config argument pointer. + * + * Returns : + * * 0 on success, or + * * -EINVAL on any error in LDR command stream. + */ +static int +process_ldr_command_stream(struct pvr_device *pvr_dev, const u8 *fw, u8 *fw_code_ptr, + u8 *fw_data_ptr, u8 *fw_core_code_ptr, + u8 *fw_core_data_ptr, u32 **boot_conf_ptr) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct rogue_meta_ldr_block_hdr *ldr_header = + (struct rogue_meta_ldr_block_hdr *)fw; + struct rogue_meta_ldr_l1_data_blk *l1_data = + (struct rogue_meta_ldr_l1_data_blk *)(fw + ldr_header->sl_data); + const u32 fw_size = pvr_dev->fw_dev.firmware->size; + int err; + + u32 *boot_conf = boot_conf_ptr ? *boot_conf_ptr : NULL; + u32 coremem_size; + + err = PVR_FEATURE_VALUE(pvr_dev, meta_coremem_size, &coremem_size); + if (err) + return err; + + coremem_size *= SZ_1K; + + while (l1_data) { + /* Verify block header is within bounds. 
*/
+ if (((u8 *)l1_data - fw) >= fw_size || ((u8 *)(l1_data + 1) - fw) >= fw_size)
+ return -EINVAL;
+
+ if (ROGUE_META_LDR_BLK_IS_COMMENT(l1_data->cmd)) {
+ /* Don't process comment blocks */
+ goto next_block;
+ }
+
+ switch (l1_data->cmd & ROGUE_META_LDR_CMD_MASK) {
+ case ROGUE_META_LDR_CMD_LOADMEM:
+ err = meta_ldr_cmd_loadmem(drm_dev, fw, l1_data,
+ coremem_size,
+ fw_code_ptr, fw_data_ptr,
+ fw_core_code_ptr,
+ fw_core_data_ptr, fw_size);
+ if (err)
+ return err;
+ break;
+
+ case ROGUE_META_LDR_CMD_START_THREADS:
+ /* Don't process this block */
+ break;
+
+ case ROGUE_META_LDR_CMD_ZEROMEM:
+ err = meta_ldr_cmd_zeromem(drm_dev, l1_data,
+ coremem_size,
+ fw_code_ptr, fw_data_ptr,
+ fw_core_code_ptr,
+ fw_core_data_ptr);
+ if (err)
+ return err;
+ break;
+
+ case ROGUE_META_LDR_CMD_CONFIG:
+ err = meta_ldr_cmd_config(drm_dev, fw, l1_data, fw_size,
+ &boot_conf);
+ if (err)
+ return err;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+next_block:
+ if (l1_data->next == 0xFFFFFFFF)
+ break;
+
+ l1_data = (struct rogue_meta_ldr_l1_data_blk *)(fw +
+ l1_data->next);
+ }
+
+ if (boot_conf_ptr)
+ *boot_conf_ptr = boot_conf;
+
+ return 0;
+}
+
+static void
+configure_seg_id(u64 seg_out_addr, u32 seg_base, u32 seg_limit, u32 seg_id,
+ u32 **boot_conf_ptr)
+{
+ u32 seg_out_addr0 = seg_out_addr & 0x00000000FFFFFFFFUL;
+ u32 seg_out_addr1 = (seg_out_addr >> 32) & 0x00000000FFFFFFFFUL;
+ u32 *boot_conf = *boot_conf_ptr;
+
+ /* META segments have a minimum size. */
+ u32 limit_off = max(seg_limit, ROGUE_FW_SEGMMU_ALIGN);
+
+ /* The limit is an offset, therefore off = size - 1. */
+ limit_off -= 1;
+
+ seg_base |= ROGUE_FW_SEGMMU_ALLTHRS_WRITEABLE;
+
+ add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_BASE(seg_id), seg_base);
+ add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_LIMIT(seg_id), limit_off);
+ add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_OUTA0(seg_id), seg_out_addr0);
+ add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_OUTA1(seg_id), seg_out_addr1);
+
+ *boot_conf_ptr = boot_conf;
+}
+
+static u64 get_fw_obj_gpu_addr(struct pvr_fw_object *fw_obj)
+{
+ struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(fw_obj->gem)->dev);
+ struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+ return fw_obj->fw_addr_offset + fw_dev->fw_heap_info.gpu_addr;
+}
+
+static void
+configure_seg_mmu(struct pvr_device *pvr_dev, u32 **boot_conf_ptr)
+{
+ const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+ u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+ u64 seg_out_addr_top;
+ u32 i;
+
+ seg_out_addr_top =
+ ROGUE_FW_SEGMMU_OUTADDR_TOP_SLC(MMU_CONTEXT_MAPPING_FWPRIV,
+ ROGUE_FW_SEGMMU_META_BIFDM_ID);
+
+ for (i = 0; i < num_layout_entries; i++) {
+ /*
+ * FW code is using the bootloader segment which is already
+ * configured on boot. FW coremem code and data don't use the
+ * segment MMU. Only the FW data segment needs to be configured.
+ */
+ if (layout_entries[i].type == FW_DATA) {
+ u32 seg_id = ROGUE_FW_SEGMMU_DATA_ID;
+ u64 seg_out_addr = get_fw_obj_gpu_addr(pvr_dev->fw_dev.mem.data_obj);
+
+ seg_out_addr += layout_entries[i].alloc_offset;
+ seg_out_addr |= seg_out_addr_top;
+
+ /* Write the sequence to the bootldr.
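+ * configure_seg_id() emits four register writes for the data
+ * segment: base, limit, and the low/high halves of the output
+ * address.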
*/ + configure_seg_id(seg_out_addr, + layout_entries[i].base_addr, + layout_entries[i].alloc_size, seg_id, + boot_conf_ptr); + + break; + } + } +} + +static void +configure_meta_caches(u32 **boot_conf_ptr) +{ + u32 *boot_conf = *boot_conf_ptr; + u32 d_cache_t0, i_cache_t0; + u32 d_cache_t1, i_cache_t1; + u32 d_cache_t2, i_cache_t2; + u32 d_cache_t3, i_cache_t3; + + /* Initialise I/Dcache settings */ + d_cache_t0 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE; + d_cache_t1 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE; + d_cache_t2 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE; + d_cache_t3 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE; + i_cache_t0 = 0; + i_cache_t1 = 0; + i_cache_t2 = 0; + i_cache_t3 = 0; + + d_cache_t0 |= META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE; + i_cache_t0 |= META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE; + + /* Local region MMU enhanced bypass: WIN-3 mode for code and data caches */ + add_boot_arg(&boot_conf, META_CR_MMCU_LOCAL_EBCTRL, + META_CR_MMCU_LOCAL_EBCTRL_ICWIN | + META_CR_MMCU_LOCAL_EBCTRL_DCWIN); + + /* Data cache partitioning thread 0 to 3 */ + add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(0), d_cache_t0); + add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(1), d_cache_t1); + add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(2), d_cache_t2); + add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(3), d_cache_t3); + + /* Enable data cache hits */ + add_boot_arg(&boot_conf, META_CR_MMCU_DCACHE_CTRL, + META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN); + + /* Instruction cache partitioning thread 0 to 3 */ + add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(0), i_cache_t0); + add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(1), i_cache_t1); + add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(2), i_cache_t2); + add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(3), i_cache_t3); + + /* Enable instruction cache hits */ + add_boot_arg(&boot_conf, META_CR_MMCU_ICACHE_CTRL, + META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN); + + add_boot_arg(&boot_conf, 0x040000C0, 0); + + *boot_conf_ptr = boot_conf; +} + +static int +pvr_meta_fw_process(struct pvr_device *pvr_dev, const u8 *fw, + u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr, + u32 core_code_alloc_size) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + u32 *boot_conf; + int err; + + boot_conf = ((u32 *)fw_code_ptr) + ROGUE_FW_BOOTLDR_CONF_OFFSET; + + /* Slave port and JTAG accesses are privileged. */ + add_boot_arg(&boot_conf, META_CR_SYSC_JTAG_THREAD, + META_CR_SYSC_JTAG_THREAD_PRIV_EN); + + configure_seg_mmu(pvr_dev, &boot_conf); + + /* Populate FW sections from LDR image. */ + err = process_ldr_command_stream(pvr_dev, fw, fw_code_ptr, fw_data_ptr, fw_core_code_ptr, + fw_core_data_ptr, &boot_conf); + if (err) + return err; + + configure_meta_caches(&boot_conf); + + /* End argument list. */ + add_boot_arg(&boot_conf, 0, 0); + + if (fw_dev->mem.core_code_obj) { + u32 core_code_fw_addr; + + pvr_fw_object_get_fw_addr(fw_dev->mem.core_code_obj, &core_code_fw_addr); + add_boot_arg(&boot_conf, core_code_fw_addr, core_code_alloc_size); + } else { + add_boot_arg(&boot_conf, 0, 0); + } + /* None of the cores supported by this driver have META DMA. 
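+ * The corresponding DMA boot-argument pair is therefore written as zeroes.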
*/ + add_boot_arg(&boot_conf, 0, 0); + + return 0; +} + +static int +pvr_meta_init(struct pvr_device *pvr_dev) +{ + pvr_fw_heap_info_init(pvr_dev, ROGUE_FW_HEAP_META_SHIFT, 0); + + return 0; +} + +static u32 +pvr_meta_get_fw_addr_with_offset(struct pvr_fw_object *fw_obj, u32 offset) +{ + u32 fw_addr = fw_obj->fw_addr_offset + offset + ROGUE_FW_SEGMMU_DATA_BASE_ADDRESS; + + /* META cacheability is determined by address. */ + if (fw_obj->gem->flags & PVR_BO_FW_FLAGS_DEVICE_UNCACHED) + fw_addr |= ROGUE_FW_SEGMMU_DATA_META_UNCACHED | + ROGUE_FW_SEGMMU_DATA_VIVT_SLC_UNCACHED; + + return fw_addr; +} + +static int +pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) +{ + struct pvr_gem_object *pvr_obj = fw_obj->gem; + + return pvr_vm_map(pvr_dev->kernel_vm_ctx, pvr_obj, 0, fw_obj->fw_mm_node.start, + pvr_gem_object_size(pvr_obj)); +} + +static void +pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) +{ + pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start, + fw_obj->fw_mm_node.size); +} + +static bool +pvr_meta_has_fixed_data_addr(void) +{ + return false; +} + +const struct pvr_fw_defs pvr_fw_defs_meta = { + .init = pvr_meta_init, + .fw_process = pvr_meta_fw_process, + .vm_map = pvr_meta_vm_map, + .vm_unmap = pvr_meta_vm_unmap, + .get_fw_addr_with_offset = pvr_meta_get_fw_addr_with_offset, + .wrapper_init = pvr_meta_wrapper_init, + .has_fixed_data_addr = pvr_meta_has_fixed_data_addr, + .irq = { + .enable_reg = ROGUE_CR_META_SP_MSLVIRQENABLE, + .status_reg = ROGUE_CR_META_SP_MSLVIRQSTATUS, + .clear_reg = ROGUE_CR_META_SP_MSLVIRQSTATUS, + .event_mask = ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_EN, + .clear_mask = ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_CLRMSK, + }, +}; diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.h b/drivers/gpu/drm/imagination/pvr_fw_meta.h new file mode 100644 index 000000000000..911ad700cba6 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FW_META_H +#define PVR_FW_META_H + +#include <linux/types.h> + +/* Forward declaration from pvr_device.h */ +struct pvr_device; + +int pvr_meta_cr_read32(struct pvr_device *pvr_dev, u32 reg_addr, u32 *reg_value_out); + +#endif /* PVR_FW_META_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw_mips.c b/drivers/gpu/drm/imagination/pvr_fw_mips.c new file mode 100644 index 000000000000..0bed0257e2ab --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_mips.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_fw_mips.h" +#include "pvr_gem.h" +#include "pvr_rogue_mips.h" +#include "pvr_vm_mips.h" + +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/types.h> + +#define ROGUE_FW_HEAP_MIPS_BASE 0xC0000000 +#define ROGUE_FW_HEAP_MIPS_SHIFT 24 /* 16 MB */ +#define ROGUE_FW_HEAP_MIPS_RESERVED_SIZE SZ_1M + +/** + * process_elf_command_stream() - Process ELF firmware image and populate + * firmware sections + * @pvr_dev: Device pointer. + * @fw: Pointer to firmware image. + * @fw_code_ptr: Pointer to FW code section. + * @fw_data_ptr: Pointer to FW data section. + * @fw_core_code_ptr: Pointer to FW coremem code section. + * @fw_core_data_ptr: Pointer to FW coremem data section. + * + * Returns : + * * 0 on success, or + * * -EINVAL on any error in ELF command stream. 
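+ *
+ * Only PT_LOAD program headers are considered; for each one, p_filesz bytes
+ * are copied from the image and the remaining p_memsz - p_filesz bytes (the
+ * BSS) are zeroed, as implemented below.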
+ */ +static int +process_elf_command_stream(struct pvr_device *pvr_dev, const u8 *fw, u8 *fw_code_ptr, + u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr) +{ + struct elf32_hdr *header = (struct elf32_hdr *)fw; + struct elf32_phdr *program_header = (struct elf32_phdr *)(fw + header->e_phoff); + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + u32 entry; + int err; + + for (entry = 0; entry < header->e_phnum; entry++, program_header++) { + void *write_addr; + + /* Only consider loadable entries in the ELF segment table */ + if (program_header->p_type != PT_LOAD) + continue; + + err = pvr_fw_find_mmu_segment(pvr_dev, program_header->p_vaddr, + program_header->p_memsz, fw_code_ptr, fw_data_ptr, + fw_core_code_ptr, fw_core_data_ptr, &write_addr); + if (err) { + drm_err(drm_dev, + "Addr 0x%x (size: %d) not found in any firmware segment", + program_header->p_vaddr, program_header->p_memsz); + return err; + } + + /* Write to FW allocation only if available */ + if (write_addr) { + memcpy(write_addr, fw + program_header->p_offset, + program_header->p_filesz); + + memset((u8 *)write_addr + program_header->p_filesz, 0, + program_header->p_memsz - program_header->p_filesz); + } + } + + return 0; +} + +static int +pvr_mips_init(struct pvr_device *pvr_dev) +{ + pvr_fw_heap_info_init(pvr_dev, ROGUE_FW_HEAP_MIPS_SHIFT, ROGUE_FW_HEAP_MIPS_RESERVED_SIZE); + + return pvr_vm_mips_init(pvr_dev); +} + +static void +pvr_mips_fini(struct pvr_device *pvr_dev) +{ + pvr_vm_mips_fini(pvr_dev); +} + +static int +pvr_mips_fw_process(struct pvr_device *pvr_dev, const u8 *fw, + u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr, + u32 core_code_alloc_size) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data; + const struct pvr_fw_layout_entry *boot_code_entry; + const struct pvr_fw_layout_entry *boot_data_entry; + const struct pvr_fw_layout_entry *exception_code_entry; + const struct pvr_fw_layout_entry *stack_entry; + struct rogue_mipsfw_boot_data *boot_data; + dma_addr_t dma_addr; + u32 page_nr; + int err; + + err = process_elf_command_stream(pvr_dev, fw, fw_code_ptr, fw_data_ptr, fw_core_code_ptr, + fw_core_data_ptr); + if (err) + return err; + + boot_code_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_BOOT_CODE); + boot_data_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_BOOT_DATA); + exception_code_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_EXCEPTIONS_CODE); + if (!boot_code_entry || !boot_data_entry || !exception_code_entry) + return -EINVAL; + + WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.code_obj->gem, boot_code_entry->alloc_offset, + &mips_data->boot_code_dma_addr)); + WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.data_obj->gem, boot_data_entry->alloc_offset, + &mips_data->boot_data_dma_addr)); + WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.code_obj->gem, + exception_code_entry->alloc_offset, + &mips_data->exception_code_dma_addr)); + + stack_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_STACK); + if (!stack_entry) + return -EINVAL; + + boot_data = (struct rogue_mipsfw_boot_data *)(fw_data_ptr + boot_data_entry->alloc_offset + + ROGUE_MIPSFW_BOOTLDR_CONF_OFFSET); + + WARN_ON(pvr_fw_object_get_dma_addr(fw_dev->mem.data_obj, stack_entry->alloc_offset, + &dma_addr)); + boot_data->stack_phys_addr = dma_addr; + + boot_data->reg_base = pvr_dev->regs_resource->start; + + for (page_nr = 0; page_nr < ARRAY_SIZE(boot_data->pt_phys_addr); page_nr++) { + /* Firmware expects 4k pages, but host page size might be 
different. */ + u32 src_page_nr = (page_nr * ROGUE_MIPSFW_PAGE_SIZE_4K) >> PAGE_SHIFT; + u32 page_offset = (page_nr * ROGUE_MIPSFW_PAGE_SIZE_4K) & ~PAGE_MASK; + + boot_data->pt_phys_addr[page_nr] = mips_data->pt_dma_addr[src_page_nr] + + page_offset; + } + + boot_data->pt_log2_page_size = ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K; + boot_data->pt_num_pages = ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES; + boot_data->reserved1 = 0; + boot_data->reserved2 = 0; + + return 0; +} + +static int +pvr_mips_wrapper_init(struct pvr_device *pvr_dev) +{ + struct pvr_fw_mips_data *mips_data = pvr_dev->fw_dev.processor_data.mips_data; + const u64 remap_settings = ROGUE_MIPSFW_BOOT_REMAP_LOG2_SEGMENT_SIZE; + u32 phys_bus_width; + + int err = PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width); + + if (WARN_ON(err)) + return err; + + /* Currently MIPS FW only supported with physical bus width > 32 bits. */ + if (WARN_ON(phys_bus_width <= 32)) + return -EINVAL; + + pvr_cr_write32(pvr_dev, ROGUE_CR_MIPS_WRAPPER_CONFIG, + (ROGUE_MIPSFW_REGISTERS_VIRTUAL_BASE >> + ROGUE_MIPSFW_WRAPPER_CONFIG_REGBANK_ADDR_ALIGN) | + ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MICROMIPS); + + /* Configure remap for boot code, boot data and exceptions code areas. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1, + ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN | + ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_EN); + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2, + (mips_data->boot_code_dma_addr & + ~ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings); + + if (PVR_HAS_QUIRK(pvr_dev, 63553)) { + /* + * WA always required on 36 bit cores, to avoid continuous unmapped memory accesses + * to address 0x0. + */ + WARN_ON(phys_bus_width != 36); + + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1, + ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_EN); + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2, + (mips_data->boot_code_dma_addr & + ~ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_CLRMSK) | + remap_settings); + } + + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1, + ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN | + ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_EN); + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2, + (mips_data->boot_data_dma_addr & + ~ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings); + + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1, + ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN | + ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_EN); + pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2, + (mips_data->exception_code_dma_addr & + ~ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings); + + /* Garten IDLE bit controlled by MIPS. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG, + ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META); + + /* Turn on the EJTAG probe. */ + pvr_cr_write32(pvr_dev, ROGUE_CR_MIPS_DEBUG_CONFIG, 0); + + return 0; +} + +static u32 +pvr_mips_get_fw_addr_with_offset(struct pvr_fw_object *fw_obj, u32 offset) +{ + struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(fw_obj->gem)->dev); + + /* MIPS cacheability is determined by page table. 
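+ * The FW-side address is the object's offset inside the FW heap rebased
+ * onto the fixed MIPS window. A worked example with assumed values:
+ * with offset_mask == 0x00ffffff (the 16 MB heap declared above),
+ * fw_addr_offset == 0x2000 and offset == 0x40, the result is
+ * (0x2040 & 0x00ffffff) | 0xC0000000 == 0xC0002040.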
*/ + return ((fw_obj->fw_addr_offset + offset) & pvr_dev->fw_dev.fw_heap_info.offset_mask) | + ROGUE_FW_HEAP_MIPS_BASE; +} + +static bool +pvr_mips_has_fixed_data_addr(void) +{ + return true; +} + +const struct pvr_fw_defs pvr_fw_defs_mips = { + .init = pvr_mips_init, + .fini = pvr_mips_fini, + .fw_process = pvr_mips_fw_process, + .vm_map = pvr_vm_mips_map, + .vm_unmap = pvr_vm_mips_unmap, + .get_fw_addr_with_offset = pvr_mips_get_fw_addr_with_offset, + .wrapper_init = pvr_mips_wrapper_init, + .has_fixed_data_addr = pvr_mips_has_fixed_data_addr, + .irq = { + .enable_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE, + .status_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS, + .clear_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR, + .event_mask = ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_EN, + .clear_mask = ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_EN, + }, +}; diff --git a/drivers/gpu/drm/imagination/pvr_fw_mips.h b/drivers/gpu/drm/imagination/pvr_fw_mips.h new file mode 100644 index 000000000000..408dbe63a90c --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_mips.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FW_MIPS_H +#define PVR_FW_MIPS_H + +#include "pvr_rogue_mips.h" + +#include <asm/page.h> +#include <linux/types.h> + +/* Forward declaration from pvr_gem.h. */ +struct pvr_gem_object; + +#define PVR_MIPS_PT_PAGE_COUNT ((ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * ROGUE_MIPSFW_PAGE_SIZE_4K) \ + >> PAGE_SHIFT) +/** + * struct pvr_fw_mips_data - MIPS-specific data + */ +struct pvr_fw_mips_data { + /** + * @pt_pages: Pages containing MIPS pagetable. + */ + struct page *pt_pages[PVR_MIPS_PT_PAGE_COUNT]; + + /** @pt: Pointer to CPU mapping of MIPS pagetable. */ + u32 *pt; + + /** @pt_dma_addr: DMA mappings of MIPS pagetable. */ + dma_addr_t pt_dma_addr[PVR_MIPS_PT_PAGE_COUNT]; + + /** @boot_code_dma_addr: DMA address of MIPS boot code. */ + dma_addr_t boot_code_dma_addr; + + /** @boot_data_dma_addr: DMA address of MIPS boot data. */ + dma_addr_t boot_data_dma_addr; + + /** @exception_code_dma_addr: DMA address of MIPS exception code. */ + dma_addr_t exception_code_dma_addr; + + /** @cache_policy: Cache policy for this processor. */ + u32 cache_policy; + + /** @pfn_mask: PFN mask for MIPS pagetable. */ + u32 pfn_mask; +}; + +#endif /* PVR_FW_MIPS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw_startstop.c b/drivers/gpu/drm/imagination/pvr_fw_startstop.c new file mode 100644 index 000000000000..36cec227cfe3 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_startstop.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_fw_meta.h" +#include "pvr_fw_startstop.h" +#include "pvr_rogue_cr_defs.h" +#include "pvr_rogue_meta.h" +#include "pvr_vm.h" + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/ktime.h> +#include <linux/types.h> + +#define POLL_TIMEOUT_USEC 1000000 + +static void +rogue_axi_ace_list_init(struct pvr_device *pvr_dev) +{ + /* Setup AXI-ACE config. Set everything to outer cache. 
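+ * The 2-bit DOMAIN fields follow the AXI ACE shareability encoding
+ * (0 = non-shareable, 1 = inner shareable, 2 = outer shareable,
+ * 3 = system): below, coherent and cache-maintenance traffic is placed
+ * in the outer-shareable domain (2) and non-snooping traffic in the
+ * system domain (3).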
*/ + u64 reg_val = + (3U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_SHIFT) | + (3U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_SHIFT) | + (2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_SHIFT); + + pvr_cr_write64(pvr_dev, ROGUE_CR_AXI_ACE_LITE_CONFIGURATION, reg_val); +} + +static void +rogue_bif_init(struct pvr_device *pvr_dev) +{ + dma_addr_t pc_dma_addr; + u64 pc_addr; + + /* Acquire the address of the Kernel Page Catalogue. */ + pc_dma_addr = pvr_vm_get_page_table_root_addr(pvr_dev->kernel_vm_ctx); + + /* Write the kernel catalogue base. */ + pc_addr = ((((u64)pc_dma_addr >> ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSHIFT) + << ROGUE_CR_BIF_CAT_BASE0_ADDR_SHIFT) & + ~ROGUE_CR_BIF_CAT_BASE0_ADDR_CLRMSK); + + pvr_cr_write64(pvr_dev, BIF_CAT_BASEX(MMU_CONTEXT_MAPPING_FWPRIV), + pc_addr); +} + +static int +rogue_slc_init(struct pvr_device *pvr_dev) +{ + u16 slc_cache_line_size_bits; + u32 reg_val; + int err; + + /* + * SLC Misc control. + * + * Note: This is a 64bit register and we set only the lower 32bits + * leaving the top 32bits (ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS) + * unchanged from the HW default. + */ + reg_val = (pvr_cr_read32(pvr_dev, ROGUE_CR_SLC_CTRL_MISC) & + ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_EN) | + ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH1; + + err = PVR_FEATURE_VALUE(pvr_dev, slc_cache_line_size_bits, &slc_cache_line_size_bits); + if (err) + return err; + + /* Bypass burst combiner if SLC line size is smaller than 1024 bits. */ + if (slc_cache_line_size_bits < 1024) + reg_val |= ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_EN; + + if (PVR_HAS_QUIRK(pvr_dev, 71242) && !PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) + reg_val |= ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_EN; + + pvr_cr_write32(pvr_dev, ROGUE_CR_SLC_CTRL_MISC, reg_val); + + return 0; +} + +/** + * pvr_fw_start() - Start FW processor and boot firmware + * @pvr_dev: Target PowerVR device. + * + * Returns: + * * 0 on success, or + * * Any error returned by rogue_slc_init(). + */ +int +pvr_fw_start(struct pvr_device *pvr_dev) +{ + bool has_reset2 = PVR_HAS_FEATURE(pvr_dev, xe_tpu2); + u64 soft_reset_mask; + int err; + + if (PVR_HAS_FEATURE(pvr_dev, pbe2_in_xe)) + soft_reset_mask = ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL; + else + soft_reset_mask = ROGUE_CR_SOFT_RESET_MASKFULL; + + if (PVR_HAS_FEATURE(pvr_dev, sys_bus_secure_reset)) { + /* + * Disable the default sys_bus_secure protection to perform + * minimal setup. + */ + pvr_cr_write32(pvr_dev, ROGUE_CR_SYS_BUS_SECURE, 0); + (void)pvr_cr_read32(pvr_dev, ROGUE_CR_SYS_BUS_SECURE); /* Fence write */ + } + + /* Set Rogue in soft-reset. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, soft_reset_mask); + if (has_reset2) + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, ROGUE_CR_SOFT_RESET2_MASKFULL); + + /* Read soft-reset to fence previous write in order to clear the SOCIF pipeline. */ + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET); + if (has_reset2) + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2); + + /* Take Rascal and Dust out of reset. 
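+ * soft_reset_mask has the RASCALDUSTS bit set, so the XOR below is
+ * equivalent to (soft_reset_mask & ~ROGUE_CR_SOFT_RESET_RASCALDUSTS_EN):
+ * only Rascal/Dust are released while every other unit stays in reset.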
*/ + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, + soft_reset_mask ^ ROGUE_CR_SOFT_RESET_RASCALDUSTS_EN); + if (has_reset2) + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, 0); + + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET); + if (has_reset2) + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2); + + /* Take everything out of reset but the FW processor. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, ROGUE_CR_SOFT_RESET_GARTEN_EN); + if (has_reset2) + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, 0); + + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET); + if (has_reset2) + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2); + + err = rogue_slc_init(pvr_dev); + if (err) + goto err_reset; + + /* Initialise Firmware wrapper. */ + pvr_dev->fw_dev.defs->wrapper_init(pvr_dev); + + /* We must init the AXI-ACE interface before first BIF transaction. */ + rogue_axi_ace_list_init(pvr_dev); + + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + /* Initialise BIF. */ + rogue_bif_init(pvr_dev); + } + + /* Need to wait for at least 16 cycles before taking the FW processor out of reset ... */ + udelay(3); + + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, 0x0); + (void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET); + + /* ... and afterwards. */ + udelay(3); + + return 0; + +err_reset: + /* Put everything back into soft-reset. */ + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, soft_reset_mask); + + return err; +} + +/** + * pvr_fw_stop() - Stop FW processor + * @pvr_dev: Target PowerVR device. + * + * Returns: + * * 0 on success, or + * * Any error returned by pvr_cr_poll_reg32(). + */ +int +pvr_fw_stop(struct pvr_device *pvr_dev) +{ + const u32 sidekick_idle_mask = ROGUE_CR_SIDEKICK_IDLE_MASKFULL & + ~(ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN | + ROGUE_CR_SIDEKICK_IDLE_SOCIF_EN | + ROGUE_CR_SIDEKICK_IDLE_HOSTIF_EN); + bool skip_garten_idle = false; + u32 reg_value; + int err; + + /* + * Wait for Sidekick/Jones to signal IDLE except for the Garten Wrapper. + * For cores with the LAYOUT_MARS feature, SIDEKICK would have been + * powered down by the FW. + */ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE, sidekick_idle_mask, + sidekick_idle_mask, POLL_TIMEOUT_USEC); + if (err) + return err; + + /* Unset MTS DM association with threads. */ + pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC, + ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_MASKFULL & + ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK); + pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC, + ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_MASKFULL & + ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK); + pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC, + ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_MASKFULL & + ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK); + pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC, + ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_MASKFULL & + ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK); + + /* Extra Idle checks. 
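+ * pvr_cr_poll_reg32()/pvr_cr_poll_reg64() are assumed to poll
+ * (read(reg) & mask) until it equals the expected value or the timeout
+ * (in microseconds) expires, so each check below waits for the masked
+ * status bits to drain to zero.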
*/ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIF_STATUS_MMU, 0, + ROGUE_CR_BIF_STATUS_MMU_MASKFULL, + POLL_TIMEOUT_USEC); + if (err) + return err; + + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIFPM_STATUS_MMU, 0, + ROGUE_CR_BIFPM_STATUS_MMU_MASKFULL, + POLL_TIMEOUT_USEC); + if (err) + return err; + + if (!PVR_HAS_FEATURE(pvr_dev, xt_top_infrastructure)) { + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIF_READS_EXT_STATUS, 0, + ROGUE_CR_BIF_READS_EXT_STATUS_MASKFULL, + POLL_TIMEOUT_USEC); + if (err) + return err; + } + + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIFPM_READS_EXT_STATUS, 0, + ROGUE_CR_BIFPM_READS_EXT_STATUS_MASKFULL, + POLL_TIMEOUT_USEC); + if (err) + return err; + + err = pvr_cr_poll_reg64(pvr_dev, ROGUE_CR_SLC_STATUS1, 0, + ROGUE_CR_SLC_STATUS1_MASKFULL, + POLL_TIMEOUT_USEC); + if (err) + return err; + + /* + * Wait for SLC to signal IDLE. + * For cores with the LAYOUT_MARS feature, SLC would have been powered + * down by the FW. + */ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SLC_IDLE, + ROGUE_CR_SLC_IDLE_MASKFULL, + ROGUE_CR_SLC_IDLE_MASKFULL, POLL_TIMEOUT_USEC); + if (err) + return err; + + /* + * Wait for Sidekick/Jones to signal IDLE except for the Garten Wrapper. + * For cores with the LAYOUT_MARS feature, SIDEKICK would have been powered + * down by the FW. + */ + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE, sidekick_idle_mask, + sidekick_idle_mask, POLL_TIMEOUT_USEC); + if (err) + return err; + + if (pvr_dev->fw_dev.processor_type == PVR_FW_PROCESSOR_TYPE_META) { + err = pvr_meta_cr_read32(pvr_dev, META_CR_TxVECINT_BHALT, &reg_value); + if (err) + return err; + + /* + * Wait for Sidekick/Jones to signal IDLE including the Garten + * Wrapper if there is no debugger attached (TxVECINT_BHALT = + * 0x0). + */ + if (reg_value) + skip_garten_idle = true; + } + + if (!skip_garten_idle) { + err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE, + ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN, + ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN, + POLL_TIMEOUT_USEC); + if (err) + return err; + } + + if (PVR_HAS_FEATURE(pvr_dev, pbe2_in_xe)) + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, + ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL); + else + pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, ROGUE_CR_SOFT_RESET_MASKFULL); + + return 0; +} diff --git a/drivers/gpu/drm/imagination/pvr_fw_startstop.h b/drivers/gpu/drm/imagination/pvr_fw_startstop.h new file mode 100644 index 000000000000..a3cef061bd60 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_startstop.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FW_STARTSTOP_H +#define PVR_FW_STARTSTOP_H + +/* Forward declaration from pvr_device.h. */ +struct pvr_device; + +int pvr_fw_start(struct pvr_device *pvr_dev); +int pvr_fw_stop(struct pvr_device *pvr_dev); + +#endif /* PVR_FW_STARTSTOP_H */ diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c new file mode 100644 index 000000000000..31199e45b72e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#include "pvr_device.h" +#include "pvr_gem.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_fwif_sf.h" +#include "pvr_fw_trace.h" + +#include <drm/drm_drv.h> +#include <drm/drm_file.h> + +#include <linux/build_bug.h> +#include <linux/dcache.h> +#include <linux/sysfs.h> +#include <linux/types.h> + +static void +tracebuf_ctrl_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_tracebuf *tracebuf_ctrl = cpu_ptr; + struct pvr_fw_trace *fw_trace = priv; + u32 thread_nr; + + tracebuf_ctrl->tracebuf_size_in_dwords = ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS; + tracebuf_ctrl->tracebuf_flags = 0; + + if (fw_trace->group_mask) + tracebuf_ctrl->log_type = fw_trace->group_mask | ROGUE_FWIF_LOG_TYPE_TRACE; + else + tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_NONE; + + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) { + struct rogue_fwif_tracebuf_space *tracebuf_space = + &tracebuf_ctrl->tracebuf[thread_nr]; + struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr]; + + pvr_fw_object_get_fw_addr(trace_buffer->buf_obj, + &tracebuf_space->trace_buffer_fw_addr); + + tracebuf_space->trace_buffer = trace_buffer->buf; + tracebuf_space->trace_pointer = 0; + } +} + +int pvr_fw_trace_init(struct pvr_device *pvr_dev) +{ + struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace; + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + u32 thread_nr; + int err; + + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) { + struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr]; + + trace_buffer->buf = + pvr_fw_object_create_and_map(pvr_dev, + ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS * + sizeof(*trace_buffer->buf), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED | + PVR_BO_FW_NO_CLEAR_ON_RESET, + NULL, NULL, &trace_buffer->buf_obj); + if (IS_ERR(trace_buffer->buf)) { + drm_err(drm_dev, "Unable to allocate trace buffer\n"); + err = PTR_ERR(trace_buffer->buf); + trace_buffer->buf = NULL; + goto err_free_buf; + } + } + + /* TODO: Provide control of group mask. 
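+ * A zero mask leaves tracing disabled: tracebuf_ctrl_init() above then
+ * selects ROGUE_FWIF_LOG_TYPE_NONE. When debugfs is enabled the mask
+ * can be changed at runtime via pvr_fw_trace_mask_update(), which sends
+ * a ROGUE_FWIF_KCCB_CMD_LOGTYPE_UPDATE command to the firmware.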
*/ + fw_trace->group_mask = 0; + + fw_trace->tracebuf_ctrl = + pvr_fw_object_create_and_map(pvr_dev, + sizeof(*fw_trace->tracebuf_ctrl), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED | + PVR_BO_FW_NO_CLEAR_ON_RESET, + tracebuf_ctrl_init, fw_trace, + &fw_trace->tracebuf_ctrl_obj); + if (IS_ERR(fw_trace->tracebuf_ctrl)) { + drm_err(drm_dev, "Unable to allocate trace buffer control structure\n"); + err = PTR_ERR(fw_trace->tracebuf_ctrl); + goto err_free_buf; + } + + BUILD_BUG_ON(ARRAY_SIZE(fw_trace->tracebuf_ctrl->tracebuf) != + ARRAY_SIZE(fw_trace->buffers)); + + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) { + struct rogue_fwif_tracebuf_space *tracebuf_space = + &fw_trace->tracebuf_ctrl->tracebuf[thread_nr]; + struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr]; + + trace_buffer->tracebuf_space = tracebuf_space; + } + + return 0; + +err_free_buf: + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) { + struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr]; + + if (trace_buffer->buf) + pvr_fw_object_unmap_and_destroy(trace_buffer->buf_obj); + } + + return err; +} + +void pvr_fw_trace_fini(struct pvr_device *pvr_dev) +{ + struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace; + u32 thread_nr; + + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) { + struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr]; + + pvr_fw_object_unmap_and_destroy(trace_buffer->buf_obj); + } + pvr_fw_object_unmap_and_destroy(fw_trace->tracebuf_ctrl_obj); +} + +#if defined(CONFIG_DEBUG_FS) + +/** + * update_logtype() - Send KCCB command to trigger FW to update logtype + * @pvr_dev: Target PowerVR device + * @group_mask: New log group mask. + * + * Returns: + * * 0 on success, + * * Any error returned by pvr_kccb_send_cmd(), or + * * -%EIO if the device is lost. + */ +static int +update_logtype(struct pvr_device *pvr_dev, u32 group_mask) +{ + struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace; + struct rogue_fwif_kccb_cmd cmd; + int idx; + int err; + + if (group_mask) + fw_trace->tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_TRACE | group_mask; + else + fw_trace->tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_NONE; + + fw_trace->group_mask = group_mask; + + down_read(&pvr_dev->reset_sem); + if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx)) { + err = -EIO; + goto err_up_read; + } + + cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_LOGTYPE_UPDATE; + cmd.kccb_flags = 0; + + err = pvr_kccb_send_cmd(pvr_dev, &cmd, NULL); + + drm_dev_exit(idx); + +err_up_read: + up_read(&pvr_dev->reset_sem); + + return err; +} + +struct pvr_fw_trace_seq_data { + /** @buffer: Pointer to copy of trace data. */ + u32 *buffer; + + /** @start_offset: Starting offset in trace data, as reported by FW. */ + u32 start_offset; + + /** @idx: Current index into trace data. */ + u32 idx; + + /** @assert_buf: Trace assert buffer, as reported by FW. 
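+ * Copied from the FW tracebuf_space when the debugfs file is opened, so
+ * the assertion report stays stable while the snapshot is parsed.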
*/ + struct rogue_fwif_file_info_buf assert_buf; +}; + +static u32 find_sfid(u32 id) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(stid_fmts); i++) { + if (stid_fmts[i].id == id) + return i; + } + + return ROGUE_FW_SF_LAST; +} + +static u32 read_fw_trace(struct pvr_fw_trace_seq_data *trace_seq_data, u32 offset) +{ + u32 idx; + + idx = trace_seq_data->idx + offset; + if (idx >= ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) + return 0; + + idx = (idx + trace_seq_data->start_offset) % ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS; + return trace_seq_data->buffer[idx]; +} + +/** + * fw_trace_get_next() - Advance trace index to next entry + * @trace_seq_data: Trace sequence data. + * + * Returns: + * * %true if trace index is now pointing to a valid entry, or + * * %false if trace index is pointing to an invalid entry, or has hit the end + * of the trace. + */ +static bool fw_trace_get_next(struct pvr_fw_trace_seq_data *trace_seq_data) +{ + u32 id, sf_id; + + while (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) { + id = read_fw_trace(trace_seq_data, 0); + trace_seq_data->idx++; + if (!ROGUE_FW_LOG_VALIDID(id)) + continue; + if (id == ROGUE_FW_SF_MAIN_ASSERT_FAILED) { + /* Assertion failure marks the end of the trace. */ + return false; + } + + sf_id = find_sfid(id); + if (sf_id == ROGUE_FW_SF_FIRST) + continue; + if (sf_id == ROGUE_FW_SF_LAST) { + /* + * Could not match with an ID in the SF table, trace is + * most likely corrupt from this point. + */ + return false; + } + + /* Skip over the timestamp, and any parameters. */ + trace_seq_data->idx += 2 + ROGUE_FW_SF_PARAMNUM(id); + + /* Ensure index is now pointing to a valid trace entry. */ + id = read_fw_trace(trace_seq_data, 0); + if (!ROGUE_FW_LOG_VALIDID(id)) + continue; + + return true; + } + + /* Hit end of trace data. */ + return false; +} + +/** + * fw_trace_get_first() - Find first valid entry in trace + * @trace_seq_data: Trace sequence data. + * + * Skips over invalid (usually zero) and ROGUE_FW_SF_FIRST entries. + * + * If the trace has no valid entries, this function will exit with the trace + * index pointing to the end of the trace. trace_seq_show() will return an error + * in this state. + */ +static void fw_trace_get_first(struct pvr_fw_trace_seq_data *trace_seq_data) +{ + trace_seq_data->idx = 0; + + while (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) { + u32 id = read_fw_trace(trace_seq_data, 0); + + if (ROGUE_FW_LOG_VALIDID(id)) { + u32 sf_id = find_sfid(id); + + if (sf_id != ROGUE_FW_SF_FIRST) + break; + } + trace_seq_data->idx++; + } +} + +static void *fw_trace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct pvr_fw_trace_seq_data *trace_seq_data = s->private; + u32 i; + + /* Reset trace index, then advance to *pos. */ + fw_trace_get_first(trace_seq_data); + + for (i = 0; i < *pos; i++) { + if (!fw_trace_get_next(trace_seq_data)) + return NULL; + } + + return (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) ? pos : NULL; +} + +static void *fw_trace_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct pvr_fw_trace_seq_data *trace_seq_data = s->private; + + (*pos)++; + if (!fw_trace_get_next(trace_seq_data)) + return NULL; + + return (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) ? 
pos : NULL; +} + +static void fw_trace_seq_stop(struct seq_file *s, void *v) +{ +} + +static int fw_trace_seq_show(struct seq_file *s, void *v) +{ + struct pvr_fw_trace_seq_data *trace_seq_data = s->private; + u64 timestamp; + u32 id; + u32 sf_id; + + if (trace_seq_data->idx >= ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) + return -EINVAL; + + id = read_fw_trace(trace_seq_data, 0); + /* Index is not pointing at a valid entry. */ + if (!ROGUE_FW_LOG_VALIDID(id)) + return -EINVAL; + + sf_id = find_sfid(id); + /* Index is not pointing at a valid entry. */ + if (sf_id == ROGUE_FW_SF_LAST) + return -EINVAL; + + timestamp = read_fw_trace(trace_seq_data, 1) | + ((u64)read_fw_trace(trace_seq_data, 2) << 32); + timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >> + ROGUE_FWT_TIMESTAMP_TIME_SHIFT; + + seq_printf(s, "[%llu] : ", timestamp); + if (id == ROGUE_FW_SF_MAIN_ASSERT_FAILED) { + seq_printf(s, "ASSERTION %s failed at %s:%u", + trace_seq_data->assert_buf.info, + trace_seq_data->assert_buf.path, + trace_seq_data->assert_buf.line_num); + } else { + seq_printf(s, stid_fmts[sf_id].name, + read_fw_trace(trace_seq_data, 3), + read_fw_trace(trace_seq_data, 4), + read_fw_trace(trace_seq_data, 5), + read_fw_trace(trace_seq_data, 6), + read_fw_trace(trace_seq_data, 7), + read_fw_trace(trace_seq_data, 8), + read_fw_trace(trace_seq_data, 9), + read_fw_trace(trace_seq_data, 10), + read_fw_trace(trace_seq_data, 11), + read_fw_trace(trace_seq_data, 12), + read_fw_trace(trace_seq_data, 13), + read_fw_trace(trace_seq_data, 14), + read_fw_trace(trace_seq_data, 15), + read_fw_trace(trace_seq_data, 16), + read_fw_trace(trace_seq_data, 17), + read_fw_trace(trace_seq_data, 18), + read_fw_trace(trace_seq_data, 19), + read_fw_trace(trace_seq_data, 20), + read_fw_trace(trace_seq_data, 21), + read_fw_trace(trace_seq_data, 22)); + } + seq_puts(s, "\n"); + return 0; +} + +static const struct seq_operations pvr_fw_trace_seq_ops = { + .start = fw_trace_seq_start, + .next = fw_trace_seq_next, + .stop = fw_trace_seq_stop, + .show = fw_trace_seq_show +}; + +static int fw_trace_open(struct inode *inode, struct file *file) +{ + struct pvr_fw_trace_buffer *trace_buffer = inode->i_private; + struct rogue_fwif_tracebuf_space *tracebuf_space = + trace_buffer->tracebuf_space; + struct pvr_fw_trace_seq_data *trace_seq_data; + int err; + + trace_seq_data = kzalloc(sizeof(*trace_seq_data), GFP_KERNEL); + if (!trace_seq_data) + return -ENOMEM; + + trace_seq_data->buffer = kcalloc(ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS, + sizeof(*trace_seq_data->buffer), GFP_KERNEL); + if (!trace_seq_data->buffer) { + err = -ENOMEM; + goto err_free_data; + } + + /* + * Take a local copy of the trace buffer, as firmware may still be + * writing to it. This will exist as long as this file is open. 
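+ * Because the snapshot is taken without stopping the firmware, the
+ * entry under the write pointer may be torn; the ROGUE_FW_LOG_VALIDID()
+ * checks in the parsing code above skip over any such partial entries.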
+ */ + memcpy(trace_seq_data->buffer, trace_buffer->buf, + ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS * sizeof(u32)); + trace_seq_data->start_offset = READ_ONCE(tracebuf_space->trace_pointer); + trace_seq_data->assert_buf = tracebuf_space->assert_buf; + fw_trace_get_first(trace_seq_data); + + err = seq_open(file, &pvr_fw_trace_seq_ops); + if (err) + goto err_free_buffer; + + ((struct seq_file *)file->private_data)->private = trace_seq_data; + + return 0; + +err_free_buffer: + kfree(trace_seq_data->buffer); + +err_free_data: + kfree(trace_seq_data); + + return err; +} + +static int fw_trace_release(struct inode *inode, struct file *file) +{ + struct pvr_fw_trace_seq_data *trace_seq_data = + ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + kfree(trace_seq_data->buffer); + kfree(trace_seq_data); + + return 0; +} + +static const struct file_operations pvr_fw_trace_fops = { + .owner = THIS_MODULE, + .open = fw_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = fw_trace_release, +}; + +void +pvr_fw_trace_mask_update(struct pvr_device *pvr_dev, u32 old_mask, u32 new_mask) +{ + if (old_mask != new_mask) + update_logtype(pvr_dev, new_mask); +} + +void +pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir) +{ + struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace; + u32 thread_nr; + + static_assert(ARRAY_SIZE(fw_trace->buffers) <= 10, + "The filename buffer is only large enough for a single-digit thread count"); + + for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); ++thread_nr) { + char filename[8]; + + snprintf(filename, ARRAY_SIZE(filename), "trace_%u", thread_nr); + debugfs_create_file(filename, 0400, dir, + &fw_trace->buffers[thread_nr], + &pvr_fw_trace_fops); + } +} +#endif diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.h b/drivers/gpu/drm/imagination/pvr_fw_trace.h new file mode 100644 index 000000000000..0074d2b18da0 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_FW_TRACE_H +#define PVR_FW_TRACE_H + +#include <drm/drm_file.h> +#include <linux/types.h> + +#include "pvr_rogue_fwif.h" + +/* Forward declaration from pvr_device.h. */ +struct pvr_device; + +/* Forward declaration from pvr_gem.h. */ +struct pvr_fw_object; + +/* Forward declarations from pvr_rogue_fwif.h */ +struct rogue_fwif_tracebuf; +struct rogue_fwif_tracebuf_space; + +/** + * struct pvr_fw_trace_buffer - Structure representing a trace buffer + */ +struct pvr_fw_trace_buffer { + /** @buf_obj: FW buffer object representing trace buffer. */ + struct pvr_fw_object *buf_obj; + + /** @buf: Pointer to CPU mapping of trace buffer. */ + u32 *buf; + + /** + * @tracebuf_space: Pointer to FW tracebuf_space structure for this + * trace buffer. + */ + struct rogue_fwif_tracebuf_space *tracebuf_space; +}; + +/** + * struct pvr_fw_trace - Device firmware trace data + */ +struct pvr_fw_trace { + /** + * @tracebuf_ctrl_obj: Object representing FW trace buffer control + * structure. + */ + struct pvr_fw_object *tracebuf_ctrl_obj; + + /** + * @tracebuf_ctrl: Pointer to CPU mapping of FW trace buffer control + * structure. + */ + struct rogue_fwif_tracebuf *tracebuf_ctrl; + + /** + * @buffers: Array representing the actual trace buffers owned by this + * device. + */ + struct pvr_fw_trace_buffer buffers[ROGUE_FW_THREAD_MAX]; + + /** @group_mask: Mask of enabled trace groups. 
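+ * Zero means tracing is disabled. When CONFIG_DEBUG_FS is set this is
+ * updated at runtime through pvr_fw_trace_mask_update().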
*/ + u32 group_mask; +}; + +int pvr_fw_trace_init(struct pvr_device *pvr_dev); +void pvr_fw_trace_fini(struct pvr_device *pvr_dev); + +#if defined(CONFIG_DEBUG_FS) +/* Forward declaration from <linux/dcache.h>. */ +struct dentry; + +void pvr_fw_trace_mask_update(struct pvr_device *pvr_dev, u32 old_mask, + u32 new_mask); + +void pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir); +#endif /* defined(CONFIG_DEBUG_FS) */ + +#endif /* PVR_FW_TRACE_H */ diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c new file mode 100644 index 000000000000..6a8c81fe8c1e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_gem.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_gem.h" +#include "pvr_vm.h" + +#include <drm/drm_gem.h> +#include <drm/drm_prime.h> + +#include <linux/compiler.h> +#include <linux/compiler_attributes.h> +#include <linux/dma-buf.h> +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/iosys-map.h> +#include <linux/log2.h> +#include <linux/mutex.h> +#include <linux/pagemap.h> +#include <linux/refcount.h> +#include <linux/scatterlist.h> + +static void pvr_gem_object_free(struct drm_gem_object *obj) +{ + drm_gem_shmem_object_free(obj); +} + +static int pvr_gem_mmap(struct drm_gem_object *gem_obj, struct vm_area_struct *vma) +{ + struct pvr_gem_object *pvr_obj = gem_to_pvr_gem(gem_obj); + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + + if (!(pvr_obj->flags & DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS)) + return -EINVAL; + + return drm_gem_shmem_mmap(shmem_obj, vma); +} + +static const struct drm_gem_object_funcs pvr_gem_object_funcs = { + .free = pvr_gem_object_free, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = pvr_gem_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, +}; + +/** + * pvr_gem_object_flags_validate() - Verify that a collection of PowerVR GEM + * mapping and/or creation flags form a valid combination. + * @flags: PowerVR GEM mapping/creation flags to validate. + * + * This function explicitly allows kernel-only flags. All ioctl entrypoints + * should do their own validation as well as relying on this function. + * + * Return: + * * %true if @flags contains valid mapping and/or creation flags, or + * * %false otherwise. + */ +static bool +pvr_gem_object_flags_validate(u64 flags) +{ + static const u64 invalid_combinations[] = { + /* + * Memory flagged as PM/FW-protected cannot be mapped to + * userspace. To make this explicit, we require that the two + * flags allowing each of these respective features are never + * specified together. + */ + (DRM_PVR_BO_PM_FW_PROTECT | + DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS), + }; + + int i; + + /* + * Check for bits set in undefined regions. Reserved regions refer to + * options that can only be set by the kernel. These are explicitly + * allowed in most cases, and must be checked specifically in IOCTL + * callback code. + */ + if ((flags & PVR_BO_UNDEFINED_MASK) != 0) + return false; + + /* + * Check for all combinations of flags marked as invalid in the array + * above. 
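+ * Note that ((flags & combo) == combo) only matches when every bit of
+ * a combination is present, so e.g. DRM_PVR_BO_PM_FW_PROTECT together
+ * with DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS is rejected while either
+ * flag on its own remains acceptable.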
+ */ + for (i = 0; i < ARRAY_SIZE(invalid_combinations); ++i) { + u64 combo = invalid_combinations[i]; + + if ((flags & combo) == combo) + return false; + } + + return true; +} + +/** + * pvr_gem_object_into_handle() - Convert a reference to an object into a + * userspace-accessible handle. + * @pvr_obj: [IN] Target PowerVR-specific object. + * @pvr_file: [IN] File to associate the handle with. + * @handle: [OUT] Pointer to store the created handle in. Remains unmodified if + * an error is encountered. + * + * If an error is encountered, ownership of @pvr_obj will not have been + * transferred. If this function succeeds, however, further use of @pvr_obj is + * considered undefined behaviour unless another reference to it is explicitly + * held. + * + * Return: + * * 0 on success, or + * * Any error encountered while attempting to allocate a handle on @pvr_file. + */ +int +pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj, + struct pvr_file *pvr_file, u32 *handle) +{ + struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj); + struct drm_file *file = from_pvr_file(pvr_file); + + u32 new_handle; + int err; + + err = drm_gem_handle_create(file, gem_obj, &new_handle); + if (err) + return err; + + /* + * Release our reference to @pvr_obj, effectively transferring + * ownership to the handle. + */ + pvr_gem_object_put(pvr_obj); + + /* + * Do not store the new handle in @handle until no more errors can + * occur. + */ + *handle = new_handle; + + return 0; +} + +/** + * pvr_gem_object_from_handle() - Obtain a reference to an object from a + * userspace handle. + * @pvr_file: PowerVR-specific file to which @handle is associated. + * @handle: Userspace handle referencing the target object. + * + * On return, @handle always maintains its reference to the requested object + * (if it had one in the first place). If this function succeeds, the returned + * object will hold an additional reference. When the caller is finished with + * the returned object, they should call pvr_gem_object_put() on it to release + * this reference. + * + * Return: + * * A pointer to the requested PowerVR-specific object on success, or + * * %NULL otherwise. + */ +struct pvr_gem_object * +pvr_gem_object_from_handle(struct pvr_file *pvr_file, u32 handle) +{ + struct drm_file *file = from_pvr_file(pvr_file); + struct drm_gem_object *gem_obj; + + gem_obj = drm_gem_object_lookup(file, handle); + if (!gem_obj) + return NULL; + + return gem_to_pvr_gem(gem_obj); +} + +/** + * pvr_gem_object_vmap() - Map a PowerVR GEM object into CPU virtual address + * space. + * @pvr_obj: Target PowerVR GEM object. + * + * Once the caller is finished with the CPU mapping, they must call + * pvr_gem_object_vunmap() on @pvr_obj. + * + * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_cpu() is called to make + * sure the CPU mapping is consistent. + * + * Return: + * * A pointer to the CPU mapping on success, + * * -%ENOMEM if the mapping fails, or + * * Any error encountered while attempting to acquire a reference to the + * backing pages for @pvr_obj. 
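+ *
+ * Illustrative usage only (assumes a source buffer of at least @size
+ * bytes):
+ *
+ *   void *cpu_ptr = pvr_gem_object_vmap(pvr_obj);
+ *
+ *   if (IS_ERR(cpu_ptr))
+ *           return PTR_ERR(cpu_ptr);
+ *   memcpy(cpu_ptr, src, size);
+ *   pvr_gem_object_vunmap(pvr_obj);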
+ */ +void * +pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj); + struct iosys_map map; + int err; + + dma_resv_lock(obj->resv, NULL); + + err = drm_gem_shmem_vmap(shmem_obj, &map); + if (err) + goto err_unlock; + + if (pvr_obj->flags & PVR_BO_CPU_CACHED) { + struct device *dev = shmem_obj->base.dev->dev; + + /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped + * in GPU space yet. + */ + if (shmem_obj->sgt) + dma_sync_sgtable_for_cpu(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + } + + dma_resv_unlock(obj->resv); + + return map.vaddr; + +err_unlock: + dma_resv_unlock(obj->resv); + + return ERR_PTR(err); +} + +/** + * pvr_gem_object_vunmap() - Unmap a PowerVR memory object from CPU virtual + * address space. + * @pvr_obj: Target PowerVR GEM object. + * + * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_device() is called to make + * sure the GPU mapping is consistent. + */ +void +pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(shmem_obj->vaddr); + struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj); + + if (WARN_ON(!map.vaddr)) + return; + + dma_resv_lock(obj->resv, NULL); + + if (pvr_obj->flags & PVR_BO_CPU_CACHED) { + struct device *dev = shmem_obj->base.dev->dev; + + /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped + * in GPU space yet. + */ + if (shmem_obj->sgt) + dma_sync_sgtable_for_device(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + } + + drm_gem_shmem_vunmap(shmem_obj, &map); + + dma_resv_unlock(obj->resv); +} + +/** + * pvr_gem_object_zero() - Zeroes the physical memory behind an object. + * @pvr_obj: Target PowerVR GEM object. + * + * Return: + * * 0 on success, or + * * Any error encountered while attempting to map @pvr_obj to the CPU (see + * pvr_gem_object_vmap()). + */ +static int +pvr_gem_object_zero(struct pvr_gem_object *pvr_obj) +{ + void *cpu_ptr; + + cpu_ptr = pvr_gem_object_vmap(pvr_obj); + if (IS_ERR(cpu_ptr)) + return PTR_ERR(cpu_ptr); + + memset(cpu_ptr, 0, pvr_gem_object_size(pvr_obj)); + + /* Make sure the zeroing is done before vunmapping the object. */ + wmb(); + + pvr_gem_object_vunmap(pvr_obj); + + return 0; +} + +/** + * pvr_gem_create_object() - Allocate and pre-initialize a pvr_gem_object + * @drm_dev: DRM device creating this object. + * @size: Size of the object to allocate in bytes. + * + * Return: + * * The new pre-initialized GEM object on success, + * * -%ENOMEM if the allocation failed. + */ +struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size) +{ + struct drm_gem_object *gem_obj; + struct pvr_gem_object *pvr_obj; + + pvr_obj = kzalloc(sizeof(*pvr_obj), GFP_KERNEL); + if (!pvr_obj) + return ERR_PTR(-ENOMEM); + + gem_obj = gem_from_pvr_gem(pvr_obj); + gem_obj->funcs = &pvr_gem_object_funcs; + + return gem_obj; +} + +/** + * pvr_gem_object_create() - Creates a PowerVR-specific buffer object. + * @pvr_dev: Target PowerVR device. + * @size: Size of the object to allocate in bytes. Must be greater than zero. + * Any value which is not an exact multiple of the system page size will be + * rounded up to satisfy this condition. + * @flags: Options which affect both this operation and future mapping + * operations performed on the returned object. Must be a combination of + * DRM_PVR_BO_* and/or PVR_BO_* flags. 
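+ *
+ * For example (illustrative only), a buffer the kernel will read back
+ * on the CPU might pass PVR_BO_CPU_CACHED, while one intended for
+ * mmap() by userspace would pass DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS.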
+ * + * The created object may be larger than @size, but can never be smaller. To + * get the exact size, call pvr_gem_object_size() on the returned pointer. + * + * Return: + * * The newly-minted PowerVR-specific buffer object on success, + * * -%EINVAL if @size is zero or @flags is not valid, + * * -%ENOMEM if sufficient physical memory cannot be allocated, or + * * Any other error returned by drm_gem_create_mmap_offset(). + */ +struct pvr_gem_object * +pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags) +{ + struct drm_gem_shmem_object *shmem_obj; + struct pvr_gem_object *pvr_obj; + struct sg_table *sgt; + int err; + + /* Verify @size and @flags before continuing. */ + if (size == 0 || !pvr_gem_object_flags_validate(flags)) + return ERR_PTR(-EINVAL); + + shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size); + if (IS_ERR(shmem_obj)) + return ERR_CAST(shmem_obj); + + shmem_obj->pages_mark_dirty_on_put = true; + shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED); + pvr_obj = shmem_gem_to_pvr_gem(shmem_obj); + pvr_obj->flags = flags; + + sgt = drm_gem_shmem_get_pages_sgt(shmem_obj); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto err_shmem_object_free; + } + + dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt, + DMA_BIDIRECTIONAL); + + /* + * Do this last because pvr_gem_object_zero() requires a fully + * configured instance of struct pvr_gem_object. + */ + pvr_gem_object_zero(pvr_obj); + + return pvr_obj; + +err_shmem_object_free: + drm_gem_shmem_free(shmem_obj); + + return ERR_PTR(err); +} + +/** + * pvr_gem_get_dma_addr() - Get DMA address for given offset in object + * @pvr_obj: Pointer to object to lookup address in. + * @offset: Offset within object to lookup address at. + * @dma_addr_out: Pointer to location to store DMA address. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if object is not currently backed, or if @offset is out of valid + * range for this object. + */ +int +pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset, + dma_addr_t *dma_addr_out) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + u32 accumulated_offset = 0; + struct scatterlist *sgl; + unsigned int sgt_idx; + + WARN_ON(!shmem_obj->sgt); + for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, sgt_idx) { + u32 new_offset = accumulated_offset + sg_dma_len(sgl); + + if (offset >= accumulated_offset && offset < new_offset) { + *dma_addr_out = sg_dma_address(sgl) + + (offset - accumulated_offset); + return 0; + } + + accumulated_offset = new_offset; + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h new file mode 100644 index 000000000000..e0e5ea509a2e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_gem.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_GEM_H +#define PVR_GEM_H + +#include "pvr_rogue_heap_config.h" +#include "pvr_rogue_meta.h" + +#include <uapi/drm/pvr_drm.h> + +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_mm.h> + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/const.h> +#include <linux/compiler_attributes.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/refcount.h> +#include <linux/scatterlist.h> +#include <linux/sizes.h> +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h". 
*/ +struct pvr_device; +struct pvr_file; + +/** + * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only) + * + * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options + * for this field are specified in the UAPI header "pvr_drm.h" with a + * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist + * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix. + * The public options should be used directly, DRM prefix and all. + * + * To avoid potentially confusing gaps in the UAPI options, these kernel-only + * options are specified "in reverse", starting at bit 63. + * + * We use "reserved" to refer to bits defined here and not exposed in the UAPI. + * Bits not defined anywhere are "undefined". + * + * CPU mapping options + * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this + * flag to override this behaviour and map the object cached. + * + * Firmware options + * :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard + * reset. Set this flag to override this behaviour and preserve buffer contents on reset. + */ +#define PVR_BO_CPU_CACHED BIT_ULL(63) + +#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62) + +#define PVR_BO_KERNEL_FLAGS_MASK (PVR_BO_CPU_CACHED | PVR_BO_FW_NO_CLEAR_ON_RESET) + +/* Bits 61..3 are undefined. */ +/* Bits 2..0 are defined in the UAPI. */ + +/* Other utilities. */ +#define PVR_BO_UNDEFINED_MASK ~(PVR_BO_KERNEL_FLAGS_MASK | DRM_PVR_BO_FLAGS_MASK) + +/* + * All firmware-mapped memory uses (mostly) the same flags. Specifically, + * firmware-mapped memory should be: + * * Read/write on the device, + * * Read/write on the CPU, and + * * Write-combined on the CPU. + * + * The only variation is in caching on the device. + */ +#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0)) +#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_BYPASS_DEVICE_CACHE + +/** + * struct pvr_gem_object - powervr-specific wrapper for &struct drm_gem_object + */ +struct pvr_gem_object { + /** + * @base: The underlying &struct drm_gem_shmem_object. + * + * Do not access this member directly, instead call + * shmem_gem_from_pvr_gem(). + */ + struct drm_gem_shmem_object base; + + /** + * @flags: Options set at creation-time. Some of these options apply to + * the creation operation itself (which are stored here for reference) + * with the remainder used for mapping options to both the device and + * CPU. These are used every time this object is mapped, and must not be + * changed after creation. + * + * Must be a combination of DRM_PVR_BO_* and/or PVR_BO_* flags. + * + * .. note:: + * + * Although this member is not declared const, it must be treated + * as immutable: none of these options may change or be changed + * throughout the object's + * lifetime. 
+ */ + u64 flags; + +}; + +static_assert(offsetof(struct pvr_gem_object, base) == 0, + "offsetof(struct pvr_gem_object, base) not zero"); + +#define shmem_gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base) + +#define shmem_gem_to_pvr_gem(shmem_obj) container_of_const(shmem_obj, struct pvr_gem_object, base) + +#define gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base.base) + +#define gem_to_pvr_gem(gem_obj) container_of_const(gem_obj, struct pvr_gem_object, base.base) + +/* Functions defined in pvr_gem.c */ + +struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size); + +struct pvr_gem_object *pvr_gem_object_create(struct pvr_device *pvr_dev, + size_t size, u64 flags); + +int pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj, + struct pvr_file *pvr_file, u32 *handle); +struct pvr_gem_object *pvr_gem_object_from_handle(struct pvr_file *pvr_file, + u32 handle); + +static __always_inline struct sg_table * +pvr_gem_object_get_pages_sgt(struct pvr_gem_object *pvr_obj) +{ + return drm_gem_shmem_get_pages_sgt(shmem_gem_from_pvr_gem(pvr_obj)); +} + +void *pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj); +void pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj); + +int pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset, + dma_addr_t *dma_addr_out); + +/** + * pvr_gem_object_get() - Acquire reference on pvr_gem_object + * @pvr_obj: Pointer to object to acquire reference on. + */ +static __always_inline void +pvr_gem_object_get(struct pvr_gem_object *pvr_obj) +{ + drm_gem_object_get(gem_from_pvr_gem(pvr_obj)); +} + +/** + * pvr_gem_object_put() - Release reference on pvr_gem_object + * @pvr_obj: Pointer to object to release reference on. + */ +static __always_inline void +pvr_gem_object_put(struct pvr_gem_object *pvr_obj) +{ + drm_gem_object_put(gem_from_pvr_gem(pvr_obj)); +} + +static __always_inline size_t +pvr_gem_object_size(struct pvr_gem_object *pvr_obj) +{ + return gem_from_pvr_gem(pvr_obj)->size; +} + +#endif /* PVR_GEM_H */ diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.c b/drivers/gpu/drm/imagination/pvr_hwrt.c new file mode 100644 index 000000000000..54f88d6c01e5 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_hwrt.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#include "pvr_free_list.h" +#include "pvr_hwrt.h" +#include "pvr_gem.h" +#include "pvr_rogue_cr_defs_client.h" +#include "pvr_rogue_fwif.h" + +#include <drm/drm_gem.h> +#include <linux/bitops.h> +#include <linux/math.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <uapi/drm/pvr_drm.h> + +static_assert(ROGUE_FWIF_NUM_RTDATAS == 2); +static_assert(ROGUE_FWIF_NUM_GEOMDATAS == 1); +static_assert(ROGUE_FWIF_NUM_RTDATA_FREELISTS == 2); + +/* + * struct pvr_rt_mtile_info - Render target macrotile information + */ +struct pvr_rt_mtile_info { + u32 mtile_x[3]; + u32 mtile_y[3]; + u32 tile_max_x; + u32 tile_max_y; + u32 tile_size_x; + u32 tile_size_y; + u32 num_tiles_x; + u32 num_tiles_y; +}; + +/* Size of Shadow Render Target Cache entry */ +#define SRTC_ENTRY_SIZE sizeof(u32) +/* Size of Renders Accumulation Array entry */ +#define RAA_ENTRY_SIZE sizeof(u32) + +static int +hwrt_init_kernel_structure(struct pvr_file *pvr_file, + struct drm_pvr_ioctl_create_hwrt_dataset_args *args, + struct pvr_hwrt_dataset *hwrt) +{ + struct pvr_device *pvr_dev = pvr_file->pvr_dev; + int err; + int i; + + hwrt->pvr_dev = pvr_dev; + hwrt->max_rts = args->layers; + + /* Get pointers to the free lists */ + for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) { + hwrt->free_lists[i] = pvr_free_list_lookup(pvr_file, args->free_list_handles[i]); + if (!hwrt->free_lists[i]) { + err = -EINVAL; + goto err_put_free_lists; + } + } + + if (hwrt->free_lists[ROGUE_FW_LOCAL_FREELIST]->current_pages < + pvr_get_free_list_min_pages(pvr_dev)) { + err = -EINVAL; + goto err_put_free_lists; + } + + return 0; + +err_put_free_lists: + for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) { + pvr_free_list_put(hwrt->free_lists[i]); + hwrt->free_lists[i] = NULL; + } + + return err; +} + +static void +hwrt_fini_kernel_structure(struct pvr_hwrt_dataset *hwrt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) { + pvr_free_list_put(hwrt->free_lists[i]); + hwrt->free_lists[i] = NULL; + } +} + +static void +hwrt_fini_common_fw_structure(struct pvr_hwrt_dataset *hwrt) +{ + pvr_fw_object_destroy(hwrt->common_fw_obj); +} + +static int +get_cr_isp_mtile_size_val(struct pvr_device *pvr_dev, u32 samples, + struct pvr_rt_mtile_info *info, u32 *value_out) +{ + u32 x = info->mtile_x[0]; + u32 y = info->mtile_y[0]; + u32 samples_per_pixel; + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, isp_samples_per_pixel, &samples_per_pixel); + if (err) + return err; + + if (samples_per_pixel == 1) { + if (samples >= 4) + x <<= 1; + if (samples >= 2) + y <<= 1; + } else if (samples_per_pixel == 2) { + if (samples >= 8) + x <<= 1; + if (samples >= 4) + y <<= 1; + } else if (samples_per_pixel == 4) { + if (samples >= 8) + y <<= 1; + } else { + WARN(true, "Unsupported ISP samples per pixel value"); + return -EINVAL; + } + + *value_out = ((x << ROGUE_CR_ISP_MTILE_SIZE_X_SHIFT) & ~ROGUE_CR_ISP_MTILE_SIZE_X_CLRMSK) | + ((y << ROGUE_CR_ISP_MTILE_SIZE_Y_SHIFT) & ~ROGUE_CR_ISP_MTILE_SIZE_Y_CLRMSK); + + return 0; +} + +static int +get_cr_multisamplectl_val(u32 samples, bool y_flip, u64 *value_out) +{ + static const struct { + u8 x[8]; + u8 y[8]; + } sample_positions[4] = { + /* 1 sample */ + { + .x = { 8 }, + .y = { 8 }, + }, + /* 2 samples */ + { + .x = { 12, 4 }, + .y = { 12, 4 }, + }, + /* 4 samples */ + { + .x = { 6, 14, 2, 10 }, + .y = { 2, 6, 10, 14 }, + }, + /* 8 samples */ + { + .x = { 9, 7, 13, 5, 3, 1, 11, 15 }, + .y = { 5, 11, 9, 3, 13, 7, 15, 1 }, + }, + }; + const int idx = fls(samples) - 1; + u64 value = 0; + + if (idx < 0 || idx 
> 3) + return -EINVAL; + + for (u32 i = 0; i < 8; i++) { + value |= ((u64)sample_positions[idx].x[i]) << (i * 8); + if (y_flip) + value |= (((u64)(16 - sample_positions[idx].y[i]) & 0xf)) << (i * 8 + 4); + else + value |= ((u64)sample_positions[idx].y[i]) << (i * 8 + 4); + } + + *value_out = value; + + return 0; +} + +static int +get_cr_te_aa_val(struct pvr_device *pvr_dev, u32 samples, u32 *value_out) +{ + u32 samples_per_pixel; + u32 value = 0; + int err = 0; + + err = PVR_FEATURE_VALUE(pvr_dev, isp_samples_per_pixel, &samples_per_pixel); + if (err) + return err; + + switch (samples_per_pixel) { + case 1: + if (samples >= 2) + value |= ROGUE_CR_TE_AA_Y_EN; + if (samples >= 4) + value |= ROGUE_CR_TE_AA_X_EN; + break; + case 2: + if (samples >= 2) + value |= ROGUE_CR_TE_AA_X2_EN; + if (samples >= 4) + value |= ROGUE_CR_TE_AA_Y_EN; + if (samples >= 8) + value |= ROGUE_CR_TE_AA_X_EN; + break; + case 4: + if (samples >= 2) + value |= ROGUE_CR_TE_AA_X2_EN; + if (samples >= 4) + value |= ROGUE_CR_TE_AA_Y2_EN; + if (samples >= 8) + value |= ROGUE_CR_TE_AA_Y_EN; + break; + default: + WARN(true, "Unsupported ISP samples per pixel value"); + return -EINVAL; + } + + *value_out = value; + + return 0; +} + +static void +hwrtdata_common_init(void *cpu_ptr, void *priv) +{ + struct pvr_hwrt_dataset *hwrt = priv; + + memcpy(cpu_ptr, &hwrt->common, sizeof(hwrt->common)); +} + +static int +hwrt_init_common_fw_structure(struct pvr_file *pvr_file, + struct drm_pvr_ioctl_create_hwrt_dataset_args *args, + struct pvr_hwrt_dataset *hwrt) +{ + struct drm_pvr_create_hwrt_geom_data_args *geom_data_args = &args->geom_data_args; + struct pvr_device *pvr_dev = pvr_file->pvr_dev; + struct pvr_rt_mtile_info info; + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, tile_size_x, &info.tile_size_x); + if (WARN_ON(err)) + return err; + + err = PVR_FEATURE_VALUE(pvr_dev, tile_size_y, &info.tile_size_y); + if (WARN_ON(err)) + return err; + + info.num_tiles_x = DIV_ROUND_UP(args->width, info.tile_size_x); + info.num_tiles_y = DIV_ROUND_UP(args->height, info.tile_size_y); + + if (PVR_HAS_FEATURE(pvr_dev, simple_parameter_format_version)) { + u32 parameter_format; + + err = PVR_FEATURE_VALUE(pvr_dev, simple_parameter_format_version, + &parameter_format); + if (WARN_ON(err)) + return err; + + WARN_ON(parameter_format != 2); + + /* + * Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile, which is + * aligned to a tile group. + */ + info.mtile_x[0] = DIV_ROUND_UP(info.num_tiles_x, 8) * 2; + info.mtile_y[0] = DIV_ROUND_UP(info.num_tiles_y, 8) * 2; + info.mtile_x[1] = 0; + info.mtile_y[1] = 0; + info.mtile_x[2] = 0; + info.mtile_y[2] = 0; + info.tile_max_x = round_up(info.num_tiles_x, 2) - 1; + info.tile_max_y = round_up(info.num_tiles_y, 2) - 1; + } else { + /* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. 
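+ * Worked example with assumed values: a 1920x1080 render target on a
+ * core with 32x32 tiles gives num_tiles_x = 60, so
+ * mtile_x[0] = round_up(DIV_ROUND_UP(60, 4), 4) = 16 tiles per
+ * macrotile in X, with mtile_x[1] = 32 and mtile_x[2] = 48.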
*/ + info.mtile_x[0] = round_up(DIV_ROUND_UP(info.num_tiles_x, 4), 4); + info.mtile_y[0] = round_up(DIV_ROUND_UP(info.num_tiles_y, 4), 4); + info.mtile_x[1] = info.mtile_x[0] * 2; + info.mtile_y[1] = info.mtile_y[0] * 2; + info.mtile_x[2] = info.mtile_x[0] * 3; + info.mtile_y[2] = info.mtile_y[0] * 3; + info.tile_max_x = info.num_tiles_x - 1; + info.tile_max_y = info.num_tiles_y - 1; + } + + hwrt->common.geom_caches_need_zeroing = false; + + hwrt->common.isp_merge_lower_x = args->isp_merge_lower_x; + hwrt->common.isp_merge_lower_y = args->isp_merge_lower_y; + hwrt->common.isp_merge_upper_x = args->isp_merge_upper_x; + hwrt->common.isp_merge_upper_y = args->isp_merge_upper_y; + hwrt->common.isp_merge_scale_x = args->isp_merge_scale_x; + hwrt->common.isp_merge_scale_y = args->isp_merge_scale_y; + + err = get_cr_multisamplectl_val(args->samples, false, + &hwrt->common.multi_sample_ctl); + if (err) + return err; + + err = get_cr_multisamplectl_val(args->samples, true, + &hwrt->common.flipped_multi_sample_ctl); + if (err) + return err; + + hwrt->common.mtile_stride = info.mtile_x[0] * info.mtile_y[0]; + + err = get_cr_te_aa_val(pvr_dev, args->samples, &hwrt->common.teaa); + if (err) + return err; + + hwrt->common.screen_pixel_max = + (((args->width - 1) << ROGUE_CR_PPP_SCREEN_PIXXMAX_SHIFT) & + ~ROGUE_CR_PPP_SCREEN_PIXXMAX_CLRMSK) | + (((args->height - 1) << ROGUE_CR_PPP_SCREEN_PIXYMAX_SHIFT) & + ~ROGUE_CR_PPP_SCREEN_PIXYMAX_CLRMSK); + + hwrt->common.te_screen = + ((info.tile_max_x << ROGUE_CR_TE_SCREEN_XMAX_SHIFT) & + ~ROGUE_CR_TE_SCREEN_XMAX_CLRMSK) | + ((info.tile_max_y << ROGUE_CR_TE_SCREEN_YMAX_SHIFT) & + ~ROGUE_CR_TE_SCREEN_YMAX_CLRMSK); + hwrt->common.te_mtile1 = + ((info.mtile_x[0] << ROGUE_CR_TE_MTILE1_X1_SHIFT) & ~ROGUE_CR_TE_MTILE1_X1_CLRMSK) | + ((info.mtile_x[1] << ROGUE_CR_TE_MTILE1_X2_SHIFT) & ~ROGUE_CR_TE_MTILE1_X2_CLRMSK) | + ((info.mtile_x[2] << ROGUE_CR_TE_MTILE1_X3_SHIFT) & ~ROGUE_CR_TE_MTILE1_X3_CLRMSK); + hwrt->common.te_mtile2 = + ((info.mtile_y[0] << ROGUE_CR_TE_MTILE2_Y1_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y1_CLRMSK) | + ((info.mtile_y[1] << ROGUE_CR_TE_MTILE2_Y2_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y2_CLRMSK) | + ((info.mtile_y[2] << ROGUE_CR_TE_MTILE2_Y3_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y3_CLRMSK); + + err = get_cr_isp_mtile_size_val(pvr_dev, args->samples, &info, + &hwrt->common.isp_mtile_size); + if (err) + return err; + + hwrt->common.tpc_stride = geom_data_args->tpc_stride; + hwrt->common.tpc_size = geom_data_args->tpc_size; + + hwrt->common.rgn_header_size = args->region_header_size; + + err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_hwrtdata_common), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, hwrtdata_common_init, hwrt, + &hwrt->common_fw_obj); + + return err; +} + +static void +hwrt_fw_data_init(void *cpu_ptr, void *priv) +{ + struct pvr_hwrt_data *hwrt_data = priv; + + memcpy(cpu_ptr, &hwrt_data->data, sizeof(hwrt_data->data)); +} + +static int +hwrt_data_init_fw_structure(struct pvr_file *pvr_file, + struct pvr_hwrt_dataset *hwrt, + struct drm_pvr_ioctl_create_hwrt_dataset_args *args, + struct drm_pvr_create_hwrt_rt_data_args *rt_data_args, + struct pvr_hwrt_data *hwrt_data) +{ + struct drm_pvr_create_hwrt_geom_data_args *geom_data_args = &args->geom_data_args; + struct pvr_device *pvr_dev = pvr_file->pvr_dev; + struct rogue_fwif_rta_ctl *rta_ctl; + int free_list_i; + int err; + + pvr_fw_object_get_fw_addr(hwrt->common_fw_obj, + &hwrt_data->data.hwrt_data_common_fw_addr); + + for (free_list_i = 0; free_list_i < ARRAY_SIZE(hwrt->free_lists); free_list_i++) { + 
pvr_fw_object_get_fw_addr(hwrt->free_lists[free_list_i]->fw_obj, + &hwrt_data->data.freelists_fw_addr[free_list_i]); + } + + hwrt_data->data.tail_ptrs_dev_addr = geom_data_args->tpc_dev_addr; + hwrt_data->data.vheap_table_dev_addr = geom_data_args->vheap_table_dev_addr; + hwrt_data->data.rtc_dev_addr = geom_data_args->rtc_dev_addr; + + hwrt_data->data.pm_mlist_dev_addr = rt_data_args->pm_mlist_dev_addr; + hwrt_data->data.macrotile_array_dev_addr = rt_data_args->macrotile_array_dev_addr; + hwrt_data->data.rgn_header_dev_addr = rt_data_args->region_header_dev_addr; + + rta_ctl = &hwrt_data->data.rta_ctl; + + rta_ctl->render_target_index = 0; + rta_ctl->active_render_targets = 0; + rta_ctl->valid_render_targets_fw_addr = 0; + rta_ctl->rta_num_partial_renders_fw_addr = 0; + rta_ctl->max_rts = args->layers; + + if (args->layers > 1) { + err = pvr_fw_object_create(pvr_dev, args->layers * SRTC_ENTRY_SIZE, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &hwrt_data->srtc_obj); + if (err) + return err; + pvr_fw_object_get_fw_addr(hwrt_data->srtc_obj, + &rta_ctl->valid_render_targets_fw_addr); + + err = pvr_fw_object_create(pvr_dev, args->layers * RAA_ENTRY_SIZE, + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + NULL, NULL, &hwrt_data->raa_obj); + if (err) + goto err_put_shadow_rt_cache; + pvr_fw_object_get_fw_addr(hwrt_data->raa_obj, + &rta_ctl->rta_num_partial_renders_fw_addr); + } + + err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_hwrtdata), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + hwrt_fw_data_init, hwrt_data, &hwrt_data->fw_obj); + if (err) + goto err_put_raa_obj; + + pvr_free_list_add_hwrt(hwrt->free_lists[0], hwrt_data); + + return 0; + +err_put_raa_obj: + if (args->layers > 1) + pvr_fw_object_destroy(hwrt_data->raa_obj); + +err_put_shadow_rt_cache: + if (args->layers > 1) + pvr_fw_object_destroy(hwrt_data->srtc_obj); + + return err; +} + +static void +hwrt_data_fini_fw_structure(struct pvr_hwrt_dataset *hwrt, int hwrt_nr) +{ + struct pvr_hwrt_data *hwrt_data = &hwrt->data[hwrt_nr]; + + pvr_free_list_remove_hwrt(hwrt->free_lists[0], hwrt_data); + + if (hwrt->max_rts > 1) { + pvr_fw_object_destroy(hwrt_data->raa_obj); + pvr_fw_object_destroy(hwrt_data->srtc_obj); + } + + pvr_fw_object_destroy(hwrt_data->fw_obj); +} + +/** + * pvr_hwrt_dataset_create() - Create a new HWRT dataset + * @pvr_file: Pointer to pvr_file structure. + * @args: Creation arguments from userspace. + * + * Return: + * * Pointer to new HWRT, or + * * ERR_PTR(-%ENOMEM) on out of memory. 
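+ * + * For illustration, a hypothetical ioctl handler could pair the returned dataset with a userspace handle like this (sketch only; the local handle variable is an assumption and error unwinding is elided): + * + * struct pvr_hwrt_dataset *hwrt = pvr_hwrt_dataset_create(pvr_file, args); + * u32 handle; + * + * if (IS_ERR(hwrt)) + * return PTR_ERR(hwrt); + * if (xa_alloc(&pvr_file->hwrt_handles, &handle, hwrt, xa_limit_32b, GFP_KERNEL)) + * pvr_hwrt_dataset_put(hwrt);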
+ */ +struct pvr_hwrt_dataset * +pvr_hwrt_dataset_create(struct pvr_file *pvr_file, + struct drm_pvr_ioctl_create_hwrt_dataset_args *args) +{ + struct pvr_hwrt_dataset *hwrt; + int err, i = 0; + + /* Create and fill out the kernel structure */ + hwrt = kzalloc(sizeof(*hwrt), GFP_KERNEL); + + if (!hwrt) + return ERR_PTR(-ENOMEM); + + err = hwrt_init_kernel_structure(pvr_file, args, hwrt); + if (err < 0) + goto err_free; + + err = hwrt_init_common_fw_structure(pvr_file, args, hwrt); + if (err < 0) + goto err_fini_kernel_structure; + + for (; i < ARRAY_SIZE(hwrt->data); i++) { + err = hwrt_data_init_fw_structure(pvr_file, hwrt, args, + &args->rt_data_args[i], + &hwrt->data[i]); + if (err < 0) + goto err_fini_data_structures; + + hwrt->data[i].hwrt_dataset = hwrt; + } + + kref_init(&hwrt->ref_count); + return hwrt; + +err_fini_data_structures: + while (--i >= 0) + hwrt_data_fini_fw_structure(hwrt, i); + +err_fini_kernel_structure: + hwrt_fini_kernel_structure(hwrt); + +err_free: + kfree(hwrt); + + return ERR_PTR(err); +} + +static void +pvr_hwrt_dataset_release(struct kref *ref_count) +{ + struct pvr_hwrt_dataset *hwrt = + container_of(ref_count, struct pvr_hwrt_dataset, ref_count); + + for (int i = ARRAY_SIZE(hwrt->data) - 1; i >= 0; i--) { + WARN_ON(pvr_fw_structure_cleanup(hwrt->pvr_dev, ROGUE_FWIF_CLEANUP_HWRTDATA, + hwrt->data[i].fw_obj, 0)); + hwrt_data_fini_fw_structure(hwrt, i); + } + + hwrt_fini_common_fw_structure(hwrt); + hwrt_fini_kernel_structure(hwrt); + + kfree(hwrt); +} + +/** + * pvr_destroy_hwrt_datasets_for_file: Destroy any HWRT datasets associated + * with the given file. + * @pvr_file: Pointer to pvr_file structure. + * + * Removes all HWRT datasets associated with @pvr_file from the device + * hwrt_dataset list and drops initial references. HWRT datasets will then be + * destroyed once all outstanding references are dropped. + */ +void pvr_destroy_hwrt_datasets_for_file(struct pvr_file *pvr_file) +{ + struct pvr_hwrt_dataset *hwrt; + unsigned long handle; + + xa_for_each(&pvr_file->hwrt_handles, handle, hwrt) { + (void)hwrt; + pvr_hwrt_dataset_put(xa_erase(&pvr_file->hwrt_handles, handle)); + } +} + +/** + * pvr_hwrt_dataset_put() - Release reference on HWRT dataset + * @hwrt: Pointer to HWRT dataset to release reference on + */ +void +pvr_hwrt_dataset_put(struct pvr_hwrt_dataset *hwrt) +{ + if (hwrt) + kref_put(&hwrt->ref_count, pvr_hwrt_dataset_release); +} diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.h b/drivers/gpu/drm/imagination/pvr_hwrt.h new file mode 100644 index 000000000000..676070b20c3b --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_hwrt.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_HWRT_H +#define PVR_HWRT_H + +#include <linux/compiler_attributes.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/xarray.h> +#include <uapi/drm/pvr_drm.h> + +#include "pvr_device.h" +#include "pvr_rogue_fwif_shared.h" + +/* Forward declaration from pvr_free_list.h. */ +struct pvr_free_list; + +/* Forward declaration from pvr_gem.h. */ +struct pvr_fw_object; + +/** + * struct pvr_hwrt_data - structure representing HWRT data + */ +struct pvr_hwrt_data { + /** @fw_obj: FW object representing the FW-side structure. */ + struct pvr_fw_object *fw_obj; + + /** @data: Local copy of FW-side structure. */ + struct rogue_fwif_hwrtdata data; + + /** @freelist_node: List node connecting this HWRT to the local freelist. 
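+ * + * Entries are linked by pvr_free_list_add_hwrt() when the FW structures are created and unlinked by pvr_free_list_remove_hwrt() on teardown.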
*/ + struct list_head freelist_node; + + /** + * @srtc_obj: FW object representing shadow render target cache. + * + * Only valid if @max_rts > 1. + */ + struct pvr_fw_object *srtc_obj; + + /** + * @raa_obj: FW object representing renders accumulation array. + * + * Only valid if @max_rts > 1. + */ + struct pvr_fw_object *raa_obj; + + /** @hwrt_dataset: Back pointer to owning HWRT dataset. */ + struct pvr_hwrt_dataset *hwrt_dataset; +}; + +/** + * struct pvr_hwrt_dataset - structure representing a HWRT data set. + */ +struct pvr_hwrt_dataset { + /** @ref_count: Reference count of object. */ + struct kref ref_count; + + /** @pvr_dev: Pointer to device that owns this object. */ + struct pvr_device *pvr_dev; + + /** @common_fw_obj: FW object representing common FW-side structure. */ + struct pvr_fw_object *common_fw_obj; + + /** @common: Common HWRT data. */ + struct rogue_fwif_hwrtdata_common common; + + /** @data: HWRT data structures belonging to this set. */ + struct pvr_hwrt_data data[ROGUE_FWIF_NUM_RTDATAS]; + + /** @free_lists: Free lists used by HWRT data set. */ + struct pvr_free_list *free_lists[ROGUE_FWIF_NUM_RTDATA_FREELISTS]; + + /** @max_rts: Maximum render targets for this HWRT data set. */ + u16 max_rts; +}; + +struct pvr_hwrt_dataset * +pvr_hwrt_dataset_create(struct pvr_file *pvr_file, + struct drm_pvr_ioctl_create_hwrt_dataset_args *args); + +void +pvr_destroy_hwrt_datasets_for_file(struct pvr_file *pvr_file); + +/** + * pvr_hwrt_dataset_lookup() - Lookup HWRT dataset pointer from handle + * @pvr_file: Pointer to pvr_file structure. + * @handle: Object handle. + * + * Takes reference on dataset object. Call pvr_hwrt_dataset_put() to release. + * + * Returns: + * * The requested object on success, or + * * %NULL on failure (object does not exist in list, or is not a HWRT + * dataset) + */ +static __always_inline struct pvr_hwrt_dataset * +pvr_hwrt_dataset_lookup(struct pvr_file *pvr_file, u32 handle) +{ + struct pvr_hwrt_dataset *hwrt; + + xa_lock(&pvr_file->hwrt_handles); + hwrt = xa_load(&pvr_file->hwrt_handles, handle); + + if (hwrt) + kref_get(&hwrt->ref_count); + + xa_unlock(&pvr_file->hwrt_handles); + + return hwrt; +} + +void +pvr_hwrt_dataset_put(struct pvr_hwrt_dataset *hwrt); + +/** + * pvr_hwrt_data_lookup() - Lookup HWRT data pointer from handle and index + * @pvr_file: Pointer to pvr_file structure. + * @handle: Object handle. + * @index: Index of RT data within dataset. + * + * Takes reference on dataset object. Call pvr_hwrt_data_put() to release. 
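+ * + * A minimal lookup/release pairing, for illustration (use_hwrt_data() is a hypothetical consumer; handle and index are assumed to come from validated userspace arguments): + * + * struct pvr_hwrt_data *rt = pvr_hwrt_data_lookup(pvr_file, handle, index); + * + * if (rt) { + * use_hwrt_data(rt); + * pvr_hwrt_data_put(rt); + * }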
+ * + * Returns: + * * The requested object on success, or + * * %NULL on failure (object does not exist in list, or is not a HWRT + * dataset, or index is out of range) + */ +static __always_inline struct pvr_hwrt_data * +pvr_hwrt_data_lookup(struct pvr_file *pvr_file, u32 handle, u32 index) +{ + struct pvr_hwrt_dataset *hwrt_dataset = pvr_hwrt_dataset_lookup(pvr_file, handle); + + if (hwrt_dataset) { + if (index < ARRAY_SIZE(hwrt_dataset->data)) + return &hwrt_dataset->data[index]; + + pvr_hwrt_dataset_put(hwrt_dataset); + } + + return NULL; +} + +/** + * pvr_hwrt_data_put() - Release reference on HWRT data + * @hwrt: Pointer to HWRT data to release reference on + */ +static __always_inline void +pvr_hwrt_data_put(struct pvr_hwrt_data *hwrt) +{ + if (hwrt) + pvr_hwrt_dataset_put(hwrt->hwrt_dataset); +} + +static __always_inline struct pvr_hwrt_data * +pvr_hwrt_data_get(struct pvr_hwrt_data *hwrt) +{ + if (hwrt) + kref_get(&hwrt->hwrt_dataset->ref_count); + + return hwrt; +} + +#endif /* PVR_HWRT_H */ diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c new file mode 100644 index 000000000000..78c2f3c6dce0 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -0,0 +1,786 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_context.h" +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_gem.h" +#include "pvr_hwrt.h" +#include "pvr_job.h" +#include "pvr_mmu.h" +#include "pvr_power.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_fwif_client.h" +#include "pvr_stream.h" +#include "pvr_stream_defs.h" +#include "pvr_sync.h" + +#include <drm/drm_exec.h> +#include <drm/drm_gem.h> +#include <linux/types.h> +#include <uapi/drm/pvr_drm.h> + +static void pvr_job_release(struct kref *kref) +{ + struct pvr_job *job = container_of(kref, struct pvr_job, ref_count); + + xa_erase(&job->pvr_dev->job_ids, job->id); + + pvr_hwrt_data_put(job->hwrt); + pvr_context_put(job->ctx); + + WARN_ON(job->paired_job); + + pvr_queue_job_cleanup(job); + pvr_job_release_pm_ref(job); + + kfree(job->cmd); + kfree(job); +} + +/** + * pvr_job_put() - Release reference on job + * @job: Target job. + */ +void +pvr_job_put(struct pvr_job *job) +{ + if (job) + kref_put(&job->ref_count, pvr_job_release); +} + +/** + * pvr_job_process_stream() - Build job FW structure from stream + * @pvr_dev: Device pointer. + * @cmd_defs: Stream definition. + * @stream: Pointer to command stream. + * @stream_size: Size of command stream, in bytes. + * @job: Pointer to job. + * + * Caller is responsible for freeing the output structure. + * + * Returns: + * * 0 on success, + * * -%ENOMEM on out of memory, or + * * -%EINVAL on malformed stream. 
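+ * + * On error the partially-built job->cmd buffer is freed and cleared here, so pvr_job_release() has nothing further to clean up.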
+ */ +static int +pvr_job_process_stream(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs, + void *stream, u32 stream_size, struct pvr_job *job) +{ + int err; + + job->cmd = kzalloc(cmd_defs->dest_size, GFP_KERNEL); + if (!job->cmd) + return -ENOMEM; + + job->cmd_len = cmd_defs->dest_size; + + err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, job->cmd); + if (err) { + kfree(job->cmd); + /* Clear the stale pointer so pvr_job_release() does not free it again. */ + job->cmd = NULL; + } + + return err; +} + +static int pvr_fw_cmd_init(struct pvr_device *pvr_dev, struct pvr_job *job, + const struct pvr_stream_cmd_defs *stream_def, + u64 stream_userptr, u32 stream_len) +{ + void *stream; + int err; + + stream = kzalloc(stream_len, GFP_KERNEL); + if (!stream) + return -ENOMEM; + + if (copy_from_user(stream, u64_to_user_ptr(stream_userptr), stream_len)) { + err = -EFAULT; + goto err_free_stream; + } + + err = pvr_job_process_stream(pvr_dev, stream_def, stream, stream_len, job); + +err_free_stream: + kfree(stream); + + return err; +} + +static u32 +convert_geom_flags(u32 in_flags) +{ + u32 out_flags = 0; + + if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST) + out_flags |= ROGUE_GEOM_FLAGS_FIRSTKICK; + if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST) + out_flags |= ROGUE_GEOM_FLAGS_LASTKICK; + if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE) + out_flags |= ROGUE_GEOM_FLAGS_SINGLE_CORE; + + return out_flags; +} + +static u32 +convert_frag_flags(u32 in_flags) +{ + u32 out_flags = 0; + + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE) + out_flags |= ROGUE_FRAG_FLAGS_SINGLE_CORE; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER) + out_flags |= ROGUE_FRAG_FLAGS_DEPTHBUFFER; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER) + out_flags |= ROGUE_FRAG_FLAGS_STENCILBUFFER; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP) + out_flags |= ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER) + out_flags |= ROGUE_FRAG_FLAGS_SCRATCHBUFFER; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS) + out_flags |= ROGUE_FRAG_FLAGS_GET_VIS_RESULTS; + if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE) + out_flags |= ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE; + + return out_flags; +} + +static int +pvr_geom_job_fw_cmd_init(struct pvr_job *job, + struct drm_pvr_job *args) +{ + struct rogue_fwif_cmd_geom *cmd; + int err; + + if (args->flags & ~DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK) + return -EINVAL; + + if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER) + return -EINVAL; + + if (!job->hwrt) + return -EINVAL; + + job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_GEOM; + err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_geom_stream, + args->cmd_stream, args->cmd_stream_len); + if (err) + return err; + + cmd = job->cmd; + cmd->cmd_shared.cmn.frame_num = 0; + cmd->flags = convert_geom_flags(args->flags); + pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr); + return 0; +} + +static int +pvr_frag_job_fw_cmd_init(struct pvr_job *job, + struct drm_pvr_job *args) +{ + struct rogue_fwif_cmd_frag *cmd; + int err; + + if (args->flags & ~DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK) + return -EINVAL; + + if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER) + return -EINVAL; + + if (!job->hwrt) + return -EINVAL; + + job->fw_ccb_cmd_type = (args->flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER) ? 
+ ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR : + ROGUE_FWIF_CCB_CMD_TYPE_FRAG; + err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_frag_stream, + args->cmd_stream, args->cmd_stream_len); + if (err) + return err; + + cmd = job->cmd; + cmd->cmd_shared.cmn.frame_num = 0; + cmd->flags = convert_frag_flags(args->flags); + pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr); + return 0; +} + +static u32 +convert_compute_flags(u32 in_flags) +{ + u32 out_flags = 0; + + if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP) + out_flags |= ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; + if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE) + out_flags |= ROGUE_COMPUTE_FLAG_SINGLE_CORE; + + return out_flags; +} + +static int +pvr_compute_job_fw_cmd_init(struct pvr_job *job, + struct drm_pvr_job *args) +{ + struct rogue_fwif_cmd_compute *cmd; + int err; + + if (args->flags & ~DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK) + return -EINVAL; + + if (job->ctx->type != DRM_PVR_CTX_TYPE_COMPUTE) + return -EINVAL; + + job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_CDM; + err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_compute_stream, + args->cmd_stream, args->cmd_stream_len); + if (err) + return err; + + cmd = job->cmd; + cmd->common.frame_num = 0; + cmd->flags = convert_compute_flags(args->flags); + return 0; +} + +static u32 +convert_transfer_flags(u32 in_flags) +{ + u32 out_flags = 0; + + if (in_flags & DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE) + out_flags |= ROGUE_TRANSFER_FLAGS_SINGLE_CORE; + + return out_flags; +} + +static int +pvr_transfer_job_fw_cmd_init(struct pvr_job *job, + struct drm_pvr_job *args) +{ + struct rogue_fwif_cmd_transfer *cmd; + int err; + + if (args->flags & ~DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK) + return -EINVAL; + + if (job->ctx->type != DRM_PVR_CTX_TYPE_TRANSFER_FRAG) + return -EINVAL; + + job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D; + err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_transfer_stream, + args->cmd_stream, args->cmd_stream_len); + if (err) + return err; + + cmd = job->cmd; + cmd->common.frame_num = 0; + cmd->flags = convert_transfer_flags(args->flags); + return 0; +} + +static int +pvr_job_fw_cmd_init(struct pvr_job *job, + struct drm_pvr_job *args) +{ + switch (args->type) { + case DRM_PVR_JOB_TYPE_GEOMETRY: + return pvr_geom_job_fw_cmd_init(job, args); + + case DRM_PVR_JOB_TYPE_FRAGMENT: + return pvr_frag_job_fw_cmd_init(job, args); + + case DRM_PVR_JOB_TYPE_COMPUTE: + return pvr_compute_job_fw_cmd_init(job, args); + + case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: + return pvr_transfer_job_fw_cmd_init(job, args); + + default: + return -EINVAL; + } +} + +/** + * struct pvr_job_data - Helper container for pairing jobs with the + * sync_ops supplied for them by the user. + */ +struct pvr_job_data { + /** @job: Pointer to the job. */ + struct pvr_job *job; + + /** @sync_ops: Pointer to the sync_ops associated with @job. */ + struct drm_pvr_sync_op *sync_ops; + + /** @sync_op_count: Number of members of @sync_ops. */ + u32 sync_op_count; +}; + +/** + * prepare_job_syncs() - Prepare all sync objects for a single job. + * @pvr_file: PowerVR file. + * @job_data: Precreated job and sync_ops array. + * @signal_array: xarray to receive signal sync objects. + * + * Returns: + * * 0 on success, or + * * Any error code returned by pvr_sync_signal_array_collect_ops(), + * pvr_sync_add_deps_to_job(), drm_sched_job_add_resv_dependencies() or + * pvr_sync_signal_array_update_fences(). 
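+ * + * When the job targets an HWRT, the HWRT firmware object's reservation lock is taken and released internally while its fences are added as dependencies, so the caller must not already hold that lock.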
+ */ +static int +prepare_job_syncs(struct pvr_file *pvr_file, + struct pvr_job_data *job_data, + struct xarray *signal_array) +{ + struct dma_fence *done_fence; + int err = pvr_sync_signal_array_collect_ops(signal_array, + from_pvr_file(pvr_file), + job_data->sync_op_count, + job_data->sync_ops); + + if (err) + return err; + + err = pvr_sync_add_deps_to_job(pvr_file, &job_data->job->base, + job_data->sync_op_count, + job_data->sync_ops, signal_array); + if (err) + return err; + + if (job_data->job->hwrt) { + /* The geometry job writes the HWRT region headers, which are + * then read by the fragment job. + */ + struct drm_gem_object *obj = + gem_from_pvr_gem(job_data->job->hwrt->fw_obj->gem); + enum dma_resv_usage usage = + dma_resv_usage_rw(job_data->job->type == + DRM_PVR_JOB_TYPE_GEOMETRY); + + dma_resv_lock(obj->resv, NULL); + err = drm_sched_job_add_resv_dependencies(&job_data->job->base, + obj->resv, usage); + dma_resv_unlock(obj->resv); + if (err) + return err; + } + + /* We need to arm the job to get the job done fence. */ + done_fence = pvr_queue_job_arm(job_data->job); + + err = pvr_sync_signal_array_update_fences(signal_array, + job_data->sync_op_count, + job_data->sync_ops, + done_fence); + return err; +} + +/** + * prepare_job_syncs_for_each() - Prepare all sync objects for an array of jobs. + * @pvr_file: PowerVR file. + * @job_data: Array of precreated jobs and their sync_ops. + * @job_count: Number of jobs. + * @signal_array: xarray to receive signal sync objects. + * + * Returns: + * * 0 on success, or + * * Any error code returned by prepare_job_syncs(). + */ +static int +prepare_job_syncs_for_each(struct pvr_file *pvr_file, + struct pvr_job_data *job_data, + u32 *job_count, + struct xarray *signal_array) +{ + for (u32 i = 0; i < *job_count; i++) { + int err = prepare_job_syncs(pvr_file, &job_data[i], + signal_array); + + if (err) { + *job_count = i; + return err; + } + } + + return 0; +} + +static struct pvr_job * +create_job(struct pvr_device *pvr_dev, + struct pvr_file *pvr_file, + struct drm_pvr_job *args) +{ + struct pvr_job *job = NULL; + int err; + + if (!args->cmd_stream || !args->cmd_stream_len) + return ERR_PTR(-EINVAL); + + if (args->type != DRM_PVR_JOB_TYPE_GEOMETRY && + args->type != DRM_PVR_JOB_TYPE_FRAGMENT && + (args->hwrt.set_handle || args->hwrt.data_index)) + return ERR_PTR(-EINVAL); + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return ERR_PTR(-ENOMEM); + + kref_init(&job->ref_count); + job->type = args->type; + job->pvr_dev = pvr_dev; + + err = xa_alloc(&pvr_dev->job_ids, &job->id, job, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_put_job; + + job->ctx = pvr_context_lookup(pvr_file, args->context_handle); + if (!job->ctx) { + err = -EINVAL; + goto err_put_job; + } + + if (args->hwrt.set_handle) { + job->hwrt = pvr_hwrt_data_lookup(pvr_file, args->hwrt.set_handle, + args->hwrt.data_index); + if (!job->hwrt) { + err = -EINVAL; + goto err_put_job; + } + } + + err = pvr_job_fw_cmd_init(job, args); + if (err) + goto err_put_job; + + err = pvr_queue_job_init(job); + if (err) + goto err_put_job; + + return job; + +err_put_job: + pvr_job_put(job); + return ERR_PTR(err); +} + +/** + * pvr_job_data_fini() - Cleanup all allocs used to set up job submission. + * @job_data: Job data array. + * @job_count: Number of members of @job_data. 
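+ * + * Safe to call on a partially-initialised array: pvr_job_put() accepts NULL jobs and kvfree() accepts NULL pointers, which the error paths in pvr_job_data_init() rely on.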
+ */ +static void +pvr_job_data_fini(struct pvr_job_data *job_data, u32 job_count) +{ + for (u32 i = 0; i < job_count; i++) { + pvr_job_put(job_data[i].job); + kvfree(job_data[i].sync_ops); + } +} + +/** + * pvr_job_data_init() - Init an array of created jobs, associating them with + * the appropriate sync_ops args, which will be copied in. + * @pvr_dev: Target PowerVR device. + * @pvr_file: Pointer to PowerVR file structure. + * @job_args: Job args array copied from user. + * @job_count: Number of members of @job_args. + * @job_data_out: Job data array. + */ +static int pvr_job_data_init(struct pvr_device *pvr_dev, + struct pvr_file *pvr_file, + struct drm_pvr_job *job_args, + u32 *job_count, + struct pvr_job_data *job_data_out) +{ + int err = 0, i = 0; + + for (; i < *job_count; i++) { + job_data_out[i].job = + create_job(pvr_dev, pvr_file, &job_args[i]); + err = PTR_ERR_OR_ZERO(job_data_out[i].job); + + if (err) { + *job_count = i; + job_data_out[i].job = NULL; + goto err_cleanup; + } + + err = PVR_UOBJ_GET_ARRAY(job_data_out[i].sync_ops, + &job_args[i].sync_ops); + if (err) { + *job_count = i; + + /* Ensure the job created above is also cleaned up. */ + i++; + goto err_cleanup; + } + + job_data_out[i].sync_op_count = job_args[i].sync_ops.count; + } + + return 0; + +err_cleanup: + pvr_job_data_fini(job_data_out, i); + + return err; +} + +static void +push_jobs(struct pvr_job_data *job_data, u32 job_count) +{ + for (u32 i = 0; i < job_count; i++) + pvr_queue_job_push(job_data[i].job); +} + +static int +prepare_fw_obj_resv(struct drm_exec *exec, struct pvr_fw_object *fw_obj) +{ + return drm_exec_prepare_obj(exec, gem_from_pvr_gem(fw_obj->gem), 1); +} + +static int +jobs_lock_all_objs(struct drm_exec *exec, struct pvr_job_data *job_data, + u32 job_count) +{ + for (u32 i = 0; i < job_count; i++) { + struct pvr_job *job = job_data[i].job; + + /* Grab a lock on the context, to guard against + * concurrent submission to the same queue. + */ + int err = drm_exec_lock_obj(exec, + gem_from_pvr_gem(job->ctx->fw_obj->gem)); + + if (err) + return err; + + if (job->hwrt) { + err = prepare_fw_obj_resv(exec, + job->hwrt->fw_obj); + if (err) + return err; + } + } + + return 0; +} + +static int +prepare_job_resvs_for_each(struct drm_exec *exec, struct pvr_job_data *job_data, + u32 job_count) +{ + drm_exec_until_all_locked(exec) { + int err = jobs_lock_all_objs(exec, job_data, job_count); + + drm_exec_retry_on_contention(exec); + if (err) + return err; + } + + return 0; +} + +static void +update_job_resvs(struct pvr_job *job) +{ + if (job->hwrt) { + enum dma_resv_usage usage = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; + struct drm_gem_object *obj = gem_from_pvr_gem(job->hwrt->fw_obj->gem); + + dma_resv_add_fence(obj->resv, &job->base.s_fence->finished, usage); + } +} + +static void +update_job_resvs_for_each(struct pvr_job_data *job_data, u32 job_count) +{ + for (u32 i = 0; i < job_count; i++) + update_job_resvs(job_data[i].job); +} + +static bool can_combine_jobs(struct pvr_job *a, struct pvr_job *b) +{ + struct pvr_job *geom_job = a, *frag_job = b; + struct dma_fence *fence; + unsigned long index; + + /* Geometry and fragment jobs can be combined if they are queued to the + * same context and target the same HWRT. 
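+ * In practice this means userspace placed the fragment job immediately after its geometry job in the same submit array, with the fragment job carrying an explicit dependency on the geometry job's scheduled fence.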
+ */ + if (a->type != DRM_PVR_JOB_TYPE_GEOMETRY || + b->type != DRM_PVR_JOB_TYPE_FRAGMENT || + a->ctx != b->ctx || + a->hwrt != b->hwrt) + return false; + + xa_for_each(&frag_job->base.dependencies, index, fence) { + /* We combine when we see an explicit geom -> frag dep. */ + if (&geom_job->base.s_fence->scheduled == fence) + return true; + } + + return false; +} + +static struct dma_fence * +get_last_queued_job_scheduled_fence(struct pvr_queue *queue, + struct pvr_job_data *job_data, + u32 cur_job_pos) +{ + /* We iterate over the current job array in reverse order to grab the + * last to-be-queued job targeting the same queue. + */ + for (u32 i = cur_job_pos; i > 0; i--) { + struct pvr_job *job = job_data[i - 1].job; + + if (job->ctx == queue->ctx && job->type == queue->type) + return dma_fence_get(&job->base.s_fence->scheduled); + } + + /* If we didn't find any, we just return the last queued job scheduled + * fence attached to the queue. + */ + return dma_fence_get(queue->last_queued_job_scheduled_fence); +} + +static int +pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count) +{ + for (u32 i = 0; i < *job_count - 1; i++) { + struct pvr_job *geom_job = job_data[i].job; + struct pvr_job *frag_job = job_data[i + 1].job; + struct pvr_queue *frag_queue; + struct dma_fence *f; + + if (!can_combine_jobs(job_data[i].job, job_data[i + 1].job)) + continue; + + /* The fragment job will be submitted by the geometry queue. We + * need to make sure it comes after all the other fragment jobs + * queued before it. + */ + frag_queue = pvr_context_get_queue_for_job(frag_job->ctx, + frag_job->type); + f = get_last_queued_job_scheduled_fence(frag_queue, job_data, + i); + if (f) { + int err = drm_sched_job_add_dependency(&geom_job->base, + f); + if (err) { + *job_count = i; + return err; + } + } + + /* The KCCB slot will be reserved by the geometry job, so we can + * drop the KCCB fence on the fragment job. + */ + pvr_kccb_fence_put(frag_job->kccb_fence); + frag_job->kccb_fence = NULL; + + geom_job->paired_job = frag_job; + frag_job->paired_job = geom_job; + + /* Skip the fragment job we just paired to the geometry job. */ + i++; + } + + return 0; +} + +/** + * pvr_submit_jobs() - Submit jobs to the GPU + * @pvr_dev: Target PowerVR device. + * @pvr_file: Pointer to PowerVR file structure. + * @args: Ioctl args. + * + * Submission is asynchronous: this function returns once the jobs have been + * queued to the scheduler, and completion is signalled through the sync + * operations and job done fences attached at submission time. + * + * Returns: + * * 0 on success, + * * -%EFAULT if arguments cannot be copied from user space, or + * * -%EINVAL on invalid arguments, or + * * Any other error. + */ +int +pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file, + struct drm_pvr_ioctl_submit_jobs_args *args) +{ + struct pvr_job_data *job_data = NULL; + struct drm_pvr_job *job_args; + struct xarray signal_array; + u32 jobs_alloced = 0; + struct drm_exec exec; + int err; + + if (!args->jobs.count) + return -EINVAL; + + err = PVR_UOBJ_GET_ARRAY(job_args, &args->jobs); + if (err) + return err; + + job_data = kvmalloc_array(args->jobs.count, sizeof(*job_data), + GFP_KERNEL | __GFP_ZERO); + if (!job_data) { + err = -ENOMEM; + goto out_free; + } + + err = pvr_job_data_init(pvr_dev, pvr_file, job_args, &args->jobs.count, + job_data); + if (err) + goto out_free; + + jobs_alloced = args->jobs.count; + + /* + * Flush MMU if needed - this has been deferred until now to avoid + * overuse of this expensive operation. 
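+ * The flags to flush have been accumulated in pvr_dev->mmu_flush_cache_flags by pvr_mmu_set_flush_flags() as the page tables were modified; pvr_mmu_flush_exec() atomically consumes and clears them.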
+ */ + err = pvr_mmu_flush_exec(pvr_dev, false); + if (err) + goto out_job_data_cleanup; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); + + xa_init_flags(&signal_array, XA_FLAGS_ALLOC); + + err = prepare_job_syncs_for_each(pvr_file, job_data, &args->jobs.count, + &signal_array); + if (err) + goto out_exec_fini; + + err = prepare_job_resvs_for_each(&exec, job_data, args->jobs.count); + if (err) + goto out_exec_fini; + + err = pvr_jobs_link_geom_frag(job_data, &args->jobs.count); + if (err) + goto out_exec_fini; + + /* Anything after that point must succeed because we start exposing job + * finished fences to the outside world. + */ + update_job_resvs_for_each(job_data, args->jobs.count); + push_jobs(job_data, args->jobs.count); + pvr_sync_signal_array_push_fences(&signal_array); + err = 0; + +out_exec_fini: + drm_exec_fini(&exec); + pvr_sync_signal_array_cleanup(&signal_array); + +out_job_data_cleanup: + pvr_job_data_fini(job_data, jobs_alloced); + +out_free: + kvfree(job_data); + kvfree(job_args); + + return err; +} diff --git a/drivers/gpu/drm/imagination/pvr_job.h b/drivers/gpu/drm/imagination/pvr_job.h new file mode 100644 index 000000000000..0ca003c5c475 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_job.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_JOB_H +#define PVR_JOB_H + +#include <uapi/drm/pvr_drm.h> + +#include <linux/kref.h> +#include <linux/types.h> + +#include <drm/drm_gem.h> +#include <drm/gpu_scheduler.h> + +#include "pvr_power.h" + +/* Forward declaration from "pvr_context.h". */ +struct pvr_context; + +/* Forward declarations from "pvr_device.h". */ +struct pvr_device; +struct pvr_file; + +/* Forward declarations from "pvr_hwrt.h". */ +struct pvr_hwrt_data; + +/* Forward declaration from "pvr_queue.h". */ +struct pvr_queue; + +struct pvr_job { + /** @base: drm_sched_job object. */ + struct drm_sched_job base; + + /** @ref_count: Refcount for job. */ + struct kref ref_count; + + /** @type: Type of job. */ + enum drm_pvr_job_type type; + + /** @id: Job ID number. */ + u32 id; + + /** + * @paired_job: Job paired to this job. + * + * This field is only meaningful for geometry and fragment jobs. + * + * Paired jobs are executed on the same context, and need to be submitted + * atomically to the FW, to make sure the partial render logic has a + * fragment job to execute when the Parameter Manager runs out of memory. + * + * The geometry job should point to the fragment job it's paired with, + * and the fragment job should point to the geometry job it's paired with. + */ + struct pvr_job *paired_job; + + /** @cccb_fence: Fence used to wait for CCCB space. */ + struct dma_fence *cccb_fence; + + /** @kccb_fence: Fence used to wait for KCCB space. */ + struct dma_fence *kccb_fence; + + /** @done_fence: Fence to signal when the job is done. */ + struct dma_fence *done_fence; + + /** @pvr_dev: Device pointer. */ + struct pvr_device *pvr_dev; + + /** @ctx: Pointer to owning context. */ + struct pvr_context *ctx; + + /** @cmd: Command data. Format depends on @type. */ + void *cmd; + + /** @cmd_len: Length of command data, in bytes. */ + u32 cmd_len; + + /** + * @fw_ccb_cmd_type: Firmware CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*. + */ + u32 fw_ccb_cmd_type; + + /** @hwrt: HWRT object. Will be NULL for compute and transfer jobs. 
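+ * + * Holds a reference for the lifetime of the job, dropped by pvr_hwrt_data_put() in pvr_job_release().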
*/ + struct pvr_hwrt_data *hwrt; + + /** + * @has_pm_ref: True if the job has a power ref, thus forcing the GPU to stay on until + * the job is done. + */ + bool has_pm_ref; +}; + +/** + * pvr_job_get() - Take additional reference on job. + * @job: Job pointer. + * + * Call pvr_job_put() to release. + * + * Returns: + * * The requested job on success, or + * * %NULL if no job pointer passed. + */ +static __always_inline struct pvr_job * +pvr_job_get(struct pvr_job *job) +{ + if (job) + kref_get(&job->ref_count); + + return job; +} + +void pvr_job_put(struct pvr_job *job); + +/** + * pvr_job_release_pm_ref() - Release the PM ref if the job acquired it. + * @job: The job to release the PM ref on. + */ +static __always_inline void +pvr_job_release_pm_ref(struct pvr_job *job) +{ + if (job->has_pm_ref) { + pvr_power_put(job->pvr_dev); + job->has_pm_ref = false; + } +} + +/** + * pvr_job_get_pm_ref() - Get a PM ref and attach it to the job. + * @job: The job to attach the PM ref to. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_power_get() otherwise. + */ +static __always_inline int +pvr_job_get_pm_ref(struct pvr_job *job) +{ + int err; + + if (job->has_pm_ref) + return 0; + + err = pvr_power_get(job->pvr_dev); + if (!err) + job->has_pm_ref = true; + + return err; +} + +int pvr_job_wait_first_non_signaled_native_dep(struct pvr_job *job); + +bool pvr_job_non_native_deps_done(struct pvr_job *job); + +int pvr_job_fits_in_cccb(struct pvr_job *job, unsigned long native_dep_count); + +void pvr_job_submit(struct pvr_job *job); + +int pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file, + struct drm_pvr_ioctl_submit_jobs_args *args); + +#endif /* PVR_JOB_H */ diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c new file mode 100644 index 000000000000..4fe70610ed94 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_mmu.c @@ -0,0 +1,2640 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_mmu.h" + +#include "pvr_ccb.h" +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_gem.h" +#include "pvr_power.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_mmu_defs.h" + +#include <drm/drm_drv.h> +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/dma-mapping.h> +#include <linux/kmemleak.h> +#include <linux/minmax.h> +#include <linux/sizes.h> + +#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_)) +#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1))) + +/* + * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently + * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both + * ensures that the selected host page size corresponds to a valid device page + * size and sets up values needed by the MMU code below. 
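+ * For example, building with PVR_DEVICE_PAGE_SIZE == SZ_4K makes ROGUE_MMUCTRL_PAGE_SIZE_X resolve to ROGUE_MMUCTRL_PAGE_SIZE_4KB and selects the matching 4KB range shift/mask constants in the ladder below.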
+ */ +#if (PVR_DEVICE_PAGE_SIZE == SZ_4K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK +#else +# error Unsupported device page size PVR_DEVICE_PAGE_SIZE +#endif + +#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X \ + (ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \ + (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K))) + +enum pvr_mmu_sync_level { + PVR_MMU_SYNC_LEVEL_NONE = -1, + PVR_MMU_SYNC_LEVEL_0 = 0, + PVR_MMU_SYNC_LEVEL_1 = 1, + PVR_MMU_SYNC_LEVEL_2 = 2, +}; + +#define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \ + ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \ + ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB) +#define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD) +#define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC) + +/** + * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to + * pvr_mmu_flush_exec(). + * @pvr_dev: Target PowerVR device. + * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS. + * + * This function must be called following any possible change to the MMU page + * tables. + */ +static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags) +{ + atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags); +} + +/** + * pvr_mmu_flush_request_all() - Request flush of all MMU caches when + * subsequently calling pvr_mmu_flush_exec(). + * @pvr_dev: Target PowerVR device. + * + * This function must be called following any possible change to the MMU page + * tables. + */ +void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev) +{ + pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS); +} + +/** + * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously + * requested. + * @pvr_dev: Target PowerVR device. + * @wait: Do not return until the flush is completed. + * + * This function must be called prior to submitting any new GPU job. The flush + * will complete before the jobs are scheduled, so this can be called once after + * a series of maps. 
However, a single unmap should always be immediately + * followed by a flush and it should be explicitly waited by setting @wait. + * + * As a failure to flush the MMU caches could risk memory corruption, if the + * flush fails (implying the firmware is not responding) then the GPU device is + * marked as lost. + * + * Returns: + * * 0 on success when @wait is true, or + * * -%EIO if the device is unavailable, or + * * Any error encountered while submitting the flush command via the KCCB. + */ +int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait) +{ + struct rogue_fwif_kccb_cmd cmd_mmu_cache = {}; + struct rogue_fwif_mmucachedata *cmd_mmu_cache_data = + &cmd_mmu_cache.cmd_data.mmu_cache_data; + int err = 0; + u32 slot; + int idx; + + if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx)) + return -EIO; + + /* Can't flush MMU if the firmware hasn't booted yet. */ + if (!pvr_dev->fw_dev.booted) + goto err_drm_dev_exit; + + cmd_mmu_cache_data->cache_flags = + atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0); + + if (!cmd_mmu_cache_data->cache_flags) + goto err_drm_dev_exit; + + cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE; + + pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj, + &cmd_mmu_cache_data->mmu_cache_sync_fw_addr); + cmd_mmu_cache_data->mmu_cache_sync_update_value = 0; + + err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot); + if (err) + goto err_reset_and_retry; + + err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL); + if (err) + goto err_reset_and_retry; + + drm_dev_exit(idx); + + return 0; + +err_reset_and_retry: + /* + * Flush command failure is most likely the result of a firmware lockup. Hard + * reset the GPU and retry. + */ + err = pvr_power_reset(pvr_dev, true); + if (err) + goto err_drm_dev_exit; /* Device is lost. */ + + /* Retry sending flush request. */ + err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot); + if (err) { + pvr_device_lost(pvr_dev); + goto err_drm_dev_exit; + } + + if (wait) { + err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL); + if (err) + pvr_device_lost(pvr_dev); + } + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; +} + +/** + * DOC: PowerVR Virtual Memory Handling + */ +/** + * DOC: PowerVR Virtual Memory Handling (constants) + * + * .. c:macro:: PVR_IDX_INVALID + * + * Default value for a u16-based index. + * + * This value cannot be zero, since zero is a valid index value. + */ +#define PVR_IDX_INVALID ((u16)(-1)) + +/** + * DOC: MMU backing pages + */ +/** + * DOC: MMU backing pages (constants) + * + * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE + * + * Page size of a PowerVR device's integrated MMU. The CPU page size must be + * at least as large as this value for the current implementation; this is + * checked at compile-time. + */ +#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K +static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE); + +/** + * struct pvr_mmu_backing_page - Represents a single page used to back a page + * table of any level. + * @dma_addr: DMA address of this page. + * @host_ptr: CPU address of this page. + * @pvr_dev: The PowerVR device to which this page is associated. **For + * internal use only.** + */ +struct pvr_mmu_backing_page { + dma_addr_t dma_addr; + void *host_ptr; +/* private: internal use only */ + struct page *raw_page; + struct pvr_device *pvr_dev; +}; + +/** + * pvr_mmu_backing_page_init() - Initialize a MMU backing page. + * @page: Target backing page. + * @pvr_dev: Target PowerVR device. + * + * This function performs three distinct operations: + * + * 1. 
Allocate a single page, + * 2. Map the page to the CPU, and + * 3. Map the page to DMA-space. + * + * It is expected that @page be zeroed (e.g. from kzalloc()) before calling + * this function. + * + * Return: + * * 0 on success, or + * * -%ENOMEM if allocation of the backing page or mapping of the backing + * page to DMA fails. + */ +static int +pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page, + struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + + struct page *raw_page; + int err; + + dma_addr_t dma_addr; + void *host_ptr; + + raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL); + if (!raw_page) + return -ENOMEM; + + host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (!host_ptr) { + err = -ENOMEM; + goto err_free_page; + } + + dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + err = -ENOMEM; + goto err_unmap_page; + } + + page->dma_addr = dma_addr; + page->host_ptr = host_ptr; + page->pvr_dev = pvr_dev; + page->raw_page = raw_page; + kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL); + + return 0; + +err_unmap_page: + vunmap(host_ptr); + +err_free_page: + __free_page(raw_page); + + return err; +} + +/** + * pvr_mmu_backing_page_fini() - Teardown a MMU backing page. + * @page: Target backing page. + * + * This function performs the mirror operations to pvr_mmu_backing_page_init(), + * in reverse order: + * + * 1. Unmap the page from DMA-space, + * 2. Unmap the page from the CPU, and + * 3. Free the page. + * + * It also zeros @page. + * + * It is a no-op to call this function a second (or further) time on any @page. + */ +static void +pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page) +{ + struct device *dev; + + /* Do nothing if no allocation is present. */ + if (!page->pvr_dev) + return; + + dev = from_pvr_device(page->pvr_dev)->dev; + + dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE, + DMA_TO_DEVICE); + + kmemleak_free(page->host_ptr); + vunmap(page->host_ptr); + + __free_page(page->raw_page); + + memset(page, 0, sizeof(*page)); +} + +/** + * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the + * device. + * @page: Target backing page. + * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS. + * + * .. caution:: + * + * **This is potentially an expensive function call.** Only call + * pvr_mmu_backing_page_sync() once you're sure you have no more changes to + * make to the backing page in the immediate future. + */ +static void +pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags) +{ + struct pvr_device *pvr_dev = page->pvr_dev; + struct device *dev; + + /* + * Do nothing if no allocation is present. This may be the case if + * we are unmapping pages. 
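+ * A cleared @pvr_dev is a reliable marker for this because + * pvr_mmu_backing_page_fini() zeroes the whole structure on teardown.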
+ */ + if (!pvr_dev) + return; + + dev = from_pvr_device(pvr_dev)->dev; + + dma_sync_single_for_device(dev, page->dma_addr, + PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE); + + pvr_mmu_set_flush_flags(pvr_dev, flags); +} + +/** + * DOC: Raw page tables + */ + +#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \ + typeof_member(struct pvr_page_table_l##level_##_entry_raw, val) + +#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_) \ + (((entry_).val & \ + ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \ + ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) + +#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_) \ + ((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_)) \ + << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \ + ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) + +/** + * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 31..4 + * - **Level 1 Page Table Base Address:** Bits 39..12 of the L1 + * page table base address, which is 4KiB aligned. + * + * * - 3..2 + * - *(reserved)* + * + * * - 1 + * - **Pending:** When valid bit is not set, indicates that a valid + * entry is pending and the MMU should wait for the driver to map + * the entry. This is used to support page demand mapping of + * memory. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid L1 page + * table. If the valid bit is not set, then an attempted use of + * the page would result in a page fault. + */ +struct pvr_page_table_l2_entry_raw { + u32 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE); + +static bool +pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry) +{ + return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry); +} + +/** + * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2 + * page table. + * @entry: Target raw level 2 page table entry. + * @child_table_dma_addr: DMA address of the level 1 page table to be + * associated with @entry. + * + * When calling this function, @child_table_dma_addr must be a valid DMA + * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE. + */ +static void +pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry, + dma_addr_t child_table_dma_addr) +{ + child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT; + + WRITE_ONCE(entry->val, + PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) | + PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) | + PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr)); +} + +static void +pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry) +{ + WRITE_ONCE(entry->val, 0); +} + +/** + * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. 
flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 63..41 + * - *(reserved)* + * + * * - 40 + * - **Pending:** When valid bit is not set, indicates that a valid entry + * is pending and the MMU should wait for the driver to map the entry. + * This is used to support page demand mapping of memory. + * + * * - 39..5 + * - **Level 0 Page Table Base Address:** The way this value is + * interpreted depends on the page size. Bits not specified in the + * table below (e.g. bits 11..5 for page size 4KiB) should be + * considered reserved. + * + * This table shows the bits used in an L1 page table entry to + * represent the Physical Table Base Address for a given Page Size. + * Since each L1 page table entry covers 2MiB of address space, the + * maximum page size is 2MiB. + * + * .. flat-table:: + * :widths: 1 1 1 1 + * :header-rows: 1 + * :stub-columns: 1 + * + * * - Page size + * - L0 page table base address bits + * - Number of L0 page table entries + * - Size of L0 page table + * + * * - 4KiB + * - 39..12 + * - 512 + * - 4KiB + * + * * - 16KiB + * - 39..10 + * - 128 + * - 1KiB + * + * * - 64KiB + * - 39..8 + * - 32 + * - 256B + * + * * - 256KiB + * - 39..6 + * - 8 + * - 64B + * + * * - 1MiB + * - 39..5 (4 = '0') + * - 2 + * - 16B + * + * * - 2MiB + * - 39..5 (4..3 = '00') + * - 1 + * - 8B + * + * * - 4 + * - *(reserved)* + * + * * - 3..1 + * - **Page Size:** Sets the page size, from 4KiB to 2MiB. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid L0 page table. + * If the valid bit is not set, then an attempted use of the page would + * result in a page fault. + */ +struct pvr_page_table_l1_entry_raw { + u64 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE); + +static bool +pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry) +{ + return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry); +} + +/** + * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1 + * page table. + * @entry: Target raw level 1 page table entry. + * @child_table_dma_addr: DMA address of the level 0 page table to be + * associated with @entry. + * + * When calling this function, @child_table_dma_addr must be a valid DMA + * address and a multiple of 4 KiB. + */ +static void +pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry, + dma_addr_t child_table_dma_addr) +{ + WRITE_ONCE(entry->val, + PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) | + PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) | + PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) | + /* + * The use of a 4K-specific macro here is correct. It is + * a future optimization to allocate sub-host-page-sized + * blocks for individual tables, so the condition that any + * page table address is aligned to the size of the + * largest (a 4KB) table currently holds. + */ + (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK)); +} + +static void +pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry) +{ + WRITE_ONCE(entry->val, 0); +} + +/** + * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. 
flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 63 + * - *(reserved)* + * + * * - 62 + * - **PM/FW Protect:** Indicates a protected region which only the + * Parameter Manager (PM) or firmware processor can write to. + * + * * - 61..40 + * - **VP Page (High):** Virtual-physical page used for Parameter Manager + * (PM) memory. This field is only used if the additional level of PB + * virtualization is enabled. The VP Page field is needed by the PM in + * order to correctly reconstitute the free lists after render + * completion. This (High) field holds bits 39..18 of the value; the + * Low field holds bits 17..12. Bits 11..0 are always zero because the + * value is always aligned to the 4KiB page size. + * + * * - 39..12 + * - **Physical Page Address:** The way this value is interpreted depends + * on the page size. Bits not specified in the table below (e.g. bits + * 20..12 for page size 2MiB) should be considered reserved. + * + * This table shows the bits used in an L0 page table entry to represent + * the Physical Page Address for a given page size (as defined in the + * associated L1 page table entry). + * + * .. flat-table:: + * :widths: 1 1 + * :header-rows: 1 + * :stub-columns: 1 + * + * * - Page size + * - Physical address bits + * + * * - 4KiB + * - 39..12 + * + * * - 16KiB + * - 39..14 + * + * * - 64KiB + * - 39..16 + * + * * - 256KiB + * - 39..18 + * + * * - 1MiB + * - 39..20 + * + * * - 2MiB + * - 39..21 + * + * * - 11..6 + * - **VP Page (Low):** Continuation of VP Page (High). + * + * * - 5 + * - **Pending:** When valid bit is not set, indicates that a valid entry + * is pending and the MMU should wait for the driver to map the entry. + * This is used to support page demand mapping of memory. + * + * * - 4 + * - **PM Src:** Set on Parameter Manager (PM) allocated page table + * entries when indicated by the PM. Note that this bit will only be set + * by the PM, not by the device driver. + * + * * - 3 + * - **SLC Bypass Control:** Specifies requests to this page should bypass + * the System Level Cache (SLC), if enabled in SLC configuration. + * + * * - 2 + * - **Cache Coherency:** Indicates that the page is coherent (i.e. it + * does not require a cache flush between operations on the CPU and the + * device). + * + * * - 1 + * - **Read Only:** If set, this bit indicates that the page is read only. + * An attempted write to this page would result in a write-protection + * fault. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid page. If the + * valid bit is not set, then an attempted use of the page would result + * in a page fault. + */ +struct pvr_page_table_l0_entry_raw { + u64 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE); + +/** + * struct pvr_page_flags_raw - The configurable flags from a single entry in a + * level 0 page table. + * @val: The raw value of these flags. Since these are a strict subset of + * &struct pvr_page_table_l0_entry_raw; use that type for our member here. + * + * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache + * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively). + * + * This type should never be instantiated directly; instead use + * pvr_page_flags_raw_create() to ensure only valid bits of @val are set. 
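+ * + * For example, flags for a read-only, cache-coherent page could be built as follows (illustrative call; the argument order is read_only, cache_coherent, slc_bypass, pm_fw_protect): + * + * struct pvr_page_flags_raw flags = + * pvr_page_flags_raw_create(true, true, false, false);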
+ */
+struct pvr_page_flags_raw {
+	struct pvr_page_table_l0_entry_raw val;
+} __packed;
+static_assert(sizeof(struct pvr_page_flags_raw) ==
+	      sizeof(struct pvr_page_table_l0_entry_raw));
+
+static bool
+pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
+ * page table.
+ * @entry: Target raw level 0 page table entry.
+ * @dma_addr: DMA address of the physical page to be associated with @entry.
+ * @flags: Options to be set on @entry.
+ *
+ * When calling this function, @dma_addr must be a valid DMA address and a
+ * multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * The @flags parameter is directly assigned into @entry. It is the caller's
+ * responsibility to ensure that only bits specified in
+ * &struct pvr_page_flags_raw are set in @flags.
+ */
+static void
+pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
+				dma_addr_t dma_addr,
+				struct pvr_page_flags_raw flags)
+{
+	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
+			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
+			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
+			       flags.val.val);
+}
+
+static void
+pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
+{
+	WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
+ * table entry.
+ * @read_only: This page is read-only (see: Read Only).
+ * @cache_coherent: This page does not require cache flushes (see: Cache
+ * Coherency).
+ * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
+ * @pm_fw_protect: This page is only for use by the firmware or Parameter
+ * Manager (see: PM/FW Protect).
+ *
+ * For more details on the use of these four options, see their respective
+ * entries in the table under &struct pvr_page_table_l0_entry_raw.
+ *
+ * Return:
+ * A new &struct pvr_page_flags_raw instance which can be passed directly to
+ * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
+ */
+static struct pvr_page_flags_raw
+pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
+			  bool pm_fw_protect)
+{
+	struct pvr_page_flags_raw flags;
+
+	flags.val.val =
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
+
+	return flags;
+}
+
+/**
+ * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l2_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l2_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l1_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l1_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ *
+ * .. caution::
+ *
+ *    The size of level 0 page tables is variable depending on the page size
+ *    specified in the associated level 1 page table entry. Since the device
+ *    page size in use is pegged to the host page size, it cannot vary at
+ *    runtime. This structure is therefore only defined to contain the
+ *    required number of entries for the current device page size. **You
+ *    should never read or write beyond the last supported entry.**
+ */
+struct pvr_page_table_l0_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l0_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * DOC: Mirror page tables
+ */
+
+/*
+ * We pre-declare these types because they cross-depend on pointers to each
+ * other.
+ */
+struct pvr_page_table_l1;
+struct pvr_page_table_l0;
+
+/**
+ * struct pvr_page_table_l2 - A wrapped level 2 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l2_get_raw().
+ * Alternatively, to access a raw entry directly, use
+ * pvr_page_table_l2_get_entry_raw().
+ *
+ * A level 2 page table forms the root of the page table tree structure, so
+ * this type has no &parent or &parent_idx members.
+ */
+struct pvr_page_table_l2 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l2_raw.entries).
+	 */
+	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l2_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l2_init() - Initialize a level 2 page table.
+ * @table: Target level 2 page table.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
+		       struct pvr_device *pvr_dev)
+{
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l2_fini() - Teardown a level 2 page table.
+ * @table: Target level 2 page table.
+ *
+ * It is an error to attempt to use @table after calling this function.
+ */
+static void
+pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
+ * device.
+ * @table: Target level 2 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l2_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 1 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l1_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
+}
+
+/**
+ * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
+ * page table.
+ * @table: Target level 2 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l2_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l2_raw *
+pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
+ * of a mirror level 2 page table.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 2 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 2 page table entry.
+ */
+static struct pvr_page_table_l2_entry_raw *
+pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
+{
+	return &pvr_page_table_l2_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
+ * marked as valid.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ *
+ * Return:
+ * %true if the referenced entry is marked valid, or %false otherwise.
+ */
+static bool
+pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
+{
+	struct pvr_page_table_l2_entry_raw entry_raw =
+		*pvr_page_table_l2_get_entry_raw(table, idx);
+
+	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l1 - A wrapped level 1 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l1_get_raw().
+ * Alternatively, to access a raw entry directly, use
+ * pvr_page_table_l1_get_entry_raw().
+ */
+struct pvr_page_table_l1 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l1_raw.entries).
+	 */
+	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree
+		 * structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L1 page table is active. When the L1
+		 * page table has been removed and queued for destruction,
+		 * the next_free field should be used instead.
+		 */
+		struct pvr_page_table_l2 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L1 page table to take/free.
+		 *
+		 * Used to form a linked list of L1 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l1 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l1_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l1_init() - Initialize a level 1 page table.
+ * @table: Target level 1 page table.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l2_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l1_free() - Teardown a level 1 page table.
+ * @table: Target level 1 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l2_remove(), which must
+ * be called *before* pvr_page_table_l1_free().
+ */
+static void
+pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
+ * device.
+ * @table: Target level 1 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l1_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 0 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l0_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
+}
+
+/**
+ * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
+ * page table.
+ * @table: Target level 1 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l1_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
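+ *
+ * Note that changes made through the returned pointer only reach the
+ * device once the table is flushed with pvr_page_table_l1_sync().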
+ */
+static struct pvr_page_table_l1_raw *
+pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
+ * of a mirror level 1 page table.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 1 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 1 page table entry.
+ */
+static struct pvr_page_table_l1_entry_raw *
+pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
+{
+	return &pvr_page_table_l1_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
+ * marked as valid.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ *
+ * Return:
+ * %true if the referenced entry is marked valid, or %false otherwise.
+ */
+static bool
+pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
+{
+	struct pvr_page_table_l1_entry_raw entry_raw =
+		*pvr_page_table_l1_get_entry_raw(table, idx);
+
+	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l0 - A wrapped level 0 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l0_get_raw().
+ * Alternatively, to access a raw entry directly, use
+ * pvr_page_table_l0_get_entry_raw().
+ *
+ * There is no mirror representation of an individual page, so this type has no
+ * &entries member.
+ */
+struct pvr_page_table_l0 {
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree
+		 * structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L0 page table is active. When the L0
+		 * page table has been removed and queued for destruction,
+		 * the next_free field should be used instead.
+		 */
+		struct pvr_page_table_l1 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L0 page table to take/free.
+		 *
+		 * Used to form a linked list of L0 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l0 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l0_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l0_init() - Initialize a level 0 page table.
+ * @table: Target level 0 page table.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l1_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l0_free() - Teardown a level 0 page table.
+ * @table: Target level 0 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l1_remove(), which must
+ * be called *before* pvr_page_table_l0_free().
+ */
+static void
+pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
+ * device.
+ * @table: Target level 0 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l0_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child pages of @table also need to be flushed, this should be done first
+ * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
+ * this function.
+ */
+static void
+pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
+}
+
+/**
+ * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
+ * page table.
+ * @table: Target level 0 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l0_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l0_raw *
+pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
+ * of a mirror level 0 page table.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 0 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer. This is especially important for level 0 page tables,
+ * which can have a variable number of entries.
+ *
+ * Return:
+ * A pointer to the requested raw level 0 page table entry.
+ */
+static struct pvr_page_table_l0_entry_raw *
+pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
+{
+	return &pvr_page_table_l0_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
+ * marked as valid.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ *
+ * Return:
+ * %true if the referenced entry is marked valid, or %false otherwise.
+ */
+static bool
+pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
+{
+	struct pvr_page_table_l0_entry_raw entry_raw =
+		*pvr_page_table_l0_get_entry_raw(table, idx);
+
+	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_mmu_context - context holding data for operations at page
+ * catalogue level, intended for use with a VM context.
+ */
+struct pvr_mmu_context {
+	/** @pvr_dev: The PVR device associated with the owning VM context. */
+	struct pvr_device *pvr_dev;
+
+	/** @page_table_l2: The MMU table root. */
+	struct pvr_page_table_l2 page_table_l2;
+};
+
+/**
+ * struct pvr_page_table_ptr - A reference to a single physical page as indexed
+ * by the page table structure.
+ *
+ * Intended for embedding in a &struct pvr_mmu_op_context.
+ */
+struct pvr_page_table_ptr {
+	/**
+	 * @l1_table: A cached handle to the level 1 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l1 *l1_table;
+
+	/**
+	 * @l0_table: A cached handle to the level 0 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l0 *l0_table;
+
+	/**
+	 * @l2_idx: Index into the level 2 page table the context is
+	 * currently referencing.
+	 */
+	u16 l2_idx;
+
+	/**
+	 * @l1_idx: Index into the level 1 page table the context is
+	 * currently referencing.
+	 */
+	u16 l1_idx;
+
+	/**
+	 * @l0_idx: Index into the level 0 page table the context is
+	 * currently referencing.
+	 */
+	u16 l0_idx;
+};
+
+/**
+ * struct pvr_mmu_op_context - context holding data for individual
+ * device-virtual mapping operations. Intended for use with a VM bind operation.
+ */
+struct pvr_mmu_op_context {
+	/** @mmu_ctx: The MMU context associated with the owning VM context. */
+	struct pvr_mmu_context *mmu_ctx;
+
+	/** @map: Data specifically for map operations. */
+	struct {
+		/**
+		 * @sgt: Scatter gather table containing pages pinned for use
+		 * by this context - these are currently pinned when
+		 * initialising the VM bind operation.
+		 */
+		struct sg_table *sgt;
+
+		/**
+		 * @sgt_offset: Start offset of the device-virtual mapping
+		 * within @sgt.
+		 */
+		u64 sgt_offset;
+
+		/**
+		 * @l1_prealloc_tables: Preallocated L1 page table objects
+		 * used by this context when creating a page mapping. Linked
+		 * list fully created during initialisation.
+		 */
+		struct pvr_page_table_l1 *l1_prealloc_tables;
+
+		/**
+		 * @l0_prealloc_tables: Preallocated L0 page table objects
+		 * used by this context when creating a page mapping. Linked
+		 * list fully created during initialisation.
+		 */
+		struct pvr_page_table_l0 *l0_prealloc_tables;
+	} map;
+
+	/** @unmap: Data specifically for unmap operations. */
+	struct {
+		/**
+		 * @l1_free_tables: Collects page table objects freed by unmap
+		 * ops. Linked list empty at creation.
+		 */
+		struct pvr_page_table_l1 *l1_free_tables;
+
+		/**
+		 * @l0_free_tables: Collects page table objects freed by unmap
+		 * ops. Linked list empty at creation.
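+		 *
+		 * Tables collected here are destroyed in
+		 * pvr_mmu_op_context_destroy().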
+		 */
+		struct pvr_page_table_l0 *l0_free_tables;
+	} unmap;
+
+	/**
+	 * @curr_page: A reference to a single physical page as indexed by the
+	 * page table structure.
+	 */
+	struct pvr_page_table_ptr curr_page;
+
+	/**
+	 * @sync_level_required: The maximum level of the page table tree
+	 * structure which has (possibly) been modified since it was last
+	 * flushed to the device.
+	 *
+	 * This field should only be set with pvr_mmu_op_context_require_sync()
+	 * or indirectly by pvr_mmu_op_context_sync_partial().
+	 */
+	enum pvr_mmu_sync_level sync_level_required;
+};
+
+/**
+ * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
+ * table into a level 2 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
+ * table into.
+ * @child_table: Target level 1 page table to be referenced by the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ *
+ * It is the caller's responsibility to execute any memory barriers to ensure
+ * that the creation of @child_table is ordered before the L2 entry is
+ * inserted.
+ */
+static void
+pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
+			 struct pvr_page_table_l1 *child_table)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l2_idx);
+
+	pvr_page_table_l2_entry_raw_set(entry_raw,
+					child_table->backing_page.dma_addr);
+
+	child_table->parent = l2_table;
+	child_table->parent_idx = op_ctx->curr_page.l2_idx;
+	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
+	++l2_table->entry_count;
+	op_ctx->curr_page.l1_table = child_table;
+}
+
+/**
+ * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ */
+static void
+pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l1_table->parent_idx);
+
+	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
+
+	pvr_page_table_l2_entry_raw_clear(entry_raw);
+
+	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
+	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
+	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
+	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
+	op_ctx->curr_page.l1_table = NULL;
+
+	--l2_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
+ * table into a level 1 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
+ * table into.
+ * @child_table: L0 page table to insert.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ *
+ * It is the caller's responsibility to execute any memory barriers to ensure
+ * that the creation of @child_table is ordered before the L1 entry is
+ * inserted.
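+ *
+ * (In this file, callers satisfy this ordering requirement with a wmb()
+ * between populating the new table's backing page and calling this
+ * function; see pvr_page_table_l0_get_or_insert().)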
+ */ +static void +pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx, + struct pvr_page_table_l0 *child_table) +{ + struct pvr_page_table_l1_entry_raw *entry_raw = + pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table, + op_ctx->curr_page.l1_idx); + + pvr_page_table_l1_entry_raw_set(entry_raw, + child_table->backing_page.dma_addr); + + child_table->parent = op_ctx->curr_page.l1_table; + child_table->parent_idx = op_ctx->curr_page.l1_idx; + op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table; + ++op_ctx->curr_page.l1_table->entry_count; + op_ctx->curr_page.l0_table = child_table; +} + +/** + * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page + * table. + * @op_ctx: Target MMU op context pointing at the L1 entry to remove. + * + * If this function results in the L1 table becoming empty, it will be removed + * from its parent level 2 page table and destroyed. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L1 entry. + */ +static void +pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx) +{ + struct pvr_page_table_l1_entry_raw *entry_raw = + pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent, + op_ctx->curr_page.l0_table->parent_idx); + + WARN_ON(op_ctx->curr_page.l0_table->parent != + op_ctx->curr_page.l1_table); + + pvr_page_table_l1_entry_raw_clear(entry_raw); + + op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL; + op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID; + op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables; + op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table; + op_ctx->curr_page.l0_table = NULL; + + if (--op_ctx->curr_page.l1_table->entry_count == 0) { + /* Clear the parent L2 page table entry. */ + if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID) + pvr_page_table_l2_remove(op_ctx); + } +} + +/** + * pvr_page_table_l0_insert() - Insert an entry referring to a physical page + * into a level 0 page table. + * @op_ctx: Target MMU op context pointing at the L0 entry to insert. + * @dma_addr: Target DMA address to be referenced by the new entry. + * @flags: Page options to be stored in the new entry. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L0 entry. + */ +static void +pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx, + dma_addr_t dma_addr, struct pvr_page_flags_raw flags) +{ + struct pvr_page_table_l0_entry_raw *entry_raw = + pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx); + + pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags); + + /* + * There is no entry to set here - we don't keep a mirror of + * individual pages. + */ + + ++op_ctx->curr_page.l0_table->entry_count; +} + +/** + * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page + * table. + * @op_ctx: Target MMU op context pointing at the L0 entry to remove. + * + * If this function results in the L0 table becoming empty, it will be removed + * from its parent L1 page table and destroyed. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L0 entry. 
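+ *
+ * Note that removal can cascade: emptying this L0 table triggers
+ * pvr_page_table_l1_remove(), which may in turn empty the parent L1 table
+ * and trigger pvr_page_table_l2_remove().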
+ */ +static void +pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx) +{ + struct pvr_page_table_l0_entry_raw *entry_raw = + pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx); + + pvr_page_table_l0_entry_raw_clear(entry_raw); + + /* + * There is no entry to clear here - we don't keep a mirror of + * individual pages. + */ + + if (--op_ctx->curr_page.l0_table->entry_count == 0) { + /* Clear the parent L1 page table entry. */ + if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID) + pvr_page_table_l1_remove(op_ctx); + } +} + +/** + * DOC: Page table index utilities + */ + +/** + * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 2 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l2_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT; +} + +/** + * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 1 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l1_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT; +} + +/** + * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 0 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l0_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT; +} + +/** + * DOC: High-level page table operations + */ + +/** + * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if + * necessary) a level 1 page table from the specified level 2 page table entry. + * @op_ctx: Target MMU op context. + * @should_insert: [IN] Specifies whether new page tables should be inserted + * when empty page table entries are encountered during traversal. + * + * Return: + * * 0 on success, or + * + * If @should_insert is %false: + * * -%ENXIO if a level 1 page table would have been inserted. + * + * If @should_insert is %true: + * * Any error encountered while inserting the level 1 page table. + */ +static int +pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx, + bool should_insert) +{ + struct pvr_page_table_l2 *l2_table = + &op_ctx->mmu_ctx->page_table_l2; + struct pvr_page_table_l1 *table; + + if (pvr_page_table_l2_entry_is_valid(l2_table, + op_ctx->curr_page.l2_idx)) { + op_ctx->curr_page.l1_table = + l2_table->entries[op_ctx->curr_page.l2_idx]; + return 0; + } + + if (!should_insert) + return -ENXIO; + + /* Take a prealloced table. 
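+	 * These were allocated up front by pvr_mmu_op_context_create(), so
+	 * an exhausted list here means the operation strayed outside the
+	 * range the op context was created for.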
+	 */
+	table = op_ctx->map.l1_prealloc_tables;
+	if (!table)
+		return -ENOMEM;
+
+	/* Pop */
+	op_ctx->map.l1_prealloc_tables = table->next_free;
+	table->next_free = NULL;
+
+	/* Ensure new table is fully written out before adding to L2 page table. */
+	wmb();
+
+	pvr_page_table_l2_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 0 page table from the specified level 1 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success,
+ *
+ *  If @should_insert is %false:
+ *  * -%ENXIO if a level 0 page table would have been inserted.
+ *
+ *  If @should_insert is %true:
+ *  * Any error encountered while inserting the level 0 page table.
+ */
+static int
+pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+				bool should_insert)
+{
+	struct pvr_page_table_l0 *table;
+
+	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
+					     op_ctx->curr_page.l1_idx)) {
+		op_ctx->curr_page.l0_table =
+			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
+		return 0;
+	}
+
+	if (!should_insert)
+		return -ENXIO;
+
+	/* Take a prealloced table. */
+	table = op_ctx->map.l0_prealloc_tables;
+	if (!table)
+		return -ENOMEM;
+
+	/* Pop */
+	op_ctx->map.l0_prealloc_tables = table->next_free;
+	table->next_free = NULL;
+
+	/* Ensure new table is fully written out before adding to L1 page table. */
+	wmb();
+
+	pvr_page_table_l1_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_context_create() - Create an MMU context.
+ * @pvr_dev: PVR device associated with owning VM context.
+ *
+ * Returns:
+ *  * Newly created MMU context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
+{
+	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	int err;
+
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
+	if (err) {
+		/* Don't leak the context object on failure. */
+		kfree(ctx);
+		return ERR_PTR(err);
+	}
+
+	ctx->pvr_dev = pvr_dev;
+
+	return ctx;
+}
+
+/**
+ * pvr_mmu_context_destroy() - Destroy an MMU context.
+ * @ctx: Target MMU context.
+ */
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
+{
+	pvr_page_table_l2_fini(&ctx->page_table_l2);
+	kfree(ctx);
+}
+
+/**
+ * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
+ * page table structure behind a VM context.
+ * @ctx: Target MMU context.
+ *
+ * Return:
+ * The DMA address of the root (level 2) page table.
+ */
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
+{
+	return ctx->page_table_l2.backing_page.dma_addr;
+}
+
+/**
+ * pvr_page_table_l1_alloc() - Allocate an L1 page table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l1_init().
+ */
+static struct pvr_page_table_l1 *
+pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
+{
+	int err;
+
+	struct pvr_page_table_l1 *table =
+		kzalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
+	if (err) {
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	return table;
+}
+
+/**
+ * pvr_page_table_l0_alloc() - Allocate an L0 page table object.
+ * @ctx: MMU context of owning VM context. + * + * Returns: + * * Newly created page table object on success, or + * * -%ENOMEM if no memory is available, + * * Any error code returned by pvr_page_table_l0_init(). + */ +static struct pvr_page_table_l0 * +pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx) +{ + int err; + + struct pvr_page_table_l0 *table = + kzalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return ERR_PTR(-ENOMEM); + + err = pvr_page_table_l0_init(table, ctx->pvr_dev); + if (err) { + kfree(table); + return ERR_PTR(err); + } + + return table; +} + +/** + * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a + * sync operation for the referenced page tables up to a specified level. + * @op_ctx: Target MMU op context. + * @level: Maximum page table level for which a sync is required. + */ +static void +pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + if (op_ctx->sync_level_required < level) + op_ctx->sync_level_required = level; +} + +/** + * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the + * page tables referenced by a MMU op context. + * @op_ctx: Target MMU op context. + * @level: Maximum page table level to sync. + * + * Do not call this function directly. Instead use + * pvr_mmu_op_context_sync_partial() which is checked against the current + * value of &op_ctx->sync_level_required as set by + * pvr_mmu_op_context_require_sync(). + */ +static void +pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + /* + * We sync the page table levels in ascending order (starting from the + * leaf node) to ensure consistency. + */ + + WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE); + + if (level <= PVR_MMU_SYNC_LEVEL_NONE) + return; + + if (op_ctx->curr_page.l0_table) + pvr_page_table_l0_sync(op_ctx->curr_page.l0_table); + + if (level < PVR_MMU_SYNC_LEVEL_1) + return; + + if (op_ctx->curr_page.l1_table) + pvr_page_table_l1_sync(op_ctx->curr_page.l1_table); + + if (level < PVR_MMU_SYNC_LEVEL_2) + return; + + pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2); +} + +/** + * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the + * page tables referenced by a MMU op context. + * @op_ctx: Target MMU op context. + * @level: Requested page table level to sync up to (inclusive). + * + * If @level is greater than the maximum level recorded by @op_ctx as requiring + * a sync operation, only the previously recorded maximum will be used. + * + * Additionally, if @level is greater than or equal to the maximum level + * recorded by @op_ctx as requiring a sync operation, that maximum level will be + * reset as a full sync will be performed. This is equivalent to calling + * pvr_mmu_op_context_sync(). + */ +static void +pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + /* + * If the requested sync level is greater than or equal to the + * currently required sync level, we do two things: + * * Don't waste time syncing levels we haven't previously marked as + * requiring a sync, and + * * Reset the required sync level since we are about to sync + * everything that was previously marked as requiring a sync. 
+	 */
+	if (level >= op_ctx->sync_level_required) {
+		level = op_ctx->sync_level_required;
+		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+	}
+
+	pvr_mmu_op_context_sync_manual(op_ctx, level);
+}
+
+/**
+ * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
+ * a MMU op context.
+ * @op_ctx: Target MMU op context.
+ *
+ * The maximum level marked internally as requiring a sync will be reset so
+ * that subsequent calls to this function will be no-ops unless @op_ctx is
+ * otherwise updated.
+ */
+static void
+pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
+{
+	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
+
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+}
+
+/**
+ * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
+ * the page table tree structure needed to reference the physical page
+ * referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specifies whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ * @load_level_required: Maximum page table level to load.
+ *
+ * If @should_create is %true, this function may modify the stored required
+ * sync level of @op_ctx as new page tables are created and inserted into their
+ * respective parents.
+ *
+ * Since there is only one root page table, it is technically incorrect to call
+ * this function with a value of @load_level_required greater than or equal to
+ * the root level number. However, this is not explicitly disallowed here.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_page_table_l1_get_or_insert() if
+ *    @load_level_required >= 1 except -%ENXIO, or
+ *  * Any error returned by pvr_page_table_l0_get_or_insert() if
+ *    @load_level_required >= 0 except -%ENXIO.
+ */
+static int
+pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
+			       bool should_create,
+			       enum pvr_mmu_sync_level load_level_required)
+{
+	const struct pvr_page_table_l1 *l1_head_before =
+		op_ctx->map.l1_prealloc_tables;
+	const struct pvr_page_table_l0 *l0_head_before =
+		op_ctx->map.l0_prealloc_tables;
+	int err;
+
+	/* Clear tables we're about to fetch in case of error states. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
+		op_ctx->curr_page.l1_table = NULL;
+
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
+		op_ctx->curr_page.l0_table = NULL;
+
+	/* Get or create L1 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
+		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L1 page table was
+			 * found, return early but without an error. Since
+			 * pvr_page_table_l1_get_or_insert() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			return err;
+		}
+	}
+
+	/* Get or create L0 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
+		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L0 page table was
+			 * found, return early but without an error. Since
+			 * pvr_page_table_l0_get_or_insert() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			/*
+			 * At this point, an L1 page table could have been
+			 * inserted but is now empty due to the failed attempt
+			 * at inserting an L0 page table. In this instance, we
+			 * must remove the empty L1 page table ourselves as
+			 * pvr_page_table_l1_remove() is never called as part
+			 * of the error path in
+			 * pvr_page_table_l0_get_or_insert().
+			 */
+			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
+				pvr_page_table_l2_remove(op_ctx);
+				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+			}
+
+			return err;
+		}
+	}
+
+	/*
+	 * A sync is only needed if table objects were inserted. This can be
+	 * inferred by checking if the pointer at the head of the linked list
+	 * has changed.
+	 */
+	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
+ * context, syncing any page tables previously assigned to it which are no
+ * longer relevant.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: New pointer target.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * This function performs a full sync on the pointer, regardless of which
+ * levels are modified.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_op_context_load_tables().
+ */
+static int
+pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
+				 u64 device_addr, bool should_create)
+{
+	pvr_mmu_op_context_sync(op_ctx);
+
+	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
+	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
+	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
+	op_ctx->curr_page.l1_table = NULL;
+	op_ctx->curr_page.l0_table = NULL;
+
+	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+					      PVR_MMU_SYNC_LEVEL_1);
+}
+
+/**
+ * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
+ * context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * If @should_create is %false, it is the caller's responsibility to verify that
+ * the state of the table references in @op_ctx is valid on return. If -%ENXIO
+ * is returned, at least one of the table references is invalid. It should be
+ * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
+ * returned, unlike other error codes. The caller should check which references
+ * are invalid by comparing them to %NULL. Only the level 2 root table
+ * (&pvr_mmu_context.page_table_l2) is guaranteed to be valid, since it
+ * represents the root of the page table tree structure.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EPERM if the operation would wrap at the top of the page table
+ *    hierarchy,
+ *  * -%ENXIO if @should_create is %false and a page table of any level would
+ *    have otherwise been created, or
+ *  * Any error returned while attempting to create missing page tables if
+ *    @should_create is %true.
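+ *
+ * The advance behaves like incrementing a multi-digit counter: when the L0
+ * index wraps it carries into the L1 index, and so on up to the L2 index,
+ * which has nowhere left to carry to (hence -%EPERM).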
+ */ +static int +pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx, + bool should_create) +{ + s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE; + + if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X) + goto load_tables; + + op_ctx->curr_page.l0_idx = 0; + load_level_required = PVR_MMU_SYNC_LEVEL_0; + + if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE) + goto load_tables; + + op_ctx->curr_page.l1_idx = 0; + load_level_required = PVR_MMU_SYNC_LEVEL_1; + + if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE) + goto load_tables; + + /* + * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to + * zero here. However, that would wrap the top layer of the page table + * hierarchy which is not a valid operation. Instead, we warn and return + * an error. + */ + WARN(true, + "%s(%p) attempted to loop the top of the page table hierarchy", + __func__, op_ctx); + return -EPERM; + + /* If indices have wrapped, we need to load new tables. */ +load_tables: + /* First, flush tables which will be unloaded. */ + pvr_mmu_op_context_sync_partial(op_ctx, load_level_required); + + /* Then load tables from the required level down. */ + return pvr_mmu_op_context_load_tables(op_ctx, should_create, + load_level_required); +} + +/** + * DOC: Single page operations + */ + +/** + * pvr_page_create() - Create a device-virtual memory page and insert it into + * a level 0 page table. + * @op_ctx: Target MMU op context pointing at the device-virtual address of the + * target page. + * @dma_addr: DMA address of the physical page backing the created page. + * @flags: Page options saved on the level 0 page table entry for reading by + * the device. + * + * Return: + * * 0 on success, or + * * -%EEXIST if the requested page already exists. + */ +static int +pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr, + struct pvr_page_flags_raw flags) +{ + /* Do not create a new page if one already exists. */ + if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx)) { + return -EEXIST; + } + + pvr_page_table_l0_insert(op_ctx, dma_addr, flags); + + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); + + return 0; +} + +/** + * pvr_page_destroy() - Destroy a device page after removing it from its + * parent level 0 page table. + * @op_ctx: Target MMU op context. + */ +static void +pvr_page_destroy(struct pvr_mmu_op_context *op_ctx) +{ + /* Do nothing if the page does not exist. */ + if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx)) { + return; + } + + /* Clear the parent L0 page table entry. */ + pvr_page_table_l0_remove(op_ctx); + + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); +} + +/** + * pvr_mmu_op_context_destroy() - Destroy an MMU op context. + * @op_ctx: Target MMU op context. + */ +void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx) +{ + const bool flush_caches = + op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE; + + pvr_mmu_op_context_sync(op_ctx); + + /* Unmaps should be flushed immediately. Map flushes can be deferred. 
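+	 * (Deferring a map flush is safe because the device cannot observe
+	 * a mapping before it is told about it; a stale unmap, by contrast,
+	 * would leave freed memory reachable by the device.)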
+	 */
+	if (flush_caches && !op_ctx->map.sgt)
+		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
+
+	while (op_ctx->map.l0_prealloc_tables) {
+		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
+
+		op_ctx->map.l0_prealloc_tables =
+			op_ctx->map.l0_prealloc_tables->next_free;
+		pvr_page_table_l0_free(tmp);
+	}
+
+	while (op_ctx->map.l1_prealloc_tables) {
+		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
+
+		op_ctx->map.l1_prealloc_tables =
+			op_ctx->map.l1_prealloc_tables->next_free;
+		pvr_page_table_l1_free(tmp);
+	}
+
+	while (op_ctx->unmap.l0_free_tables) {
+		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
+
+		op_ctx->unmap.l0_free_tables =
+			op_ctx->unmap.l0_free_tables->next_free;
+		pvr_page_table_l0_free(tmp);
+	}
+
+	while (op_ctx->unmap.l1_free_tables) {
+		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
+
+		op_ctx->unmap.l1_free_tables =
+			op_ctx->unmap.l1_free_tables->next_free;
+		pvr_page_table_l1_free(tmp);
+	}
+
+	kfree(op_ctx);
+}
+
+/**
+ * pvr_mmu_op_context_create() - Create an MMU op context.
+ * @ctx: MMU context associated with owning VM context.
+ * @sgt: Scatter gather table containing pages pinned for use by this context.
+ * @sgt_offset: Start offset of the requested device-virtual memory mapping.
+ * @size: Size in bytes of the requested device-virtual memory mapping. For an
+ * unmapping, this should be zero so that no page tables are allocated.
+ *
+ * Returns:
+ *  * Newly created MMU op context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l1_alloc() or
+ *    pvr_page_table_l0_alloc().
+ */
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
+			  u64 sgt_offset, u64 size)
+{
+	int err;
+
+	struct pvr_mmu_op_context *op_ctx =
+		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
+
+	if (!op_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	op_ctx->mmu_ctx = ctx;
+	op_ctx->map.sgt = sgt;
+	op_ctx->map.sgt_offset = sgt_offset;
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+	if (size) {
+		/*
+		 * The number of page table objects we need to prealloc is
+		 * indicated by the mapping size, start offset and the sizes
+		 * of the areas mapped per PT or PD. The range calculation is
+		 * identical to that for the index into a table for a device
+		 * address, so we reuse those functions here.
+		 */
+		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
+		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
+		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
+		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
+		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
+		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
+
+		/*
+		 * Alloc and push page table entries until we have enough of
+		 * each type, ending with linked lists of l0 and l1 entries in
+		 * reverse order.
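+		 *
+		 * As a worked example (hypothetical numbers, assuming 4KiB
+		 * device pages): a 6MiB mapping with sgt_offset 0 spans L1
+		 * indices 0..3 within a single L2 index, so l0_count is 4
+		 * and l1_count is 1.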
+		 */
+		for (int i = 0; i < l1_count; i++) {
+			struct pvr_page_table_l1 *l1_tmp =
+				pvr_page_table_l1_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l1_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
+			op_ctx->map.l1_prealloc_tables = l1_tmp;
+		}
+
+		for (int i = 0; i < l0_count; i++) {
+			struct pvr_page_table_l0 *l0_tmp =
+				pvr_page_table_l0_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l0_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
+			op_ctx->map.l0_prealloc_tables = l0_tmp;
+		}
+	}
+
+	return op_ctx;
+
+err_cleanup:
+	pvr_mmu_op_context_destroy(op_ctx);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
+ * starting from the current page of an MMU op context.
+ * @op_ctx: Target MMU op context pointing at the first page to unmap.
+ * @nr_pages: Number of pages to unmap.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page() (except -%ENXIO).
+ */
+static int
+pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
+				   u64 nr_pages)
+{
+	int err;
+
+	if (nr_pages == 0)
+		return 0;
+
+	/*
+	 * Destroy first page outside loop, as it doesn't require a page
+	 * advance beforehand. If the L0 page table reference in
+	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
+	 * @op_ctx.curr_page (so skip ahead).
+	 */
+	if (op_ctx->curr_page.l0_table)
+		pvr_page_destroy(op_ctx);
+
+	for (u64 page = 1; page < nr_pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, false);
+		/*
+		 * If the page table tree structure at @op_ctx.curr_page is
+		 * incomplete, skip ahead. We don't care about unmapping pages
+		 * that cannot exist.
+		 *
+		 * FIXME: This could be made more efficient by jumping ahead
+		 * using pvr_mmu_op_context_set_curr_page().
+		 */
+		if (err == -ENXIO)
+			continue;
+		else if (err)
+			return err;
+
+		pvr_page_destroy(op_ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_unmap() - Unmap pages from a memory context.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: First device-virtual address to unmap.
+ * @size: Size in bytes to unmap.
+ *
+ * The total amount of device-virtual memory unmapped is @size rounded down
+ * to the nearest multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
+ *  * Any error code returned by pvr_mmu_op_context_unmap_curr_page().
+ */
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
+{
+	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
+
+	if (err)
+		return err;
+
+	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
+						  size >> PVR_DEVICE_PAGE_SHIFT);
+}
+
+/**
+ * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
+ * device-virtual memory.
+ * @op_ctx: Target MMU op context pointing to the first page that should be
+ * mapped.
+ * @sgl: Target scatter-gather table entry.
+ * @offset: Offset into @sgl to map from. Must result in a starting address
+ * from @sgl which is CPU page-aligned.
+ * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @page_flags: Page options to be applied to every device-virtual memory page
+ * in the created mapping.
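+ *
+ * If page creation fails partway through, any pages already created by this
+ * call are unmapped again before returning (see the err_destroy_pages path
+ * in the body).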
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if the range specified by @offset and @size is not completely
+ *    within @sgl, or
+ *  * Any error encountered while creating a page with pvr_page_create(), or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page().
+ */
+static int
+pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
+		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
+{
+	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
+	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
+	const unsigned int dma_len = sg_dma_len(sgl);
+	struct pvr_page_table_ptr ptr_copy;
+	unsigned int page;
+	int err;
+
+	if (size > dma_len || offset > dma_len - size)
+		return -EINVAL;
+
+	/*
+	 * Before progressing, save a copy of the start pointer so we can use
+	 * it again if we enter an error state and have to destroy pages.
+	 */
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	/*
+	 * Create first page outside loop, as it doesn't require a page advance
+	 * beforehand.
+	 */
+	err = pvr_page_create(op_ctx, dma_addr, page_flags);
+	if (err)
+		return err;
+
+	for (page = 1; page < pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, true);
+		if (err)
+			goto err_destroy_pages;
+
+		dma_addr += PVR_DEVICE_PAGE_SIZE;
+
+		err = pvr_page_create(op_ctx, dma_addr, page_flags);
+		if (err)
+			goto err_destroy_pages;
+	}
+
+	return 0;
+
+err_destroy_pages:
+	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
+
+	return err;
+}
+
+/**
+ * pvr_mmu_map() - Map an object's virtual memory to physical memory.
+ * @op_ctx: Target MMU op context.
+ * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @flags: Flags from pvr_gem_object associated with the mapping.
+ * @device_addr: Virtual device address to map to. Must be device page-aligned.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
+ *  * Any error code returned by pvr_mmu_map_sgl(), or
+ *  * Any error code returned by pvr_mmu_op_context_next_page().
+ */
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr)
+{
+	struct pvr_page_table_ptr ptr_copy;
+	struct pvr_page_flags_raw flags_raw;
+	struct scatterlist *sgl;
+	u64 mapped_size = 0;
+	unsigned int count;
+	int err;
+
+	if (!size)
+		return 0;
+
+	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
+		return -EINVAL;
+
+	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
+	if (err)
+		return err;
+
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	flags_raw = pvr_page_flags_raw_create(false, false,
+					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
+					      flags & DRM_PVR_BO_PM_FW_PROTECT);
+
+	/* Map scatter-gather table. */
+	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
+		const size_t sgl_len = sg_dma_len(sgl);
+		u64 sgl_offset, map_sgl_len;
+
+		if (sgl_len <= op_ctx->map.sgt_offset) {
+			op_ctx->map.sgt_offset -= sgl_len;
+			continue;
+		}
+
+		sgl_offset = op_ctx->map.sgt_offset;
+		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
+
+		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
+				      flags_raw);
+		if (err)
+			break;
+
+		/*
+		 * Flag the L0 page table as requiring a flush when the MMU op
+		 * context is destroyed.
+ */ + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); + + op_ctx->map.sgt_offset = 0; + mapped_size += map_sgl_len; + + if (mapped_size >= size) + break; + + err = pvr_mmu_op_context_next_page(op_ctx, true); + if (err) + break; + } + + if (err && mapped_size) { + memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page)); + pvr_mmu_op_context_unmap_curr_page(op_ctx, + mapped_size >> PVR_DEVICE_PAGE_SHIFT); + } + + return err; +} diff --git a/drivers/gpu/drm/imagination/pvr_mmu.h b/drivers/gpu/drm/imagination/pvr_mmu.h new file mode 100644 index 000000000000..a8ecd460168d --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_mmu.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_MMU_H +#define PVR_MMU_H + +#include <linux/memory.h> +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h" */ +struct pvr_device; + +/* Forward declaration from "pvr_mmu.c" */ +struct pvr_mmu_context; +struct pvr_mmu_op_context; + +/* Forward declaration from "pvr_vm.c" */ +struct pvr_vm_context; + +/* Forward declaration from <linux/scatterlist.h> */ +struct sg_table; + +/** + * DOC: Public API (constants) + * + * .. c:macro:: PVR_DEVICE_PAGE_SIZE + * + * Fixed page size referenced by leaf nodes in the page table tree + * structure. In the current implementation, this value is pegged to the + * CPU page size (%PAGE_SIZE). It is therefore an error to specify a CPU + * page size which is not also a supported device page size. The supported + * device page sizes are: 4KiB, 16KiB, 64KiB, 256KiB, 1MiB and 2MiB. + * + * .. c:macro:: PVR_DEVICE_PAGE_SHIFT + * + * Shift value used to efficiently multiply or divide by + * %PVR_DEVICE_PAGE_SIZE. + * + * This value is derived from %PVR_DEVICE_PAGE_SIZE. + * + * .. c:macro:: PVR_DEVICE_PAGE_MASK + * + * Mask used to round a value down to the nearest multiple of + * %PVR_DEVICE_PAGE_SIZE. When bitwise negated, it will indicate whether a + * value is already a multiple of %PVR_DEVICE_PAGE_SIZE. + * + * This value is derived from %PVR_DEVICE_PAGE_SIZE. + */ + +/* PVR_DEVICE_PAGE_SIZE determines the page size */ +#define PVR_DEVICE_PAGE_SIZE (PAGE_SIZE) +#define PVR_DEVICE_PAGE_SHIFT (PAGE_SHIFT) +#define PVR_DEVICE_PAGE_MASK (PAGE_MASK) + +/** + * DOC: Page table index utilities (constants) + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_SPACE_SIZE + * + * Size of device-virtual address space which can be represented in the page + * table structure. + * + * This value is checked at runtime against + * &pvr_device_features.virtual_address_space_bits by + * pvr_vm_create_context(), which will return an error if the feature value + * does not match this constant. + * + * .. admonition:: Future work + * + * It should be possible to support other values of + * &pvr_device_features.virtual_address_space_bits, but so far no + * hardware has been created which advertises an unsupported value. + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_BITS + * + * Number of bits needed to represent any value less than + * %PVR_PAGE_TABLE_ADDR_SPACE_SIZE exactly. + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_MASK + * + * Bitmask of device-virtual addresses which are valid in the page table + * structure. + * + * This value is derived from %PVR_PAGE_TABLE_ADDR_SPACE_SIZE, so the same + * notes on that constant apply here. 
+ */
+#define PVR_PAGE_TABLE_ADDR_SPACE_SIZE SZ_1T
+#define PVR_PAGE_TABLE_ADDR_BITS __ffs(PVR_PAGE_TABLE_ADDR_SPACE_SIZE)
+#define PVR_PAGE_TABLE_ADDR_MASK (PVR_PAGE_TABLE_ADDR_SPACE_SIZE - 1)
+
+void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev);
+int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait);
+
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev);
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx);
+
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx);
+
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx);
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx,
+			  struct sg_table *sgt, u64 sgt_offset, u64 size);
+
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr);
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size);
+
+#endif /* PVR_MMU_H */
diff --git a/drivers/gpu/drm/imagination/pvr_params.c b/drivers/gpu/drm/imagination/pvr_params.c
new file mode 100644
index 000000000000..b91759f362c5
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_params.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_params.h"
+
+#include <linux/cache.h>
+#include <linux/moduleparam.h>
+
+static struct pvr_device_params pvr_device_param_defaults __read_mostly = {
+#define X(type_, name_, value_, desc_, ...) .name_ = (value_),
+	PVR_DEVICE_PARAMS
+#undef X
+};
+
+#define PVR_DEVICE_PARAM_NAMED(name_, type_, desc_) \
+	module_param_named(name_, pvr_device_param_defaults.name_, type_, \
+			   0400); \
+	MODULE_PARM_DESC(name_, desc_);
+
+/*
+ * This list of defines must contain every type specified in "pvr_params.h" as
+ * ``PVR_PARAM_TYPE_*_C``.
+ */
+#define PVR_PARAM_TYPE_X32_MODPARAM uint
+
+#define X(type_, name_, value_, desc_, ...) \
+	PVR_DEVICE_PARAM_NAMED(name_, PVR_PARAM_TYPE_##type_##_MODPARAM, desc_);
+PVR_DEVICE_PARAMS
+#undef X
+
+int
+pvr_device_params_init(struct pvr_device_params *params)
+{
+	/*
+	 * If heap-allocated parameters are added in the future (e.g.
+	 * modparam's charp type), they must be handled specially here (via
+	 * kstrdup() in the case of charp). Since that's not necessary yet,
+	 * a straight copy will do for now. This change will also require a
+	 * pvr_device_params_fini() function to free any heap-allocated copies.
+	 */
+
+	*params = pvr_device_param_defaults;
+
+	return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+#include "pvr_device.h"
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/stddef.h>
+
+/*
+ * This list of defines must contain every type specified in "pvr_params.h" as
+ * ``PVR_PARAM_TYPE_*_C``.
+ */
+#define PVR_PARAM_TYPE_X32_FMT "0x%08llx"
+
+#define X_SET(name_, mode_) X_SET_##mode_(name_)
+#define X_SET_DEF(name_, update_, mode_) X_SET_DEF_##mode_(name_, update_)
+
+#define X_SET_RO(name_) NULL
+#define X_SET_RW(name_) __pvr_device_param_##name_##_set
+
+#define X_SET_DEF_RO(name_, update_)
+#define X_SET_DEF_RW(name_, update_) \
+	static int \
+	X_SET_RW(name_)(void *data, u64 val) \
+	{ \
+		struct pvr_device *pvr_dev = data; \
+		/* This is not just (update_) to suppress -Waddress.
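+		 * (A plain `if (update_)` would test the address of a \
+		 * function, which the compiler flags as always true; the \
+		 * cast through void * keeps the check for a NULL callback \
+		 * while avoiding the warning.) \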
*/ \ + if ((void *)(update_) != NULL) \ + (update_)(pvr_dev, pvr_dev->params.name_, val); \ + pvr_dev->params.name_ = val; \ + return 0; \ + } + +#define X(type_, name_, value_, desc_, mode_, update_) \ + static int \ + __pvr_device_param_##name_##_get(void *data, u64 *val) \ + { \ + struct pvr_device *pvr_dev = data; \ + *val = pvr_dev->params.name_; \ + return 0; \ + } \ + X_SET_DEF(name_, update_, mode_) \ + static int \ + __pvr_device_param_##name_##_open(struct inode *inode, \ + struct file *file) \ + { \ + __simple_attr_check_format(PVR_PARAM_TYPE_##type_##_FMT, \ + 0ull); \ + return simple_attr_open(inode, file, \ + __pvr_device_param_##name_##_get, \ + X_SET(name_, mode_), \ + PVR_PARAM_TYPE_##type_##_FMT); \ + } +PVR_DEVICE_PARAMS +#undef X + +#undef X_SET +#undef X_SET_RO +#undef X_SET_RW +#undef X_SET_DEF +#undef X_SET_DEF_RO +#undef X_SET_DEF_RW + +static struct { +#define X(type_, name_, value_, desc_, mode_, update_) \ + const struct file_operations name_; + PVR_DEVICE_PARAMS +#undef X +} pvr_device_param_debugfs_fops = { +#define X(type_, name_, value_, desc_, mode_, update_) \ + .name_ = { \ + .owner = THIS_MODULE, \ + .open = __pvr_device_param_##name_##_open, \ + .release = simple_attr_release, \ + .read = simple_attr_read, \ + .write = simple_attr_write, \ + .llseek = generic_file_llseek, \ + }, + PVR_DEVICE_PARAMS +#undef X +}; + +void +pvr_params_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir) +{ +#define X_MODE(mode_) X_MODE_##mode_ +#define X_MODE_RO 0400 +#define X_MODE_RW 0600 + +#define X(type_, name_, value_, desc_, mode_, update_) \ + debugfs_create_file(#name_, X_MODE(mode_), dir, pvr_dev, \ + &pvr_device_param_debugfs_fops.name_); + PVR_DEVICE_PARAMS +#undef X + +#undef X_MODE +#undef X_MODE_RO +#undef X_MODE_RW +} +#endif diff --git a/drivers/gpu/drm/imagination/pvr_params.h b/drivers/gpu/drm/imagination/pvr_params.h new file mode 100644 index 000000000000..5807915b456b --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_params.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_PARAMS_H +#define PVR_PARAMS_H + +#include "pvr_rogue_fwif.h" + +#include <linux/cache.h> +#include <linux/compiler_attributes.h> + +/* + * This is the definitive list of types allowed in the definition of + * %PVR_DEVICE_PARAMS. + */ +#define PVR_PARAM_TYPE_X32_C u32 + +/* + * This macro defines all device-specific parameters; that is parameters which + * are set independently per device. + * + * The X-macro accepts the following arguments. Arguments marked with [debugfs] + * are ignored when debugfs is disabled; values used for these arguments may + * safely be gated behind CONFIG_DEBUG_FS. + * + * @type_: The definitive list of allowed values is PVR_PARAM_TYPE_*_C. + * @name_: Name of the parameter. This is used both as the field name in C and + * stringified as the parameter name. + * @value_: Initial/default value. + * @desc_: String literal used as help text to describe the usage of this + * parameter. + * @mode_: [debugfs] One of {RO,RW}. The access mode of the debugfs entry for + * this parameter. + * @update_: [debugfs] When debugfs support is enabled, parameters may be + * updated at runtime. When this happens, this function will be + * called to allow changes to propagate. The signature of this + * function is: + * + * void (*)(struct pvr_device *pvr_dev, T old_val, T new_val) + * + * Where T is the C type associated with @type_. 
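+ *
+ *           For the only type currently defined (X32, i.e. u32), a
+ *           hypothetical callback would therefore be declared as:
+ *
+ *           void foo_update(struct pvr_device *pvr_dev, u32 old_val,
+ *                           u32 new_val);
+ *
+ *           (foo_update is purely illustrative and not part of the driver.)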
+ * + * If @mode_ does not allow write access, this function will never be + * called. In this case, or if no update callback is required, you + * should specify NULL for this argument. + */ +#define PVR_DEVICE_PARAMS \ + X(X32, fw_trace_mask, ROGUE_FWIF_LOG_TYPE_NONE, \ + "Enable FW trace for the specified groups. Specifying 0 disables " \ + "all FW tracing.", \ + RW, pvr_fw_trace_mask_update) + +struct pvr_device_params { +#define X(type_, name_, value_, desc_, ...) \ + PVR_PARAM_TYPE_##type_##_C name_; + PVR_DEVICE_PARAMS +#undef X +}; + +int pvr_device_params_init(struct pvr_device_params *params); + +#if defined(CONFIG_DEBUG_FS) +/* Forward declaration from "pvr_device.h". */ +struct pvr_device; + +/* Forward declaration from <linux/dcache.h>. */ +struct dentry; + +void pvr_params_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir); +#endif /* defined(CONFIG_DEBUG_FS) */ + +#endif /* PVR_PARAMS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c new file mode 100644 index 000000000000..ba7816fd28ec --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_fw_startstop.h" +#include "pvr_power.h" +#include "pvr_queue.h" +#include "pvr_rogue_fwif.h" + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <linux/clk.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#define POWER_SYNC_TIMEOUT_US (1000000) /* 1s */ + +#define WATCHDOG_TIME_MS (500) + +/** + * pvr_device_lost() - Mark GPU device as lost + * @pvr_dev: Target PowerVR device. + * + * This will cause the DRM device to be unplugged. + */ +void +pvr_device_lost(struct pvr_device *pvr_dev) +{ + if (!pvr_dev->lost) { + pvr_dev->lost = true; + drm_dev_unplug(from_pvr_device(pvr_dev)); + } +} + +static int +pvr_power_send_command(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *pow_cmd) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + u32 slot_nr; + u32 value; + int err; + + WRITE_ONCE(*fw_dev->power_sync, 0); + + err = pvr_kccb_send_cmd_powered(pvr_dev, pow_cmd, &slot_nr); + if (err) + return err; + + /* Wait for FW to acknowledge. */ + return readl_poll_timeout(pvr_dev->fw_dev.power_sync, value, value != 0, 100, + POWER_SYNC_TIMEOUT_US); +} + +static int +pvr_power_request_idle(struct pvr_device *pvr_dev) +{ + struct rogue_fwif_kccb_cmd pow_cmd; + + /* Send FORCED_IDLE request to FW. */ + pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW; + pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_FORCED_IDLE_REQ; + pow_cmd.cmd_data.pow_data.power_req_data.pow_request_type = ROGUE_FWIF_POWER_FORCE_IDLE; + + return pvr_power_send_command(pvr_dev, &pow_cmd); +} + +static int +pvr_power_request_pwr_off(struct pvr_device *pvr_dev) +{ + struct rogue_fwif_kccb_cmd pow_cmd; + + /* Send POW_OFF request to firmware. 
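+	 * The FW acknowledges by writing a non-zero value to the power_sync
+	 * object, which pvr_power_send_command() polls for.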
+	 */
+	pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
+	pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_OFF_REQ;
+	pow_cmd.cmd_data.pow_data.power_req_data.forced = true;
+
+	return pvr_power_send_command(pvr_dev, &pow_cmd);
+}
+
+static int
+pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset)
+{
+	if (!hard_reset) {
+		int err;
+
+		cancel_delayed_work_sync(&pvr_dev->watchdog.work);
+
+		err = pvr_power_request_idle(pvr_dev);
+		if (err)
+			return err;
+
+		err = pvr_power_request_pwr_off(pvr_dev);
+		if (err)
+			return err;
+	}
+
+	return pvr_fw_stop(pvr_dev);
+}
+
+static int
+pvr_power_fw_enable(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	err = pvr_fw_start(pvr_dev);
+	if (err)
+		return err;
+
+	err = pvr_wait_for_fw_boot(pvr_dev);
+	if (err) {
+		drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n");
+		pvr_fw_stop(pvr_dev);
+		return err;
+	}
+
+	queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
+			   msecs_to_jiffies(WATCHDOG_TIME_MS));
+
+	return 0;
+}
+
+bool
+pvr_power_is_idle(struct pvr_device *pvr_dev)
+{
+	/*
+	 * FW power state can be out of date if a KCCB command has been
+	 * submitted but the FW hasn't started processing it yet. So also
+	 * check the KCCB status.
+	 */
+	enum rogue_fwif_pow_state pow_state = READ_ONCE(pvr_dev->fw_dev.fwif_sysdata->pow_state);
+	bool kccb_idle = pvr_kccb_is_idle(pvr_dev);
+
+	return (pow_state == ROGUE_FWIF_POW_IDLE) && kccb_idle;
+}
+
+static bool
+pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
+{
+	/* Check KCCB commands are progressing. */
+	u32 kccb_cmds_executed = pvr_dev->fw_dev.fwif_osdata->kccb_cmds_executed;
+	bool kccb_is_idle = pvr_kccb_is_idle(pvr_dev);
+
+	if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed && !kccb_is_idle) {
+		pvr_dev->watchdog.kccb_stall_count++;
+
+		/*
+		 * If we have commands pending with no progress for 2
+		 * consecutive polls then consider KCCB command processing
+		 * stalled.
+		 */
+		if (pvr_dev->watchdog.kccb_stall_count == 2) {
+			pvr_dev->watchdog.kccb_stall_count = 0;
+			return true;
+		}
+	} else if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed) {
+		bool has_active_contexts;
+
+		mutex_lock(&pvr_dev->queues.lock);
+		has_active_contexts = !list_empty(&pvr_dev->queues.active);
+		mutex_unlock(&pvr_dev->queues.lock);
+
+		if (has_active_contexts) {
+			/* Send a HEALTH_CHECK command so we can verify FW is still alive. */
+			struct rogue_fwif_kccb_cmd health_check_cmd;
+
+			health_check_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK;
+
+			pvr_kccb_send_cmd_powered(pvr_dev, &health_check_cmd, NULL);
+		}
+	} else {
+		pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
+		pvr_dev->watchdog.kccb_stall_count = 0;
+	}
+
+	return false;
+}
+
+static void
+pvr_watchdog_worker(struct work_struct *work)
+{
+	struct pvr_device *pvr_dev = container_of(work, struct pvr_device,
+						  watchdog.work.work);
+	bool stalled;
+
+	if (pvr_dev->lost)
+		return;
+
+	if (pm_runtime_get_if_in_use(from_pvr_device(pvr_dev)->dev) <= 0)
+		goto out_requeue;
+
+	if (!pvr_dev->fw_dev.booted)
+		goto out_pm_runtime_put;
+
+	stalled = pvr_watchdog_kccb_stalled(pvr_dev);
+
+	if (stalled) {
+		drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset");
+
+		pvr_power_reset(pvr_dev, true);
+		/* Device may be lost at this point.
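+		 * If so, pvr_device_lost() has already unplugged the DRM
+		 * device, and the requeue below is skipped.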
+		 */
+	}
+
+out_pm_runtime_put:
+	pm_runtime_put(from_pvr_device(pvr_dev)->dev);
+
+out_requeue:
+	if (!pvr_dev->lost) {
+		queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
+				   msecs_to_jiffies(WATCHDOG_TIME_MS));
+	}
+}
+
+/**
+ * pvr_watchdog_init() - Initialise watchdog for device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success.
+ */
+int
+pvr_watchdog_init(struct pvr_device *pvr_dev)
+{
+	INIT_DELAYED_WORK(&pvr_dev->watchdog.work, pvr_watchdog_worker);
+
+	return 0;
+}
+
+int
+pvr_power_device_suspend(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	int err = 0;
+	int idx;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	if (pvr_dev->fw_dev.booted) {
+		err = pvr_power_fw_disable(pvr_dev, false);
+		if (err)
+			goto err_drm_dev_exit;
+	}
+
+	clk_disable_unprepare(pvr_dev->mem_clk);
+	clk_disable_unprepare(pvr_dev->sys_clk);
+	clk_disable_unprepare(pvr_dev->core_clk);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
+}
+
+int
+pvr_power_device_resume(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	err = clk_prepare_enable(pvr_dev->core_clk);
+	if (err)
+		goto err_drm_dev_exit;
+
+	err = clk_prepare_enable(pvr_dev->sys_clk);
+	if (err)
+		goto err_core_clk_disable;
+
+	err = clk_prepare_enable(pvr_dev->mem_clk);
+	if (err)
+		goto err_sys_clk_disable;
+
+	if (pvr_dev->fw_dev.booted) {
+		err = pvr_power_fw_enable(pvr_dev);
+		if (err)
+			goto err_mem_clk_disable;
+	}
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_mem_clk_disable:
+	clk_disable_unprepare(pvr_dev->mem_clk);
+
+err_sys_clk_disable:
+	clk_disable_unprepare(pvr_dev->sys_clk);
+
+err_core_clk_disable:
+	clk_disable_unprepare(pvr_dev->core_clk);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
+}
+
+int
+pvr_power_device_idle(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+
+	return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
+}
+
+/**
+ * pvr_power_reset() - Reset the GPU
+ * @pvr_dev: Device pointer
+ * @hard_reset: %true for hard reset, %false for soft reset
+ *
+ * If @hard_reset is %false and the FW processor fails to respond during the
+ * reset process, this function will attempt a hard reset.
+ *
+ * If a hard reset fails then the GPU device is reported as lost.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_power_get(), pvr_power_fw_disable() or
+ *    pvr_power_fw_enable().
+ */
+int
+pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
+{
+	bool queues_disabled = false;
+	int err;
+
+	/*
+	 * Take a power reference during the reset. This should prevent any
+	 * interference with the power state during reset.
+	 */
+	WARN_ON(pvr_power_get(pvr_dev));
+
+	down_write(&pvr_dev->reset_sem);
+
+	if (pvr_dev->lost) {
+		err = -EIO;
+		goto err_up_write;
+	}
+
+	/* Disable IRQs for the duration of the reset.
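+	 * This keeps the FW event handler from running while FW state is
+	 * being torn down and rebuilt. Note that the error path deliberately
+	 * leaves IRQs disabled when the device is declared lost (see below).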
*/ + disable_irq(pvr_dev->irq); + + do { + if (hard_reset) { + pvr_queue_device_pre_reset(pvr_dev); + queues_disabled = true; + } + + err = pvr_power_fw_disable(pvr_dev, hard_reset); + if (!err) { + if (hard_reset) { + pvr_dev->fw_dev.booted = false; + WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + + err = pvr_fw_hard_reset(pvr_dev); + if (err) + goto err_device_lost; + + err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + pvr_dev->fw_dev.booted = true; + if (err) + goto err_device_lost; + } else { + /* Clear the FW faulted flags. */ + pvr_dev->fw_dev.fwif_sysdata->hwr_state_flags &= + ~(ROGUE_FWIF_HWR_FW_FAULT | + ROGUE_FWIF_HWR_RESTART_REQUESTED); + } + + pvr_fw_irq_clear(pvr_dev); + + err = pvr_power_fw_enable(pvr_dev); + } + + if (err && hard_reset) + goto err_device_lost; + + if (err && !hard_reset) { + drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset"); + hard_reset = true; + } + } while (err); + + if (queues_disabled) + pvr_queue_device_post_reset(pvr_dev); + + enable_irq(pvr_dev->irq); + + up_write(&pvr_dev->reset_sem); + + pvr_power_put(pvr_dev); + + return 0; + +err_device_lost: + drm_err(from_pvr_device(pvr_dev), "GPU device lost"); + pvr_device_lost(pvr_dev); + + /* Leave IRQs disabled if the device is lost. */ + + if (queues_disabled) + pvr_queue_device_post_reset(pvr_dev); + +err_up_write: + up_write(&pvr_dev->reset_sem); + + pvr_power_put(pvr_dev); + + return err; +} + +/** + * pvr_watchdog_fini() - Shutdown watchdog for device + * @pvr_dev: Target PowerVR device. + */ +void +pvr_watchdog_fini(struct pvr_device *pvr_dev) +{ + cancel_delayed_work_sync(&pvr_dev->watchdog.work); +} diff --git a/drivers/gpu/drm/imagination/pvr_power.h b/drivers/gpu/drm/imagination/pvr_power.h new file mode 100644 index 000000000000..9a9312dcb2da --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_power.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_POWER_H +#define PVR_POWER_H + +#include "pvr_device.h" + +#include <linux/mutex.h> +#include <linux/pm_runtime.h> + +int pvr_watchdog_init(struct pvr_device *pvr_dev); +void pvr_watchdog_fini(struct pvr_device *pvr_dev); + +void pvr_device_lost(struct pvr_device *pvr_dev); + +bool pvr_power_is_idle(struct pvr_device *pvr_dev); + +int pvr_power_device_suspend(struct device *dev); +int pvr_power_device_resume(struct device *dev); +int pvr_power_device_idle(struct device *dev); + +int pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset); + +static __always_inline int +pvr_power_get(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + + return pm_runtime_resume_and_get(drm_dev->dev); +} + +static __always_inline int +pvr_power_put(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + + return pm_runtime_put(drm_dev->dev); +} + +#endif /* PVR_POWER_H */ diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c new file mode 100644 index 000000000000..5ed9c98fb599 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -0,0 +1,1432 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#include <drm/drm_managed.h> +#include <drm/gpu_scheduler.h> + +#include "pvr_cccb.h" +#include "pvr_context.h" +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_job.h" +#include "pvr_queue.h" +#include "pvr_vm.h" + +#include "pvr_rogue_fwif_client.h" + +#define MAX_DEADLINE_MS 30000 + +#define CTX_COMPUTE_CCCB_SIZE_LOG2 15 +#define CTX_FRAG_CCCB_SIZE_LOG2 15 +#define CTX_GEOM_CCCB_SIZE_LOG2 15 +#define CTX_TRANSFER_CCCB_SIZE_LOG2 15 + +static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev) +{ + u32 num_isp_store_registers; + + if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) { + num_isp_store_registers = 1; + } else { + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers); + if (WARN_ON(err)) + return err; + } + + return sizeof(struct rogue_fwif_frag_ctx_state) + + (num_isp_store_registers * + sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0])); +} + +static int get_frag_ctx_state_size(struct pvr_device *pvr_dev) +{ + u32 num_isp_store_registers; + int err; + + if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) { + err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers); + if (WARN_ON(err)) + return err; + + if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) { + u32 xpu_max_slaves; + + err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves); + if (WARN_ON(err)) + return err; + + num_isp_store_registers *= (1 + xpu_max_slaves); + } + } else { + err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers); + if (WARN_ON(err)) + return err; + } + + return sizeof(struct rogue_fwif_frag_ctx_state) + + (num_isp_store_registers * + sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0])); +} + +static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type) +{ + switch (type) { + case DRM_PVR_JOB_TYPE_GEOMETRY: + return sizeof(struct rogue_fwif_geom_ctx_state); + case DRM_PVR_JOB_TYPE_FRAGMENT: + return get_frag_ctx_state_size(pvr_dev); + case DRM_PVR_JOB_TYPE_COMPUTE: + return sizeof(struct rogue_fwif_compute_ctx_state); + case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: + return get_xfer_ctx_state_size(pvr_dev); + } + + WARN(1, "Invalid queue type"); + return -EINVAL; +} + +static u32 get_ctx_offset(enum drm_pvr_job_type type) +{ + switch (type) { + case DRM_PVR_JOB_TYPE_GEOMETRY: + return offsetof(struct rogue_fwif_fwrendercontext, geom_context); + case DRM_PVR_JOB_TYPE_FRAGMENT: + return offsetof(struct rogue_fwif_fwrendercontext, frag_context); + case DRM_PVR_JOB_TYPE_COMPUTE: + return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context); + case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: + return offsetof(struct rogue_fwif_fwtransfercontext, tq_context); + } + + return 0; +} + +static const char * +pvr_queue_fence_get_driver_name(struct dma_fence *f) +{ + return PVR_DRIVER_NAME; +} + +static void pvr_queue_fence_release(struct dma_fence *f) +{ + struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + + pvr_context_put(fence->queue->ctx); + dma_fence_free(f); +} + +static const char * +pvr_queue_job_fence_get_timeline_name(struct dma_fence *f) +{ + struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + + switch (fence->queue->type) { + case DRM_PVR_JOB_TYPE_GEOMETRY: + return "geometry"; + + case DRM_PVR_JOB_TYPE_FRAGMENT: + return "fragment"; + + case DRM_PVR_JOB_TYPE_COMPUTE: + return "compute"; + + case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: + return "transfer"; + } + + WARN(1, "Invalid queue 
type"); + return "invalid"; +} + +static const char * +pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f) +{ + struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + + switch (fence->queue->type) { + case DRM_PVR_JOB_TYPE_GEOMETRY: + return "geometry-cccb"; + + case DRM_PVR_JOB_TYPE_FRAGMENT: + return "fragment-cccb"; + + case DRM_PVR_JOB_TYPE_COMPUTE: + return "compute-cccb"; + + case DRM_PVR_JOB_TYPE_TRANSFER_FRAG: + return "transfer-cccb"; + } + + WARN(1, "Invalid queue type"); + return "invalid"; +} + +static const struct dma_fence_ops pvr_queue_job_fence_ops = { + .get_driver_name = pvr_queue_fence_get_driver_name, + .get_timeline_name = pvr_queue_job_fence_get_timeline_name, + .release = pvr_queue_fence_release, +}; + +/** + * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is + * backed by a UFO. + * @f: The dma_fence to turn into a pvr_queue_fence. + * + * Return: + * * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or + * * NULL otherwise. + */ +static struct pvr_queue_fence * +to_pvr_queue_job_fence(struct dma_fence *f) +{ + struct drm_sched_fence *sched_fence = to_drm_sched_fence(f); + + if (sched_fence) + f = sched_fence->parent; + + if (f && f->ops == &pvr_queue_job_fence_ops) + return container_of(f, struct pvr_queue_fence, base); + + return NULL; +} + +static const struct dma_fence_ops pvr_queue_cccb_fence_ops = { + .get_driver_name = pvr_queue_fence_get_driver_name, + .get_timeline_name = pvr_queue_cccb_fence_get_timeline_name, + .release = pvr_queue_fence_release, +}; + +/** + * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects. + * @f: The dma_fence object to put. + * + * If the pvr_queue_fence has been initialized, we call dma_fence_put(), + * otherwise we free the object with dma_fence_free(). This allows us + * to do the right thing before and after pvr_queue_fence_init() had been + * called. + */ +static void pvr_queue_fence_put(struct dma_fence *f) +{ + if (!f) + return; + + if (WARN_ON(f->ops && + f->ops != &pvr_queue_cccb_fence_ops && + f->ops != &pvr_queue_job_fence_ops)) + return; + + /* If the fence hasn't been initialized yet, free the object directly. */ + if (f->ops) + dma_fence_put(f); + else + dma_fence_free(f); +} + +/** + * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object + * + * Call this function to allocate job CCCB and done fences. This only + * allocates the objects. Initialization happens when the underlying + * dma_fence object is to be returned to drm_sched (in prepare_job() or + * run_job()). + * + * Return: + * * A valid pointer if the allocation succeeds, or + * * NULL if the allocation fails. + */ +static struct dma_fence * +pvr_queue_fence_alloc(void) +{ + struct pvr_queue_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + return &fence->base; +} + +/** + * pvr_queue_fence_init() - Initializes a pvr_queue_fence object. + * @f: The fence to initialize + * @queue: The queue this fence belongs to. + * @fence_ops: The fence operations. + * @fence_ctx: The fence context. + * + * Wrapper around dma_fence_init() that takes care of initializing the + * pvr_queue_fence::queue field too. 
+ */
+static void
+pvr_queue_fence_init(struct dma_fence *f,
+		     struct pvr_queue *queue,
+		     const struct dma_fence_ops *fence_ops,
+		     struct pvr_queue_fence_ctx *fence_ctx)
+{
+	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
+
+	pvr_context_get(queue->ctx);
+	fence->queue = queue;
+	dma_fence_init(&fence->base, fence_ops,
+		       &fence_ctx->lock, fence_ctx->id,
+		       atomic_inc_return(&fence_ctx->seqno));
+}
+
+/**
+ * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
+ * @fence: The fence to initialize.
+ * @queue: The queue this fence belongs to.
+ *
+ * Initializes a fence that can be used to wait for CCCB space.
+ *
+ * Should be called in the ::prepare_job() path, so the fence returned to
+ * drm_sched is valid.
+ */
+static void
+pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
+{
+	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
+			     &queue->cccb_fence_ctx.base);
+}
+
+/**
+ * pvr_queue_job_fence_init() - Initializes a job done fence object.
+ * @fence: The fence to initialize.
+ * @queue: The queue this fence belongs to.
+ *
+ * Initializes a fence that will be signaled when the GPU is done executing
+ * a job.
+ *
+ * Should be called *before* the ::run_job() path, so the fence is initialised
+ * before being placed in the pending_list.
+ */
+static void
+pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
+{
+	pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
+			     &queue->job_fence_ctx);
+}
+
+/**
+ * pvr_queue_fence_ctx_init() - Queue fence context initialization.
+ * @fence_ctx: The context to initialize
+ */
+static void
+pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
+{
+	spin_lock_init(&fence_ctx->lock);
+	fence_ctx->id = dma_fence_context_alloc(1);
+	atomic_set(&fence_ctx->seqno, 0);
+}
+
+static u32 ufo_cmds_size(u32 elem_count)
+{
+	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
+	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
+	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
+	u32 size = full_cmd_count *
+		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
+						     sizeof(struct rogue_fwif_ufo));
+
+	if (remaining_elems) {
+		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
+							  sizeof(struct rogue_fwif_ufo));
+	}
+
+	return size;
+}
+
+static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
+{
+	/* One UFO cmd for the done fence signaling, one UFO cmd per native
+	 * fence waited on, and a command for the job itself.
+	 */
+	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
+	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
+}
+
+/**
+ * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
+ * @job: Job to operate on.
+ *
+ * Returns: Number of non-signaled native deps remaining.
+ */
+static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
+{
+	unsigned long remaining_count = 0;
+	struct dma_fence *fence = NULL;
+	unsigned long index;
+
+	xa_for_each(&job->base.dependencies, index, fence) {
+		struct pvr_queue_fence *jfence;
+
+		jfence = to_pvr_queue_job_fence(fence);
+		if (!jfence)
+			continue;
+
+		if (!dma_fence_is_signaled(&jfence->base))
+			remaining_count++;
+	}
+
+	return remaining_count;
+}
+
+/**
+ * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
+ * @queue: The queue this job will be submitted to.
+ * @job: The job to get the CCCB fence on.
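+ *
+ * The fence object itself is allocated up front by pvr_queue_job_init(); this
+ * function only decides whether it is still needed and initializes it if so.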
+ *
+ * The CCCB fence is a synchronization primitive allowing us to delay job
+ * submission until there's enough space in the CCCB to submit the job.
+ *
+ * Return:
+ *  * NULL if there's enough space in the CCCB to submit this job, or
+ *  * A valid dma_fence object otherwise.
+ */
+static struct dma_fence *
+pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_queue_fence *cccb_fence;
+	unsigned int native_deps_remaining;
+
+	/* If the fence is NULL, that means we already checked that we had
+	 * enough space in the cccb for our job.
+	 */
+	if (!job->cccb_fence)
+		return NULL;
+
+	mutex_lock(&queue->cccb_fence_ctx.job_lock);
+
+	/* Count remaining native dependencies and check if the job fits in the CCCB. */
+	native_deps_remaining = job_count_remaining_native_deps(job);
+	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
+		pvr_queue_fence_put(job->cccb_fence);
+		job->cccb_fence = NULL;
+		goto out_unlock;
+	}
+
+	/* There should be no job attached to the CCCB fence context:
+	 * drm_sched_entity guarantees that jobs are submitted one at a time.
+	 */
+	if (WARN_ON(queue->cccb_fence_ctx.job))
+		pvr_job_put(queue->cccb_fence_ctx.job);
+
+	queue->cccb_fence_ctx.job = pvr_job_get(job);
+
+	/* Initialize the fence before returning it. */
+	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
+	if (!WARN_ON(cccb_fence->queue))
+		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
+
+out_unlock:
+	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
+
+	return dma_fence_get(job->cccb_fence);
+}
+
+/**
+ * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
+ * @queue: The queue this job will be submitted to.
+ * @job: The job to get the KCCB fence on.
+ *
+ * The KCCB fence is a synchronization primitive allowing us to delay job
+ * submission until there's enough space in the KCCB to submit the job.
+ *
+ * Return:
+ *  * NULL if there's enough space in the KCCB to submit this job, or
+ *  * A valid dma_fence object otherwise.
+ */
+static struct dma_fence *
+pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+	struct dma_fence *kccb_fence = NULL;
+
+	/* If the fence is NULL, that means we already checked that we had
+	 * enough space in the KCCB for our job.
+	 */
+	if (!job->kccb_fence)
+		return NULL;
+
+	if (!WARN_ON(job->kccb_fence->ops)) {
+		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
+		job->kccb_fence = NULL;
+	}
+
+	return kccb_fence;
+}
+
+static struct dma_fence *
+pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
+				   job->paired_job : NULL;
+	struct dma_fence *f;
+	unsigned long index;
+
+	if (!frag_job)
+		return NULL;
+
+	xa_for_each(&frag_job->base.dependencies, index, f) {
+		/* Skip already signaled fences. */
+		if (dma_fence_is_signaled(f))
+			continue;
+
+		/* Skip our own fence. */
+		if (f == &job->base.s_fence->scheduled)
+			continue;
+
+		return dma_fence_get(f);
+	}
+
+	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
+}
+
+/**
+ * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence.
+ * @sched_job: The job to query the next internal dependency on
+ * @s_entity: The entity this job is queued on.
+ *
+ * After iterating over drm_sched_job::dependencies, drm_sched lets the driver
+ * return its own internal dependencies.
+ * We use this function to return our internal dependencies.
+ */
+static struct dma_fence *
+pvr_queue_prepare_job(struct drm_sched_job *sched_job,
+		      struct drm_sched_entity *s_entity)
+{
+	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
+	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
+	struct dma_fence *internal_dep = NULL;
+
+	/*
+	 * Initialize the done_fence, so we can signal it. This must be done
+	 * here because otherwise by the time of run_job() the job will end up
+	 * in the pending list without a valid fence.
+	 */
+	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
+		/*
+		 * This will be called on a paired fragment job after it has
+		 * been submitted to firmware. We can tell if this is the
+		 * case, and bail early, by checking whether run_job() has
+		 * been called on the geometry job, which would have issued
+		 * a pm ref.
+		 */
+		if (job->paired_job->has_pm_ref)
+			return NULL;
+
+		/*
+		 * In this case we need to use the job's own ctx to initialise
+		 * the done_fence. The other steps are done in the ctx of the
+		 * paired geometry job.
+		 */
+		pvr_queue_job_fence_init(job->done_fence,
+					 job->ctx->queues.fragment);
+	} else {
+		pvr_queue_job_fence_init(job->done_fence, queue);
+	}
+
+	/* CCCB fence is used to make sure we have enough space in the CCCB to
+	 * submit our commands.
+	 */
+	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
+
+	/* KCCB fence is used to make sure we have a KCCB slot to queue our
+	 * CMD_KICK.
+	 */
+	if (!internal_dep)
+		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
+
+	/* Any extra internal dependency should be added here, using the following
+	 * pattern:
+	 *
+	 *	if (!internal_dep)
+	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
+	 */
+
+	/* The paired job fence should come last, when everything else is ready. */
+	if (!internal_dep)
+		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);
+
+	return internal_dep;
+}
+
+/**
+ * pvr_queue_update_active_state_locked() - Update the queue active state.
+ * @queue: Queue to update the state on.
+ *
+ * Locked version of pvr_queue_update_active_state(). Must be called with
+ * pvr_device::queues::lock held.
+ */
+static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
+{
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+
+	lockdep_assert_held(&pvr_dev->queues.lock);
+
+	/* The queue is temporarily out of any list when it's being reset;
+	 * we don't want a call to pvr_queue_update_active_state_locked()
+	 * to re-insert it behind our back.
+	 */
+	if (list_empty(&queue->node))
+		return;
+
+	if (!atomic_read(&queue->in_flight_job_count))
+		list_move_tail(&queue->node, &pvr_dev->queues.idle);
+	else
+		list_move_tail(&queue->node, &pvr_dev->queues.active);
+}
+
+/**
+ * pvr_queue_update_active_state() - Update the queue active state.
+ * @queue: Queue to update the state on.
+ *
+ * Active state is based on the in_flight_job_count value.
+ *
+ * Updating the active state implies moving the queue in or out of the
+ * active queue list, which also defines whether the queue is checked
+ * or not when a FW event is received.
+ *
+ * This function should be called any time a job is submitted or its done
+ * fence is signaled.
+ */ +static void pvr_queue_update_active_state(struct pvr_queue *queue) +{ + struct pvr_device *pvr_dev = queue->ctx->pvr_dev; + + mutex_lock(&pvr_dev->queues.lock); + pvr_queue_update_active_state_locked(queue); + mutex_unlock(&pvr_dev->queues.lock); +} + +static void pvr_queue_submit_job_to_cccb(struct pvr_job *job) +{ + struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler); + struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS]; + struct pvr_cccb *cccb = &queue->cccb; + struct pvr_queue_fence *jfence; + struct dma_fence *fence; + unsigned long index; + u32 ufo_count = 0; + + /* We need to add the queue to the active list before updating the CCCB, + * otherwise we might miss the FW event informing us that something + * happened on this queue. + */ + atomic_inc(&queue->in_flight_job_count); + pvr_queue_update_active_state(queue); + + xa_for_each(&job->base.dependencies, index, fence) { + jfence = to_pvr_queue_job_fence(fence); + if (!jfence) + continue; + + /* Skip the partial render fence, we will place it at the end. */ + if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job && + &job->paired_job->base.s_fence->scheduled == fence) + continue; + + if (dma_fence_is_signaled(&jfence->base)) + continue; + + pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj, + &ufos[ufo_count].addr); + ufos[ufo_count++].value = jfence->base.seqno; + + if (ufo_count == ARRAY_SIZE(ufos)) { + pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR, + sizeof(ufos), ufos, 0, 0); + ufo_count = 0; + } + } + + /* Partial render fence goes last. */ + if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) { + jfence = to_pvr_queue_job_fence(job->paired_job->done_fence); + if (!WARN_ON(!jfence)) { + pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj, + &ufos[ufo_count].addr); + ufos[ufo_count++].value = job->paired_job->done_fence->seqno; + } + } + + if (ufo_count) { + pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR, + sizeof(ufos[0]) * ufo_count, ufos, 0, 0); + } + + if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) { + struct rogue_fwif_cmd_geom *cmd = job->cmd; + + /* Reference value for the partial render test is the current queue fence + * seqno minus one. + */ + pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, + &cmd->partial_render_geom_frag_fence.addr); + cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1; + } + + /* Submit job to FW */ + pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd, + job->id, job->id); + + /* Signal the job fence. */ + pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr); + ufos[0].value = job->done_fence->seqno; + pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE, + sizeof(ufos[0]), ufos, 0, 0); +} + +/** + * pvr_queue_run_job() - Submit a job to the FW. + * @sched_job: The job to submit. + * + * This function is called when all non-native dependencies have been met and + * when the commands resulting from this job are guaranteed to fit in the CCCB. + */ +static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job) +{ + struct pvr_job *job = container_of(sched_job, struct pvr_job, base); + struct pvr_device *pvr_dev = job->pvr_dev; + int err; + + /* The fragment job is issued along the geometry job when we use combined + * geom+frag kicks. When we get there, we should simply return the + * done_fence that's been initialized earlier. 
+	 */
+	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
+	    job->done_fence->ops) {
+		return dma_fence_get(job->done_fence);
+	}
+
+	/* The only kind of jobs that can be paired are geometry and fragment, and
+	 * we bail out early if we see a fragment job that's paired with a geometry
+	 * job.
+	 * Paired jobs must also target the same context and point to the same
+	 * HWRT.
+	 */
+	if (WARN_ON(job->paired_job &&
+		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
+		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
+		     job->hwrt != job->paired_job->hwrt ||
+		     job->ctx != job->paired_job->ctx)))
+		return ERR_PTR(-EINVAL);
+
+	err = pvr_job_get_pm_ref(job);
+	if (WARN_ON(err))
+		return ERR_PTR(err);
+
+	if (job->paired_job) {
+		err = pvr_job_get_pm_ref(job->paired_job);
+		if (WARN_ON(err))
+			return ERR_PTR(err);
+	}
+
+	/* Submit our job to the CCCB */
+	pvr_queue_submit_job_to_cccb(job);
+
+	if (job->paired_job) {
+		struct pvr_job *geom_job = job;
+		struct pvr_job *frag_job = job->paired_job;
+		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
+		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
+
+		/* Submit the fragment job along with the geometry job and
+		 * send a combined kick.
+		 */
+		pvr_queue_submit_job_to_cccb(frag_job);
+		pvr_cccb_send_kccb_combined_kick(pvr_dev,
+						 &geom_queue->cccb, &frag_queue->cccb,
+						 pvr_context_get_fw_addr(geom_job->ctx) +
+						 geom_queue->ctx_offset,
+						 pvr_context_get_fw_addr(frag_job->ctx) +
+						 frag_queue->ctx_offset,
+						 job->hwrt,
+						 frag_job->fw_ccb_cmd_type ==
+						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
+	} else {
+		struct pvr_queue *queue = container_of(job->base.sched,
+						       struct pvr_queue, scheduler);
+
+		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
+					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
+					job->hwrt);
+	}
+
+	return dma_fence_get(job->done_fence);
+}
+
+static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
+{
+	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
+}
+
+static void pvr_queue_start(struct pvr_queue *queue)
+{
+	struct pvr_job *job;
+
+	/* Make sure we CPU-signal the UFO object, so other queues don't get
+	 * blocked waiting on it.
+	 */
+	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
+
+	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
+		if (dma_fence_is_signaled(job->done_fence)) {
+			/* Jobs might have completed after drm_sched_stop() was called.
+			 * In that case, re-assign the parent field to the done_fence.
+			 */
+			WARN_ON(job->base.s_fence->parent);
+			job->base.s_fence->parent = dma_fence_get(job->done_fence);
+		} else {
+			/* If we had unfinished jobs, flag the entity as guilty so no
+			 * new job can be submitted.
+			 */
+			atomic_set(&queue->ctx->faulty, 1);
+		}
+	}
+
+	drm_sched_start(&queue->scheduler, true);
+}
+
+/**
+ * pvr_queue_timedout_job() - Handle a job timeout event.
+ * @s_job: The job this timeout occurred on.
+ *
+ * FIXME: We don't do anything here to unblock the situation, we just stop+start
+ * the scheduler, and re-assign parent fences in the middle.
+ *
+ * Return:
+ *  * DRM_GPU_SCHED_STAT_NOMINAL.
+ */ +static enum drm_gpu_sched_stat +pvr_queue_timedout_job(struct drm_sched_job *s_job) +{ + struct drm_gpu_scheduler *sched = s_job->sched; + struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler); + struct pvr_device *pvr_dev = queue->ctx->pvr_dev; + struct pvr_job *job; + u32 job_count = 0; + + dev_err(sched->dev, "Job timeout\n"); + + /* Before we stop the scheduler, make sure the queue is out of any list, so + * any call to pvr_queue_update_active_state_locked() that might happen + * until the scheduler is really stopped doesn't end up re-inserting the + * queue in the active list. This would cause + * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each + * other when accessing the pending_list, since drm_sched_stop() doesn't + * grab the job_list_lock when modifying the list (it's assuming the + * only other accessor is the scheduler, and it's safe to not grab the + * lock since it's stopped). + */ + mutex_lock(&pvr_dev->queues.lock); + list_del_init(&queue->node); + mutex_unlock(&pvr_dev->queues.lock); + + drm_sched_stop(sched, s_job); + + /* Re-assign job parent fences. */ + list_for_each_entry(job, &sched->pending_list, base.list) { + job->base.s_fence->parent = dma_fence_get(job->done_fence); + job_count++; + } + WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count); + + /* Re-insert the queue in the proper list, and kick a queue processing + * operation if there were jobs pending. + */ + mutex_lock(&pvr_dev->queues.lock); + if (!job_count) { + list_move_tail(&queue->node, &pvr_dev->queues.idle); + } else { + atomic_set(&queue->in_flight_job_count, job_count); + list_move_tail(&queue->node, &pvr_dev->queues.active); + pvr_queue_process(queue); + } + mutex_unlock(&pvr_dev->queues.lock); + + drm_sched_start(sched, true); + + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +/** + * pvr_queue_free_job() - Release the reference the scheduler had on a job object. + * @sched_job: Job object to free. + */ +static void pvr_queue_free_job(struct drm_sched_job *sched_job) +{ + struct pvr_job *job = container_of(sched_job, struct pvr_job, base); + + drm_sched_job_cleanup(sched_job); + job->paired_job = NULL; + pvr_job_put(job); +} + +static const struct drm_sched_backend_ops pvr_queue_sched_ops = { + .prepare_job = pvr_queue_prepare_job, + .run_job = pvr_queue_run_job, + .timedout_job = pvr_queue_timedout_job, + .free_job = pvr_queue_free_job, +}; + +/** + * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object + * @f: Fence to test. + * + * A UFO-backed fence is a fence that can be signaled or waited upon FW-side. + * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue + * they are pushed to, but those fences are not directly exposed to the outside + * world, so we also need to check if the fence we're being passed is a + * drm_sched_fence that was coming from our driver. + */ +bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f) +{ + struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL; + + if (sched_fence && + sched_fence->sched->ops == &pvr_queue_sched_ops) + return true; + + if (f && f->ops == &pvr_queue_job_fence_ops) + return true; + + return false; +} + +/** + * pvr_queue_signal_done_fences() - Signal done fences. + * @queue: Queue to check. + * + * Signal done fences of jobs whose seqno is less than the current value of + * the UFO object attached to the queue. 
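+ *
+ * The UFO value is a 32-bit counter, so the comparison below is done in
+ * signed 32-bit arithmetic to stay correct across wraparound; e.g. with
+ * cur_seqno == 0x00000001 and a job seqno of 0xfffffffe,
+ * (int)(0x00000001 - 0xfffffffe) == 3 >= 0, so the job is treated as done.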
+ */
+static void
+pvr_queue_signal_done_fences(struct pvr_queue *queue)
+{
+	struct pvr_job *job, *tmp_job;
+	u32 cur_seqno;
+
+	spin_lock(&queue->scheduler.job_list_lock);
+	cur_seqno = *queue->timeline_ufo.value;
+	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
+		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
+			break;
+
+		if (!dma_fence_is_signaled(job->done_fence)) {
+			dma_fence_signal(job->done_fence);
+			pvr_job_release_pm_ref(job);
+			atomic_dec(&queue->in_flight_job_count);
+		}
+	}
+	spin_unlock(&queue->scheduler.job_list_lock);
+}
+
+/**
+ * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for
+ * CCCB space can be unblocked and pushed to the CCCB.
+ * @queue: Queue to check
+ *
+ * If we have a job waiting for CCCB space, and this job now fits in the CCCB,
+ * we signal its CCCB fence, which should kick drm_sched.
+ */
+static void
+pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
+{
+	struct pvr_queue_fence *cccb_fence;
+	u32 native_deps_remaining;
+	struct pvr_job *job;
+
+	mutex_lock(&queue->cccb_fence_ctx.job_lock);
+	job = queue->cccb_fence_ctx.job;
+	if (!job)
+		goto out_unlock;
+
+	/* If we have a job attached to the CCCB fence context, its CCCB fence
+	 * shouldn't be NULL.
+	 */
+	if (WARN_ON(!job->cccb_fence)) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	/* If we get there, the CCCB fence has to be initialized. */
+	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
+	if (WARN_ON(!cccb_fence->queue)) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	/* Evict signaled dependencies before checking for CCCB space.
+	 * If the job fits, signal the CCCB fence; this should unblock
+	 * the drm_sched_entity.
+	 */
+	native_deps_remaining = job_count_remaining_native_deps(job);
+	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	dma_fence_signal(job->cccb_fence);
+	pvr_queue_fence_put(job->cccb_fence);
+	job->cccb_fence = NULL;
+	queue->cccb_fence_ctx.job = NULL;
+
+out_unlock:
+	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
+
+	pvr_job_put(job);
+}
+
+/**
+ * pvr_queue_process() - Process events that happened on a queue.
+ * @queue: Queue to check
+ *
+ * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
+ */
+void pvr_queue_process(struct pvr_queue *queue)
+{
+	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
+
+	pvr_queue_check_job_waiting_for_cccb_space(queue);
+	pvr_queue_signal_done_fences(queue);
+	pvr_queue_update_active_state_locked(queue);
+}
+
+static u32 get_dm_type(struct pvr_queue *queue)
+{
+	switch (queue->type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return PVR_FWIF_DM_GEOM;
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return PVR_FWIF_DM_FRAG;
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return PVR_FWIF_DM_CDM;
+	}
+
+	return ~0;
+}
+
+/**
+ * init_fw_context() - Initializes the queue part of a FW context.
+ * @queue: Queue object to initialize the FW context for.
+ * @fw_ctx_map: The FW context CPU mapping.
+ *
+ * FW contexts contain various pieces of state, one of them being a per-queue
+ * state that needs to be initialized for each queue exposed by a context. This
+ * function takes care of that.
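+ *
+ * The per-queue state lives @queue->ctx_offset bytes into the FW context
+ * object and carries, among other things, the CCCB control/buffer addresses,
+ * the data master type and the scheduling parameters programmed below.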
+ */
+static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
+{
+	struct pvr_context *ctx = queue->ctx;
+	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
+	struct rogue_fwif_fwcommoncontext *cctx_fw;
+	struct pvr_cccb *cccb = &queue->cccb;
+
+	cctx_fw = fw_ctx_map + queue->ctx_offset;
+	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
+	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
+
+	cctx_fw->dm = get_dm_type(queue);
+	cctx_fw->priority = ctx->priority;
+	cctx_fw->priority_seq_num = 0;
+	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
+	cctx_fw->pid = task_tgid_nr(current);
+	cctx_fw->server_common_context_id = ctx->ctx_id;
+
+	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
+
+	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
+}
+
+/**
+ * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
+ * @queue: Queue whose FW context should be cleaned up.
+ *
+ * Return:
+ * * 0 on success,
+ * * Any error returned by pvr_fw_structure_cleanup() otherwise.
+ */
+static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
+{
+	if (!queue->ctx->fw_obj)
+		return 0;
+
+	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
+					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
+					queue->ctx->fw_obj, queue->ctx_offset);
+}
+
+/**
+ * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
+ * @job: The job to initialize.
+ *
+ * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
+ * and pvr_queue_job_push() can't fail. We also make sure the context type is
+ * valid and the job can fit in the CCCB.
+ *
+ * Return:
+ * * 0 on success, or
+ * * An error code if something failed.
+ */
+int pvr_queue_job_init(struct pvr_job *job)
+{
+	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
+	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
+	struct pvr_queue *queue;
+	int err;
+
+	if (atomic_read(&job->ctx->faulty))
+		return -EIO;
+
+	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
+	if (!queue)
+		return -EINVAL;
+
+	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
+		return -E2BIG;
+
+	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
+	if (err)
+		return err;
+
+	job->cccb_fence = pvr_queue_fence_alloc();
+	job->kccb_fence = pvr_kccb_fence_alloc();
+	job->done_fence = pvr_queue_fence_alloc();
+	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * pvr_queue_job_arm() - Arm a job object.
+ * @job: The job to arm.
+ *
+ * Initializes fences and returns the drm_sched finished fence so it can
+ * be exposed to the outside world. Once this function is called, you should
+ * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
+ * no one grabbed a reference to the returned fence. The latter can happen if
+ * we do multi-job submission, and something failed when creating/initializing
+ * a job. In that case, we know the fence didn't leave the driver, and we
+ * can thus guarantee nobody will wait on a dead fence object.
+ *
+ * Return:
+ * * A dma_fence object.
+ */
+struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
+{
+	drm_sched_job_arm(&job->base);
+
+	return &job->base.s_fence->finished;
+}
+
+/**
+ * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
+ * @job: The job to cleanup.
+ *
+ * Should be called in the job release path.
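+ *
+ * Note that the drm_sched_job_cleanup() call is guarded on job->base.s_fence,
+ * so this is also safe to call on a job for which drm_sched_job_init() never
+ * succeeded.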
+ */
+void pvr_queue_job_cleanup(struct pvr_job *job)
+{
+	pvr_queue_fence_put(job->done_fence);
+	pvr_queue_fence_put(job->cccb_fence);
+	pvr_kccb_fence_put(job->kccb_fence);
+
+	if (job->base.s_fence)
+		drm_sched_job_cleanup(&job->base);
+}
+
+/**
+ * pvr_queue_job_push() - Push a job to its queue.
+ * @job: The job to push.
+ *
+ * Must be called after pvr_queue_job_init() and after all dependencies
+ * have been added to the job. This will effectively queue the job to
+ * the drm_sched_entity attached to the queue. We grab a reference on
+ * the job object, so the caller is free to drop its reference when it's
+ * done accessing the job object.
+ */
+void pvr_queue_job_push(struct pvr_job *job)
+{
+	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
+
+	/* Keep track of the last queued job scheduled fence for combined submit. */
+	dma_fence_put(queue->last_queued_job_scheduled_fence);
+	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
+
+	pvr_job_get(job);
+	drm_sched_entity_push_job(&job->base);
+}
+
+static void reg_state_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_queue *queue = priv;
+
+	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
+		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
+
+		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
+			queue->callstack_addr;
+	}
+}
+
+/**
+ * pvr_queue_create() - Create a queue object.
+ * @ctx: The context this queue will be attached to.
+ * @type: The type of jobs being pushed to this queue.
+ * @args: The arguments passed to the context creation function.
+ * @fw_ctx_map: CPU mapping of the FW context object.
+ *
+ * Create a queue object that will be used to queue and track jobs.
+ *
+ * Return:
+ * * A valid pointer to a pvr_queue object, or
+ * * An error pointer if the creation/initialization failed.
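+ *
+ * Note that @args->callstack_addr is only consumed for geometry queues, where
+ * it seeds the initial VDM call stack pointer in the register state; passing
+ * a non-zero value for a compute or transfer queue is rejected with -EINVAL.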
+ */
+struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
+				   enum drm_pvr_job_type type,
+				   struct drm_pvr_ioctl_create_context_args *args,
+				   void *fw_ctx_map)
+{
+	static const struct {
+		u32 cccb_size;
+		const char *name;
+	} props[] = {
+		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
+			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
+			.name = "geometry",
+		},
+		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
+			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
+			.name = "fragment"
+		},
+		[DRM_PVR_JOB_TYPE_COMPUTE] = {
+			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
+			.name = "compute"
+		},
+		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
+			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
+			.name = "transfer_frag"
+		},
+	};
+	struct pvr_device *pvr_dev = ctx->pvr_dev;
+	struct drm_gpu_scheduler *sched;
+	struct pvr_queue *queue;
+	int ctx_state_size, err;
+	void *cpu_map;
+
+	if (WARN_ON(type >= ARRAY_SIZE(props)))
+		return ERR_PTR(-EINVAL);
+
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
+		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
+			return ERR_PTR(-EINVAL);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
+			return ERR_PTR(-EINVAL);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
+			return ERR_PTR(-EINVAL);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx_state_size = get_ctx_state_size(pvr_dev, type);
+	if (ctx_state_size < 0)
+		return ERR_PTR(ctx_state_size);
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return ERR_PTR(-ENOMEM);
+
+	queue->type = type;
+	queue->ctx_offset = get_ctx_offset(type);
+	queue->ctx = ctx;
+	queue->callstack_addr = args->callstack_addr;
+	sched = &queue->scheduler;
+	INIT_LIST_HEAD(&queue->node);
+	mutex_init(&queue->cccb_fence_ctx.job_lock);
+	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
+	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
+
+	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
+	if (err)
+		goto err_free_queue;
+
+	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   reg_state_init, queue, &queue->reg_state_obj);
+	if (err)
+		goto err_cccb_fini;
+
+	init_fw_context(queue, fw_ctx_map);
+
+	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
+	    args->callstack_addr) {
+		err = -EINVAL;
+		goto err_release_reg_state;
+	}
+
+	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
+					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+					       NULL, NULL, &queue->timeline_ufo.fw_obj);
+	if (IS_ERR(cpu_map)) {
+		err = PTR_ERR(cpu_map);
+		goto err_release_reg_state;
+	}
+
+	queue->timeline_ufo.value = cpu_map;
+
+	err = drm_sched_init(&queue->scheduler,
+			     &pvr_queue_sched_ops,
+			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
+			     msecs_to_jiffies(500),
+			     pvr_dev->sched_wq, NULL, "pvr-queue",
+			     pvr_dev->base.dev);
+	if (err)
+		goto err_release_ufo;
+
+	err = drm_sched_entity_init(&queue->entity,
+				    DRM_SCHED_PRIORITY_KERNEL,
+				    &sched, 1, &ctx->faulty);
+	if (err)
+		goto err_sched_fini;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_add_tail(&queue->node, &pvr_dev->queues.idle);
+	mutex_unlock(&pvr_dev->queues.lock);
+
+	return queue;
+
+err_sched_fini:
+	drm_sched_fini(&queue->scheduler);
+
+err_release_ufo:
+	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
+
+err_release_reg_state:
+	pvr_fw_object_destroy(queue->reg_state_obj);
+
+err_cccb_fini:
+	pvr_cccb_fini(&queue->cccb);
+
+err_free_queue:
+	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
+	kfree(queue);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_queue_device_pre_reset() - Stop all queues before a device reset.
+ * @pvr_dev: The device that is about to be reset.
+ */
+void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
+{
+	struct pvr_queue *queue;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
+		pvr_queue_stop(queue, NULL);
+	list_for_each_entry(queue, &pvr_dev->queues.active, node)
+		pvr_queue_stop(queue, NULL);
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
+/**
+ * pvr_queue_device_post_reset() - Restart all queues after a device reset.
+ * @pvr_dev: The device that was just reset.
+ */
+void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
+{
+	struct pvr_queue *queue;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_for_each_entry(queue, &pvr_dev->queues.active, node)
+		pvr_queue_start(queue);
+	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
+		pvr_queue_start(queue);
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
+/**
+ * pvr_queue_kill() - Kill a queue.
+ * @queue: The queue to kill.
+ *
+ * Kill the queue so no new jobs can be pushed. Should be called when the
+ * context handle is destroyed. The queue object might last longer if jobs
+ * are still in flight and holding a reference to the context this queue
+ * belongs to.
+ */
+void pvr_queue_kill(struct pvr_queue *queue)
+{
+	drm_sched_entity_destroy(&queue->entity);
+	dma_fence_put(queue->last_queued_job_scheduled_fence);
+	queue->last_queued_job_scheduled_fence = NULL;
+}
+
+/**
+ * pvr_queue_destroy() - Destroy a queue.
+ * @queue: The queue to destroy.
+ *
+ * Cleanup the queue and free the resources attached to it. Should be
+ * called from the context release function.
+ */
+void pvr_queue_destroy(struct pvr_queue *queue)
+{
+	if (!queue)
+		return;
+
+	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
+	list_del_init(&queue->node);
+	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
+
+	drm_sched_fini(&queue->scheduler);
+	drm_sched_entity_fini(&queue->entity);
+
+	if (WARN_ON(queue->last_queued_job_scheduled_fence))
+		dma_fence_put(queue->last_queued_job_scheduled_fence);
+
+	pvr_queue_cleanup_fw_context(queue);
+
+	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
+	pvr_fw_object_destroy(queue->reg_state_obj);
+	pvr_cccb_fini(&queue->cccb);
+	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
+	kfree(queue);
+}
+
+/**
+ * pvr_queue_device_init() - Device-level initialization of queue related fields.
+ * @pvr_dev: The device to initialize.
+ *
+ * Initializes all fields related to queue management in pvr_device.
+ *
+ * Return:
+ * * 0 on success, or
+ * * An error code on failure.
+ */
+int pvr_queue_device_init(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	INIT_LIST_HEAD(&pvr_dev->queues.active);
+	INIT_LIST_HEAD(&pvr_dev->queues.idle);
+	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
+	if (err)
+		return err;
+
+	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
+	if (!pvr_dev->sched_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
+ * @pvr_dev: The device to cleanup.
+ *
+ * Cleanup/free all queue-related resources attached to a pvr_device object.
+ */
+void pvr_queue_device_fini(struct pvr_device *pvr_dev)
+{
+	destroy_workqueue(pvr_dev->sched_wq);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h
new file mode 100644
index 000000000000..e06ced69302f
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_queue.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_QUEUE_H
+#define PVR_QUEUE_H
+
+#include <drm/gpu_scheduler.h>
+
+#include "pvr_cccb.h"
+#include "pvr_device.h"
+
+struct pvr_context;
+struct pvr_queue;
+
+/**
+ * struct pvr_queue_fence_ctx - Queue fence context
+ *
+ * Used to implement dma_fence_ops for pvr_job::{done,cccb}_fence.
+ */
+struct pvr_queue_fence_ctx {
+	/** @id: Fence context ID allocated with dma_fence_context_alloc(). */
+	u64 id;
+
+	/** @seqno: Sequence number incremented each time a fence is created. */
+	atomic_t seqno;
+
+	/** @lock: Lock used to synchronize access to fences allocated by this context. */
+	spinlock_t lock;
+};
+
+/**
+ * struct pvr_queue_cccb_fence_ctx - CCCB fence context
+ *
+ * Context used to manage fences controlling access to the CCCB. No fences are
+ * issued if there's enough space in the CCCB to push job commands.
+ */
+struct pvr_queue_cccb_fence_ctx {
+	/** @base: Base queue fence context. */
+	struct pvr_queue_fence_ctx base;
+
+	/**
+	 * @job: Job waiting for CCCB space.
+	 *
+	 * Thanks to the serialization done at the drm_sched_entity level,
+	 * there's no more than one job waiting for CCCB space at a given time.
+	 *
+	 * This field is NULL if no jobs are currently waiting for CCCB space.
+	 *
+	 * Must be accessed with @job_lock held.
+	 */
+	struct pvr_job *job;
+
+	/** @job_lock: Lock protecting access to the job object. */
+	struct mutex job_lock;
+};
+
+/**
+ * struct pvr_queue_fence - Queue fence object
+ */
+struct pvr_queue_fence {
+	/** @base: Base dma_fence. */
+	struct dma_fence base;
+
+	/** @queue: Queue that created this fence. */
+	struct pvr_queue *queue;
+};
+
+/**
+ * struct pvr_queue - Job queue
+ *
+ * Used to queue and track execution of pvr_job objects.
+ */
+struct pvr_queue {
+	/** @scheduler: Single entity scheduler used to push jobs to this queue. */
+	struct drm_gpu_scheduler scheduler;
+
+	/** @entity: Scheduling entity backing this queue. */
+	struct drm_sched_entity entity;
+
+	/** @type: Type of jobs queued to this queue. */
+	enum drm_pvr_job_type type;
+
+	/** @ctx: Context object this queue is bound to. */
+	struct pvr_context *ctx;
+
+	/** @node: Used to add the queue to the active/idle queue list. */
+	struct list_head node;
+
+	/**
+	 * @in_flight_job_count: Number of jobs submitted to the CCCB that
+	 * have not been processed yet.
+	 */
+	atomic_t in_flight_job_count;
+
+	/**
+	 * @cccb_fence_ctx: CCCB fence context.
+	 *
+	 * Used to gate access to the CCCB when it is full, such that we don't
+	 * end up trying to push commands to the CCCB if there's not enough
+	 * space to receive all commands needed for a job to complete.
+	 */
+	struct pvr_queue_cccb_fence_ctx cccb_fence_ctx;
+
+	/** @job_fence_ctx: Job fence context object. */
+	struct pvr_queue_fence_ctx job_fence_ctx;
+
+	/** @timeline_ufo: Timeline UFO for the context queue. */
+	struct {
+		/** @fw_obj: FW object representing the UFO value. */
+		struct pvr_fw_object *fw_obj;
+
+		/** @value: CPU mapping of the UFO value. */
+		u32 *value;
+	} timeline_ufo;
+
+	/**
+	 * @last_queued_job_scheduled_fence: The scheduled fence of the last
+	 * job queued to this queue.
+	 *
+	 * We use it to insert frag -> geom dependencies when issuing combined
+	 * geom+frag jobs, to guarantee that the fragment job that's part of
+	 * the combined operation comes after all fragment jobs that were queued
+	 * before it.
+	 */
+	struct dma_fence *last_queued_job_scheduled_fence;
+
+	/** @cccb: Client Circular Command Buffer.
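+	 *
+	 * Job commands are only inserted here once pvr_cccb_cmdseq_fits()
+	 * confirms the whole command sequence fits; otherwise the job is
+	 * parked on @cccb_fence_ctx until enough space is reclaimed.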
+	 */
+	struct pvr_cccb cccb;
+
+	/** @reg_state_obj: FW object representing the register state of this queue. */
+	struct pvr_fw_object *reg_state_obj;
+
+	/** @ctx_offset: Offset of the queue context in the FW context object. */
+	u32 ctx_offset;
+
+	/** @callstack_addr: Initial call stack address for register state object. */
+	u64 callstack_addr;
+};
+
+bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f);
+
+int pvr_queue_job_init(struct pvr_job *job);
+
+void pvr_queue_job_cleanup(struct pvr_job *job);
+
+void pvr_queue_job_push(struct pvr_job *job);
+
+struct dma_fence *pvr_queue_job_arm(struct pvr_job *job);
+
+struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
+				   enum drm_pvr_job_type type,
+				   struct drm_pvr_ioctl_create_context_args *args,
+				   void *fw_ctx_map);
+
+void pvr_queue_kill(struct pvr_queue *queue);
+
+void pvr_queue_destroy(struct pvr_queue *queue);
+
+void pvr_queue_process(struct pvr_queue *queue);
+
+void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev);
+
+void pvr_queue_device_post_reset(struct pvr_device *pvr_dev);
+
+int pvr_queue_device_init(struct pvr_device *pvr_dev);
+
+void pvr_queue_device_fini(struct pvr_device *pvr_dev);
+
+#endif /* PVR_QUEUE_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h
new file mode 100644
index 000000000000..2a90d02796d3
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h
@@ -0,0 +1,6193 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+/* *** Autogenerated C -- do not edit *** */
+
+#ifndef PVR_ROGUE_CR_DEFS_H
+#define PVR_ROGUE_CR_DEFS_H
+
+/* clang-format off */
+
+#define ROGUE_CR_DEFS_REVISION 1
+
+/* Register ROGUE_CR_RASTERISATION_INDIRECT */
+#define ROGUE_CR_RASTERISATION_INDIRECT 0x8238U
+#define ROGUE_CR_RASTERISATION_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_RASTERISATION_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_PBE_INDIRECT */
+#define ROGUE_CR_PBE_INDIRECT 0x83E0U
+#define ROGUE_CR_PBE_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_PBE_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_PBE_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_PBE_PERF_INDIRECT */
+#define ROGUE_CR_PBE_PERF_INDIRECT 0x83D8U
+#define ROGUE_CR_PBE_PERF_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_PBE_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_PBE_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_TPU_PERF_INDIRECT */
+#define ROGUE_CR_TPU_PERF_INDIRECT 0x83F0U
+#define ROGUE_CR_TPU_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_TPU_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TPU_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_RASTERISATION_PERF_INDIRECT */
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT 0x8318U
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT */
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT 0x8028U
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_USC_PERF_INDIRECT */
+#define ROGUE_CR_USC_PERF_INDIRECT 0x8030U
+#define ROGUE_CR_USC_PERF_INDIRECT_MASKFULL 0x000000000000000FULL +#define ROGUE_CR_USC_PERF_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_USC_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U + +/* Register ROGUE_CR_BLACKPEARL_INDIRECT */ +#define ROGUE_CR_BLACKPEARL_INDIRECT 0x8388U +#define ROGUE_CR_BLACKPEARL_INDIRECT_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_BLACKPEARL_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_BLACKPEARL_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_BLACKPEARL_PERF_INDIRECT */ +#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT 0x83F8U +#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_TEXAS3_PERF_INDIRECT */ +#define ROGUE_CR_TEXAS3_PERF_INDIRECT 0x83D0U +#define ROGUE_CR_TEXAS3_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL +#define ROGUE_CR_TEXAS3_PERF_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_TEXAS3_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U + +/* Register ROGUE_CR_TEXAS_PERF_INDIRECT */ +#define ROGUE_CR_TEXAS_PERF_INDIRECT 0x8288U +#define ROGUE_CR_TEXAS_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_TEXAS_PERF_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_TEXAS_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_BX_TU_PERF_INDIRECT */ +#define ROGUE_CR_BX_TU_PERF_INDIRECT 0xC900U +#define ROGUE_CR_BX_TU_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_BX_TU_PERF_INDIRECT_ADDRESS_SHIFT 0U +#define ROGUE_CR_BX_TU_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_CLK_CTRL */ +#define ROGUE_CR_CLK_CTRL 0x0000U +#define ROGUE_CR_CLK_CTRL__PBE2_XE__MASKFULL 0xFFFFFF003F3FFFFFULL +#define ROGUE_CR_CLK_CTRL__S7_TOP__MASKFULL 0xCFCF03000F3F3F0FULL +#define ROGUE_CR_CLK_CTRL_MASKFULL 0xFFFFFF003F3FFFFFULL +#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_SHIFT 62U +#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_CLRMSK 0x3FFFFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_ON 0x4000000000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_AUTO 0x8000000000000000ULL +#define ROGUE_CR_CLK_CTRL_IPP_SHIFT 60U +#define ROGUE_CR_CLK_CTRL_IPP_CLRMSK 0xCFFFFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_IPP_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_IPP_ON 0x1000000000000000ULL +#define ROGUE_CR_CLK_CTRL_IPP_AUTO 0x2000000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBC_SHIFT 58U +#define ROGUE_CR_CLK_CTRL_FBC_CLRMSK 0xF3FFFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_FBC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBC_ON 0x0400000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBC_AUTO 0x0800000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBDC_SHIFT 56U +#define ROGUE_CR_CLK_CTRL_FBDC_CLRMSK 0xFCFFFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_FBDC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBDC_ON 0x0100000000000000ULL +#define ROGUE_CR_CLK_CTRL_FBDC_AUTO 0x0200000000000000ULL +#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_SHIFT 54U +#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_CLRMSK 0xFF3FFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_ON 0x0040000000000000ULL +#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_AUTO 0x0080000000000000ULL +#define ROGUE_CR_CLK_CTRL_USCS_SHIFT 52U +#define ROGUE_CR_CLK_CTRL_USCS_CLRMSK 0xFFCFFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_USCS_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_USCS_ON 0x0010000000000000ULL +#define 
ROGUE_CR_CLK_CTRL_USCS_AUTO 0x0020000000000000ULL +#define ROGUE_CR_CLK_CTRL_PBE_SHIFT 50U +#define ROGUE_CR_CLK_CTRL_PBE_CLRMSK 0xFFF3FFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_PBE_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_PBE_ON 0x0004000000000000ULL +#define ROGUE_CR_CLK_CTRL_PBE_AUTO 0x0008000000000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L1_SHIFT 48U +#define ROGUE_CR_CLK_CTRL_MCU_L1_CLRMSK 0xFFFCFFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_MCU_L1_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L1_ON 0x0001000000000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L1_AUTO 0x0002000000000000ULL +#define ROGUE_CR_CLK_CTRL_CDM_SHIFT 46U +#define ROGUE_CR_CLK_CTRL_CDM_CLRMSK 0xFFFF3FFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_CDM_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_CDM_ON 0x0000400000000000ULL +#define ROGUE_CR_CLK_CTRL_CDM_AUTO 0x0000800000000000ULL +#define ROGUE_CR_CLK_CTRL_SIDEKICK_SHIFT 44U +#define ROGUE_CR_CLK_CTRL_SIDEKICK_CLRMSK 0xFFFFCFFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_SIDEKICK_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_SIDEKICK_ON 0x0000100000000000ULL +#define ROGUE_CR_CLK_CTRL_SIDEKICK_AUTO 0x0000200000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_SHIFT 42U +#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_CLRMSK 0xFFFFF3FFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_ON 0x0000040000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_AUTO 0x0000080000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_SHIFT 40U +#define ROGUE_CR_CLK_CTRL_BIF_CLRMSK 0xFFFFFCFFFFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_BIF_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_ON 0x0000010000000000ULL +#define ROGUE_CR_CLK_CTRL_BIF_AUTO 0x0000020000000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_SHIFT 28U +#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFCFFFFFFFULL +#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_ON 0x0000000010000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_AUTO 0x0000000020000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L0_SHIFT 26U +#define ROGUE_CR_CLK_CTRL_MCU_L0_CLRMSK 0xFFFFFFFFF3FFFFFFULL +#define ROGUE_CR_CLK_CTRL_MCU_L0_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L0_ON 0x0000000004000000ULL +#define ROGUE_CR_CLK_CTRL_MCU_L0_AUTO 0x0000000008000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_SHIFT 24U +#define ROGUE_CR_CLK_CTRL_TPU_CLRMSK 0xFFFFFFFFFCFFFFFFULL +#define ROGUE_CR_CLK_CTRL_TPU_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_ON 0x0000000001000000ULL +#define ROGUE_CR_CLK_CTRL_TPU_AUTO 0x0000000002000000ULL +#define ROGUE_CR_CLK_CTRL_USC_SHIFT 20U +#define ROGUE_CR_CLK_CTRL_USC_CLRMSK 0xFFFFFFFFFFCFFFFFULL +#define ROGUE_CR_CLK_CTRL_USC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_USC_ON 0x0000000000100000ULL +#define ROGUE_CR_CLK_CTRL_USC_AUTO 0x0000000000200000ULL +#define ROGUE_CR_CLK_CTRL_TLA_SHIFT 18U +#define ROGUE_CR_CLK_CTRL_TLA_CLRMSK 0xFFFFFFFFFFF3FFFFULL +#define ROGUE_CR_CLK_CTRL_TLA_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_TLA_ON 0x0000000000040000ULL +#define ROGUE_CR_CLK_CTRL_TLA_AUTO 0x0000000000080000ULL +#define ROGUE_CR_CLK_CTRL_SLC_SHIFT 16U +#define ROGUE_CR_CLK_CTRL_SLC_CLRMSK 0xFFFFFFFFFFFCFFFFULL +#define ROGUE_CR_CLK_CTRL_SLC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_SLC_ON 0x0000000000010000ULL +#define ROGUE_CR_CLK_CTRL_SLC_AUTO 0x0000000000020000ULL +#define ROGUE_CR_CLK_CTRL_UVS_SHIFT 14U +#define ROGUE_CR_CLK_CTRL_UVS_CLRMSK 
0xFFFFFFFFFFFF3FFFULL +#define ROGUE_CR_CLK_CTRL_UVS_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_UVS_ON 0x0000000000004000ULL +#define ROGUE_CR_CLK_CTRL_UVS_AUTO 0x0000000000008000ULL +#define ROGUE_CR_CLK_CTRL_PDS_SHIFT 12U +#define ROGUE_CR_CLK_CTRL_PDS_CLRMSK 0xFFFFFFFFFFFFCFFFULL +#define ROGUE_CR_CLK_CTRL_PDS_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_PDS_ON 0x0000000000001000ULL +#define ROGUE_CR_CLK_CTRL_PDS_AUTO 0x0000000000002000ULL +#define ROGUE_CR_CLK_CTRL_VDM_SHIFT 10U +#define ROGUE_CR_CLK_CTRL_VDM_CLRMSK 0xFFFFFFFFFFFFF3FFULL +#define ROGUE_CR_CLK_CTRL_VDM_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_VDM_ON 0x0000000000000400ULL +#define ROGUE_CR_CLK_CTRL_VDM_AUTO 0x0000000000000800ULL +#define ROGUE_CR_CLK_CTRL_PM_SHIFT 8U +#define ROGUE_CR_CLK_CTRL_PM_CLRMSK 0xFFFFFFFFFFFFFCFFULL +#define ROGUE_CR_CLK_CTRL_PM_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_PM_ON 0x0000000000000100ULL +#define ROGUE_CR_CLK_CTRL_PM_AUTO 0x0000000000000200ULL +#define ROGUE_CR_CLK_CTRL_GPP_SHIFT 6U +#define ROGUE_CR_CLK_CTRL_GPP_CLRMSK 0xFFFFFFFFFFFFFF3FULL +#define ROGUE_CR_CLK_CTRL_GPP_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_GPP_ON 0x0000000000000040ULL +#define ROGUE_CR_CLK_CTRL_GPP_AUTO 0x0000000000000080ULL +#define ROGUE_CR_CLK_CTRL_TE_SHIFT 4U +#define ROGUE_CR_CLK_CTRL_TE_CLRMSK 0xFFFFFFFFFFFFFFCFULL +#define ROGUE_CR_CLK_CTRL_TE_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_TE_ON 0x0000000000000010ULL +#define ROGUE_CR_CLK_CTRL_TE_AUTO 0x0000000000000020ULL +#define ROGUE_CR_CLK_CTRL_TSP_SHIFT 2U +#define ROGUE_CR_CLK_CTRL_TSP_CLRMSK 0xFFFFFFFFFFFFFFF3ULL +#define ROGUE_CR_CLK_CTRL_TSP_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_TSP_ON 0x0000000000000004ULL +#define ROGUE_CR_CLK_CTRL_TSP_AUTO 0x0000000000000008ULL +#define ROGUE_CR_CLK_CTRL_ISP_SHIFT 0U +#define ROGUE_CR_CLK_CTRL_ISP_CLRMSK 0xFFFFFFFFFFFFFFFCULL +#define ROGUE_CR_CLK_CTRL_ISP_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL_ISP_ON 0x0000000000000001ULL +#define ROGUE_CR_CLK_CTRL_ISP_AUTO 0x0000000000000002ULL + +/* Register ROGUE_CR_CLK_STATUS */ +#define ROGUE_CR_CLK_STATUS 0x0008U +#define ROGUE_CR_CLK_STATUS__PBE2_XE__MASKFULL 0x00000001FFF077FFULL +#define ROGUE_CR_CLK_STATUS__S7_TOP__MASKFULL 0x00000001B3101773ULL +#define ROGUE_CR_CLK_STATUS_MASKFULL 0x00000001FFF077FFULL +#define ROGUE_CR_CLK_STATUS_MCU_FBTC_SHIFT 32U +#define ROGUE_CR_CLK_STATUS_MCU_FBTC_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_CLK_STATUS_MCU_FBTC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_MCU_FBTC_RUNNING 0x0000000100000000ULL +#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_SHIFT 31U +#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_RUNNING 0x0000000080000000ULL +#define ROGUE_CR_CLK_STATUS_IPP_SHIFT 30U +#define ROGUE_CR_CLK_STATUS_IPP_CLRMSK 0xFFFFFFFFBFFFFFFFULL +#define ROGUE_CR_CLK_STATUS_IPP_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_IPP_RUNNING 0x0000000040000000ULL +#define ROGUE_CR_CLK_STATUS_FBC_SHIFT 29U +#define ROGUE_CR_CLK_STATUS_FBC_CLRMSK 0xFFFFFFFFDFFFFFFFULL +#define ROGUE_CR_CLK_STATUS_FBC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_FBC_RUNNING 0x0000000020000000ULL +#define ROGUE_CR_CLK_STATUS_FBDC_SHIFT 28U +#define ROGUE_CR_CLK_STATUS_FBDC_CLRMSK 0xFFFFFFFFEFFFFFFFULL +#define ROGUE_CR_CLK_STATUS_FBDC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_FBDC_RUNNING 0x0000000010000000ULL 
+#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_SHIFT 27U +#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_CLRMSK 0xFFFFFFFFF7FFFFFFULL +#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_RUNNING 0x0000000008000000ULL +#define ROGUE_CR_CLK_STATUS_USCS_SHIFT 26U +#define ROGUE_CR_CLK_STATUS_USCS_CLRMSK 0xFFFFFFFFFBFFFFFFULL +#define ROGUE_CR_CLK_STATUS_USCS_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_USCS_RUNNING 0x0000000004000000ULL +#define ROGUE_CR_CLK_STATUS_PBE_SHIFT 25U +#define ROGUE_CR_CLK_STATUS_PBE_CLRMSK 0xFFFFFFFFFDFFFFFFULL +#define ROGUE_CR_CLK_STATUS_PBE_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_PBE_RUNNING 0x0000000002000000ULL +#define ROGUE_CR_CLK_STATUS_MCU_L1_SHIFT 24U +#define ROGUE_CR_CLK_STATUS_MCU_L1_CLRMSK 0xFFFFFFFFFEFFFFFFULL +#define ROGUE_CR_CLK_STATUS_MCU_L1_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_MCU_L1_RUNNING 0x0000000001000000ULL +#define ROGUE_CR_CLK_STATUS_CDM_SHIFT 23U +#define ROGUE_CR_CLK_STATUS_CDM_CLRMSK 0xFFFFFFFFFF7FFFFFULL +#define ROGUE_CR_CLK_STATUS_CDM_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_CDM_RUNNING 0x0000000000800000ULL +#define ROGUE_CR_CLK_STATUS_SIDEKICK_SHIFT 22U +#define ROGUE_CR_CLK_STATUS_SIDEKICK_CLRMSK 0xFFFFFFFFFFBFFFFFULL +#define ROGUE_CR_CLK_STATUS_SIDEKICK_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_SIDEKICK_RUNNING 0x0000000000400000ULL +#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_SHIFT 21U +#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_RUNNING 0x0000000000200000ULL +#define ROGUE_CR_CLK_STATUS_BIF_SHIFT 20U +#define ROGUE_CR_CLK_STATUS_BIF_CLRMSK 0xFFFFFFFFFFEFFFFFULL +#define ROGUE_CR_CLK_STATUS_BIF_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_BIF_RUNNING 0x0000000000100000ULL +#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_SHIFT 14U +#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFFFFFBFFFULL +#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_RUNNING 0x0000000000004000ULL +#define ROGUE_CR_CLK_STATUS_MCU_L0_SHIFT 13U +#define ROGUE_CR_CLK_STATUS_MCU_L0_CLRMSK 0xFFFFFFFFFFFFDFFFULL +#define ROGUE_CR_CLK_STATUS_MCU_L0_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_MCU_L0_RUNNING 0x0000000000002000ULL +#define ROGUE_CR_CLK_STATUS_TPU_SHIFT 12U +#define ROGUE_CR_CLK_STATUS_TPU_CLRMSK 0xFFFFFFFFFFFFEFFFULL +#define ROGUE_CR_CLK_STATUS_TPU_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_TPU_RUNNING 0x0000000000001000ULL +#define ROGUE_CR_CLK_STATUS_USC_SHIFT 10U +#define ROGUE_CR_CLK_STATUS_USC_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_CLK_STATUS_USC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_USC_RUNNING 0x0000000000000400ULL +#define ROGUE_CR_CLK_STATUS_TLA_SHIFT 9U +#define ROGUE_CR_CLK_STATUS_TLA_CLRMSK 0xFFFFFFFFFFFFFDFFULL +#define ROGUE_CR_CLK_STATUS_TLA_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_TLA_RUNNING 0x0000000000000200ULL +#define ROGUE_CR_CLK_STATUS_SLC_SHIFT 8U +#define ROGUE_CR_CLK_STATUS_SLC_CLRMSK 0xFFFFFFFFFFFFFEFFULL +#define ROGUE_CR_CLK_STATUS_SLC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_SLC_RUNNING 0x0000000000000100ULL +#define ROGUE_CR_CLK_STATUS_UVS_SHIFT 7U +#define ROGUE_CR_CLK_STATUS_UVS_CLRMSK 0xFFFFFFFFFFFFFF7FULL +#define ROGUE_CR_CLK_STATUS_UVS_GATED 0x0000000000000000ULL +#define 
ROGUE_CR_CLK_STATUS_UVS_RUNNING 0x0000000000000080ULL +#define ROGUE_CR_CLK_STATUS_PDS_SHIFT 6U +#define ROGUE_CR_CLK_STATUS_PDS_CLRMSK 0xFFFFFFFFFFFFFFBFULL +#define ROGUE_CR_CLK_STATUS_PDS_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_PDS_RUNNING 0x0000000000000040ULL +#define ROGUE_CR_CLK_STATUS_VDM_SHIFT 5U +#define ROGUE_CR_CLK_STATUS_VDM_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_CLK_STATUS_VDM_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_VDM_RUNNING 0x0000000000000020ULL +#define ROGUE_CR_CLK_STATUS_PM_SHIFT 4U +#define ROGUE_CR_CLK_STATUS_PM_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_CLK_STATUS_PM_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_PM_RUNNING 0x0000000000000010ULL +#define ROGUE_CR_CLK_STATUS_GPP_SHIFT 3U +#define ROGUE_CR_CLK_STATUS_GPP_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_CLK_STATUS_GPP_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_GPP_RUNNING 0x0000000000000008ULL +#define ROGUE_CR_CLK_STATUS_TE_SHIFT 2U +#define ROGUE_CR_CLK_STATUS_TE_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_CLK_STATUS_TE_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_TE_RUNNING 0x0000000000000004ULL +#define ROGUE_CR_CLK_STATUS_TSP_SHIFT 1U +#define ROGUE_CR_CLK_STATUS_TSP_CLRMSK 0xFFFFFFFFFFFFFFFDULL +#define ROGUE_CR_CLK_STATUS_TSP_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_TSP_RUNNING 0x0000000000000002ULL +#define ROGUE_CR_CLK_STATUS_ISP_SHIFT 0U +#define ROGUE_CR_CLK_STATUS_ISP_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_CLK_STATUS_ISP_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS_ISP_RUNNING 0x0000000000000001ULL + +/* Register ROGUE_CR_CORE_ID */ +#define ROGUE_CR_CORE_ID__PBVNC 0x0020U +#define ROGUE_CR_CORE_ID__PBVNC__MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_CORE_ID__PBVNC__BRANCH_ID_SHIFT 48U +#define ROGUE_CR_CORE_ID__PBVNC__BRANCH_ID_CLRMSK 0x0000FFFFFFFFFFFFULL +#define ROGUE_CR_CORE_ID__PBVNC__VERSION_ID_SHIFT 32U +#define ROGUE_CR_CORE_ID__PBVNC__VERSION_ID_CLRMSK 0xFFFF0000FFFFFFFFULL +#define ROGUE_CR_CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS_SHIFT 16U +#define ROGUE_CR_CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS_CLRMSK 0xFFFFFFFF0000FFFFULL +#define ROGUE_CR_CORE_ID__PBVNC__CONFIG_ID_SHIFT 0U +#define ROGUE_CR_CORE_ID__PBVNC__CONFIG_ID_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_CORE_ID */ +#define ROGUE_CR_CORE_ID 0x0018U +#define ROGUE_CR_CORE_ID_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_CORE_ID_ID_SHIFT 16U +#define ROGUE_CR_CORE_ID_ID_CLRMSK 0x0000FFFFU +#define ROGUE_CR_CORE_ID_CONFIG_SHIFT 0U +#define ROGUE_CR_CORE_ID_CONFIG_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_CORE_REVISION */ +#define ROGUE_CR_CORE_REVISION 0x0020U +#define ROGUE_CR_CORE_REVISION_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_CORE_REVISION_DESIGNER_SHIFT 24U +#define ROGUE_CR_CORE_REVISION_DESIGNER_CLRMSK 0x00FFFFFFU +#define ROGUE_CR_CORE_REVISION_MAJOR_SHIFT 16U +#define ROGUE_CR_CORE_REVISION_MAJOR_CLRMSK 0xFF00FFFFU +#define ROGUE_CR_CORE_REVISION_MINOR_SHIFT 8U +#define ROGUE_CR_CORE_REVISION_MINOR_CLRMSK 0xFFFF00FFU +#define ROGUE_CR_CORE_REVISION_MAINTENANCE_SHIFT 0U +#define ROGUE_CR_CORE_REVISION_MAINTENANCE_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_DESIGNER_REV_FIELD1 */ +#define ROGUE_CR_DESIGNER_REV_FIELD1 0x0028U +#define ROGUE_CR_DESIGNER_REV_FIELD1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_DESIGNER_REV_FIELD1_DESIGNER_REV_FIELD1_SHIFT 0U +#define ROGUE_CR_DESIGNER_REV_FIELD1_DESIGNER_REV_FIELD1_CLRMSK 0x00000000U + +/* Register 
ROGUE_CR_DESIGNER_REV_FIELD2 */ +#define ROGUE_CR_DESIGNER_REV_FIELD2 0x0030U +#define ROGUE_CR_DESIGNER_REV_FIELD2_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_DESIGNER_REV_FIELD2_DESIGNER_REV_FIELD2_SHIFT 0U +#define ROGUE_CR_DESIGNER_REV_FIELD2_DESIGNER_REV_FIELD2_CLRMSK 0x00000000U + +/* Register ROGUE_CR_CHANGESET_NUMBER */ +#define ROGUE_CR_CHANGESET_NUMBER 0x0040U +#define ROGUE_CR_CHANGESET_NUMBER_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_CHANGESET_NUMBER_CHANGESET_NUMBER_SHIFT 0U +#define ROGUE_CR_CHANGESET_NUMBER_CHANGESET_NUMBER_CLRMSK 0x0000000000000000ULL + +/* Register ROGUE_CR_CLK_XTPLUS_CTRL */ +#define ROGUE_CR_CLK_XTPLUS_CTRL 0x0080U +#define ROGUE_CR_CLK_XTPLUS_CTRL_MASKFULL 0x0000003FFFFF0000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_SHIFT 36U +#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_CLRMSK 0xFFFFFFCFFFFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_ON 0x0000001000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_AUTO 0x0000002000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_SHIFT 34U +#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_CLRMSK 0xFFFFFFF3FFFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_ON 0x0000000400000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_AUTO 0x0000000800000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_SHIFT 32U +#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_CLRMSK 0xFFFFFFFCFFFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_ON 0x0000000100000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_AUTO 0x0000000200000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_SHIFT 30U +#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_CLRMSK 0xFFFFFFFF3FFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_ON 0x0000000040000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_AUTO 0x0000000080000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_SHIFT 28U +#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_CLRMSK 0xFFFFFFFFCFFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_ON 0x0000000010000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_AUTO 0x0000000020000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_SHIFT 26U +#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_CLRMSK 0xFFFFFFFFF3FFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_ON 0x0000000004000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_AUTO 0x0000000008000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_SHIFT 24U +#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_CLRMSK 0xFFFFFFFFFCFFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_ON 0x0000000001000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_AUTO 0x0000000002000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_SHIFT 22U +#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_CLRMSK 0xFFFFFFFFFF3FFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_ON 0x0000000000400000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_AUTO 0x0000000000800000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_SHIFT 20U +#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_CLRMSK 0xFFFFFFFFFFCFFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_ON 
0x0000000000100000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_AUTO 0x0000000000200000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_SHIFT 18U +#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_CLRMSK 0xFFFFFFFFFFF3FFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_ON 0x0000000000040000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_AUTO 0x0000000000080000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_SHIFT 16U +#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_CLRMSK 0xFFFFFFFFFFFCFFFFULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_ON 0x0000000000010000ULL +#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_AUTO 0x0000000000020000ULL + +/* Register ROGUE_CR_CLK_XTPLUS_STATUS */ +#define ROGUE_CR_CLK_XTPLUS_STATUS 0x0088U +#define ROGUE_CR_CLK_XTPLUS_STATUS_MASKFULL 0x00000000000007FFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_SHIFT 10U +#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_RUNNING 0x0000000000000400ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_SHIFT 9U +#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_CLRMSK 0xFFFFFFFFFFFFFDFFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_RUNNING 0x0000000000000200ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_SHIFT 8U +#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_CLRMSK 0xFFFFFFFFFFFFFEFFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_RUNNING 0x0000000000000100ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_SHIFT 7U +#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_CLRMSK 0xFFFFFFFFFFFFFF7FULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_RUNNING 0x0000000000000080ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_SHIFT 6U +#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_CLRMSK 0xFFFFFFFFFFFFFFBFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_RUNNING 0x0000000000000040ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_SHIFT 5U +#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_RUNNING 0x0000000000000020ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_SHIFT 4U +#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_RUNNING 0x0000000000000010ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_SHIFT 3U +#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_RUNNING 0x0000000000000008ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_SHIFT 2U +#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_RUNNING 0x0000000000000004ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_SHIFT 1U +#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_CLRMSK 0xFFFFFFFFFFFFFFFDULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_GATED 
0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_RUNNING 0x0000000000000002ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_SHIFT 0U +#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_RUNNING 0x0000000000000001ULL + +/* Register ROGUE_CR_SOFT_RESET */ +#define ROGUE_CR_SOFT_RESET 0x0100U +#define ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL 0xFFEFFFFFFFFFFC3DULL +#define ROGUE_CR_SOFT_RESET_MASKFULL 0x00E7FFFFFFFFFC3DULL +#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_SHIFT 63U +#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_CLRMSK 0x7FFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_EN 0x8000000000000000ULL +#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_SHIFT 62U +#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_EN 0x4000000000000000ULL +#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_SHIFT 61U +#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_EN 0x2000000000000000ULL +#define ROGUE_CR_SOFT_RESET_JONES_CORE_SHIFT 60U +#define ROGUE_CR_SOFT_RESET_JONES_CORE_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_JONES_CORE_EN 0x1000000000000000ULL +#define ROGUE_CR_SOFT_RESET_TILING_CORE_SHIFT 59U +#define ROGUE_CR_SOFT_RESET_TILING_CORE_CLRMSK 0xF7FFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_TILING_CORE_EN 0x0800000000000000ULL +#define ROGUE_CR_SOFT_RESET_TE3_SHIFT 58U +#define ROGUE_CR_SOFT_RESET_TE3_CLRMSK 0xFBFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_TE3_EN 0x0400000000000000ULL +#define ROGUE_CR_SOFT_RESET_VCE_SHIFT 57U +#define ROGUE_CR_SOFT_RESET_VCE_CLRMSK 0xFDFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_VCE_EN 0x0200000000000000ULL +#define ROGUE_CR_SOFT_RESET_VBS_SHIFT 56U +#define ROGUE_CR_SOFT_RESET_VBS_CLRMSK 0xFEFFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_VBS_EN 0x0100000000000000ULL +#define ROGUE_CR_SOFT_RESET_DPX1_CORE_SHIFT 55U +#define ROGUE_CR_SOFT_RESET_DPX1_CORE_CLRMSK 0xFF7FFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DPX1_CORE_EN 0x0080000000000000ULL +#define ROGUE_CR_SOFT_RESET_DPX0_CORE_SHIFT 54U +#define ROGUE_CR_SOFT_RESET_DPX0_CORE_CLRMSK 0xFFBFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DPX0_CORE_EN 0x0040000000000000ULL +#define ROGUE_CR_SOFT_RESET_FBA_SHIFT 53U +#define ROGUE_CR_SOFT_RESET_FBA_CLRMSK 0xFFDFFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_FBA_EN 0x0020000000000000ULL +#define ROGUE_CR_SOFT_RESET_FB_CDC_SHIFT 51U +#define ROGUE_CR_SOFT_RESET_FB_CDC_CLRMSK 0xFFF7FFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_FB_CDC_EN 0x0008000000000000ULL +#define ROGUE_CR_SOFT_RESET_SH_SHIFT 50U +#define ROGUE_CR_SOFT_RESET_SH_CLRMSK 0xFFFBFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_SH_EN 0x0004000000000000ULL +#define ROGUE_CR_SOFT_RESET_VRDM_SHIFT 49U +#define ROGUE_CR_SOFT_RESET_VRDM_CLRMSK 0xFFFDFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_VRDM_EN 0x0002000000000000ULL +#define ROGUE_CR_SOFT_RESET_MCU_FBTC_SHIFT 48U +#define ROGUE_CR_SOFT_RESET_MCU_FBTC_CLRMSK 0xFFFEFFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_MCU_FBTC_EN 0x0001000000000000ULL +#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_SHIFT 47U +#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_CLRMSK 0xFFFF7FFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_EN 0x0000800000000000ULL +#define ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_SHIFT 46U +#define ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_CLRMSK 0xFFFFBFFFFFFFFFFFULL +#define 
ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_EN 0x0000400000000000ULL +#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_SHIFT 45U +#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_CLRMSK 0xFFFFDFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_EN 0x0000200000000000ULL +#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_SHIFT 44U +#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_CLRMSK 0xFFFFEFFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_EN 0x0000100000000000ULL +#define ROGUE_CR_SOFT_RESET_IPP_SHIFT 43U +#define ROGUE_CR_SOFT_RESET_IPP_CLRMSK 0xFFFFF7FFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_IPP_EN 0x0000080000000000ULL +#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_SHIFT 42U +#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_CLRMSK 0xFFFFFBFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_EN 0x0000040000000000ULL +#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_SHIFT 41U +#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_CLRMSK 0xFFFFFDFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_EN 0x0000020000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_SHIFT 40U +#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_CLRMSK 0xFFFFFEFFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_EN 0x0000010000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_SHIFT 39U +#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_CLRMSK 0xFFFFFF7FFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_EN 0x0000008000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_SHIFT 38U +#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_CLRMSK 0xFFFFFFBFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_EN 0x0000004000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_SHIFT 37U +#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_CLRMSK 0xFFFFFFDFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_EN 0x0000002000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_SHIFT 36U +#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_CLRMSK 0xFFFFFFEFFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_EN 0x0000001000000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_SHIFT 35U +#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_CLRMSK 0xFFFFFFF7FFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_EN 0x0000000800000000ULL +#define ROGUE_CR_SOFT_RESET_MMU_SHIFT 34U +#define ROGUE_CR_SOFT_RESET_MMU_CLRMSK 0xFFFFFFFBFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_MMU_EN 0x0000000400000000ULL +#define ROGUE_CR_SOFT_RESET_BIF1_SHIFT 33U +#define ROGUE_CR_SOFT_RESET_BIF1_CLRMSK 0xFFFFFFFDFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_BIF1_EN 0x0000000200000000ULL +#define ROGUE_CR_SOFT_RESET_GARTEN_SHIFT 32U +#define ROGUE_CR_SOFT_RESET_GARTEN_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_GARTEN_EN 0x0000000100000000ULL +#define ROGUE_CR_SOFT_RESET_CPU_SHIFT 32U +#define ROGUE_CR_SOFT_RESET_CPU_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_CPU_EN 0x0000000100000000ULL +#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_SHIFT 31U +#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_EN 0x0000000080000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_SHIFT 30U +#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_CLRMSK 0xFFFFFFFFBFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_EN 0x0000000040000000ULL +#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_SHIFT 29U +#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_CLRMSK 0xFFFFFFFFDFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_EN 0x0000000020000000ULL +#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_SHIFT 28U +#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_CLRMSK 0xFFFFFFFFEFFFFFFFULL +#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_EN 0x0000000010000000ULL +#define ROGUE_CR_SOFT_RESET_SLC_SHIFT 27U 
+#define ROGUE_CR_SOFT_RESET_SLC_CLRMSK 0xFFFFFFFFF7FFFFFFULL +#define ROGUE_CR_SOFT_RESET_SLC_EN 0x0000000008000000ULL +#define ROGUE_CR_SOFT_RESET_TLA_SHIFT 26U +#define ROGUE_CR_SOFT_RESET_TLA_CLRMSK 0xFFFFFFFFFBFFFFFFULL +#define ROGUE_CR_SOFT_RESET_TLA_EN 0x0000000004000000ULL +#define ROGUE_CR_SOFT_RESET_UVS_SHIFT 25U +#define ROGUE_CR_SOFT_RESET_UVS_CLRMSK 0xFFFFFFFFFDFFFFFFULL +#define ROGUE_CR_SOFT_RESET_UVS_EN 0x0000000002000000ULL +#define ROGUE_CR_SOFT_RESET_TE_SHIFT 24U +#define ROGUE_CR_SOFT_RESET_TE_CLRMSK 0xFFFFFFFFFEFFFFFFULL +#define ROGUE_CR_SOFT_RESET_TE_EN 0x0000000001000000ULL +#define ROGUE_CR_SOFT_RESET_GPP_SHIFT 23U +#define ROGUE_CR_SOFT_RESET_GPP_CLRMSK 0xFFFFFFFFFF7FFFFFULL +#define ROGUE_CR_SOFT_RESET_GPP_EN 0x0000000000800000ULL +#define ROGUE_CR_SOFT_RESET_FBDC_SHIFT 22U +#define ROGUE_CR_SOFT_RESET_FBDC_CLRMSK 0xFFFFFFFFFFBFFFFFULL +#define ROGUE_CR_SOFT_RESET_FBDC_EN 0x0000000000400000ULL +#define ROGUE_CR_SOFT_RESET_FBC_SHIFT 21U +#define ROGUE_CR_SOFT_RESET_FBC_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_SOFT_RESET_FBC_EN 0x0000000000200000ULL +#define ROGUE_CR_SOFT_RESET_PM_SHIFT 20U +#define ROGUE_CR_SOFT_RESET_PM_CLRMSK 0xFFFFFFFFFFEFFFFFULL +#define ROGUE_CR_SOFT_RESET_PM_EN 0x0000000000100000ULL +#define ROGUE_CR_SOFT_RESET_PBE_SHIFT 19U +#define ROGUE_CR_SOFT_RESET_PBE_CLRMSK 0xFFFFFFFFFFF7FFFFULL +#define ROGUE_CR_SOFT_RESET_PBE_EN 0x0000000000080000ULL +#define ROGUE_CR_SOFT_RESET_USC_SHARED_SHIFT 18U +#define ROGUE_CR_SOFT_RESET_USC_SHARED_CLRMSK 0xFFFFFFFFFFFBFFFFULL +#define ROGUE_CR_SOFT_RESET_USC_SHARED_EN 0x0000000000040000ULL +#define ROGUE_CR_SOFT_RESET_MCU_L1_SHIFT 17U +#define ROGUE_CR_SOFT_RESET_MCU_L1_CLRMSK 0xFFFFFFFFFFFDFFFFULL +#define ROGUE_CR_SOFT_RESET_MCU_L1_EN 0x0000000000020000ULL +#define ROGUE_CR_SOFT_RESET_BIF_SHIFT 16U +#define ROGUE_CR_SOFT_RESET_BIF_CLRMSK 0xFFFFFFFFFFFEFFFFULL +#define ROGUE_CR_SOFT_RESET_BIF_EN 0x0000000000010000ULL +#define ROGUE_CR_SOFT_RESET_CDM_SHIFT 15U +#define ROGUE_CR_SOFT_RESET_CDM_CLRMSK 0xFFFFFFFFFFFF7FFFULL +#define ROGUE_CR_SOFT_RESET_CDM_EN 0x0000000000008000ULL +#define ROGUE_CR_SOFT_RESET_VDM_SHIFT 14U +#define ROGUE_CR_SOFT_RESET_VDM_CLRMSK 0xFFFFFFFFFFFFBFFFULL +#define ROGUE_CR_SOFT_RESET_VDM_EN 0x0000000000004000ULL +#define ROGUE_CR_SOFT_RESET_TESS_SHIFT 13U +#define ROGUE_CR_SOFT_RESET_TESS_CLRMSK 0xFFFFFFFFFFFFDFFFULL +#define ROGUE_CR_SOFT_RESET_TESS_EN 0x0000000000002000ULL +#define ROGUE_CR_SOFT_RESET_PDS_SHIFT 12U +#define ROGUE_CR_SOFT_RESET_PDS_CLRMSK 0xFFFFFFFFFFFFEFFFULL +#define ROGUE_CR_SOFT_RESET_PDS_EN 0x0000000000001000ULL +#define ROGUE_CR_SOFT_RESET_ISP_SHIFT 11U +#define ROGUE_CR_SOFT_RESET_ISP_CLRMSK 0xFFFFFFFFFFFFF7FFULL +#define ROGUE_CR_SOFT_RESET_ISP_EN 0x0000000000000800ULL +#define ROGUE_CR_SOFT_RESET_TSP_SHIFT 10U +#define ROGUE_CR_SOFT_RESET_TSP_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_SOFT_RESET_TSP_EN 0x0000000000000400ULL +#define ROGUE_CR_SOFT_RESET_SYSARB_SHIFT 5U +#define ROGUE_CR_SOFT_RESET_SYSARB_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_SOFT_RESET_SYSARB_EN 0x0000000000000020ULL +#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_SHIFT 4U +#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_EN 0x0000000000000010ULL +#define ROGUE_CR_SOFT_RESET_MCU_L0_SHIFT 3U +#define ROGUE_CR_SOFT_RESET_MCU_L0_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_SOFT_RESET_MCU_L0_EN 0x0000000000000008ULL +#define ROGUE_CR_SOFT_RESET_TPU_SHIFT 2U +#define ROGUE_CR_SOFT_RESET_TPU_CLRMSK 
0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_SOFT_RESET_TPU_EN 0x0000000000000004ULL +#define ROGUE_CR_SOFT_RESET_USC_SHIFT 0U +#define ROGUE_CR_SOFT_RESET_USC_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_SOFT_RESET_USC_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_SOFT_RESET2 */ +#define ROGUE_CR_SOFT_RESET2 0x0108U +#define ROGUE_CR_SOFT_RESET2_MASKFULL 0x00000000001FFFFFULL +#define ROGUE_CR_SOFT_RESET2_SPFILTER_SHIFT 12U +#define ROGUE_CR_SOFT_RESET2_SPFILTER_CLRMSK 0xFFE00FFFU +#define ROGUE_CR_SOFT_RESET2_TDM_SHIFT 11U +#define ROGUE_CR_SOFT_RESET2_TDM_CLRMSK 0xFFFFF7FFU +#define ROGUE_CR_SOFT_RESET2_TDM_EN 0x00000800U +#define ROGUE_CR_SOFT_RESET2_ASTC_SHIFT 10U +#define ROGUE_CR_SOFT_RESET2_ASTC_CLRMSK 0xFFFFFBFFU +#define ROGUE_CR_SOFT_RESET2_ASTC_EN 0x00000400U +#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_SHIFT 9U +#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_CLRMSK 0xFFFFFDFFU +#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_EN 0x00000200U +#define ROGUE_CR_SOFT_RESET2_USCPS_SHIFT 8U +#define ROGUE_CR_SOFT_RESET2_USCPS_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_SOFT_RESET2_USCPS_EN 0x00000100U +#define ROGUE_CR_SOFT_RESET2_IPF_SHIFT 7U +#define ROGUE_CR_SOFT_RESET2_IPF_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_SOFT_RESET2_IPF_EN 0x00000080U +#define ROGUE_CR_SOFT_RESET2_GEOMETRY_SHIFT 6U +#define ROGUE_CR_SOFT_RESET2_GEOMETRY_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SOFT_RESET2_GEOMETRY_EN 0x00000040U +#define ROGUE_CR_SOFT_RESET2_USC_SHARED_SHIFT 5U +#define ROGUE_CR_SOFT_RESET2_USC_SHARED_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SOFT_RESET2_USC_SHARED_EN 0x00000020U +#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_SHIFT 4U +#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_EN 0x00000010U +#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_SHIFT 3U +#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_EN 0x00000008U +#define ROGUE_CR_SOFT_RESET2_PIXEL_SHIFT 2U +#define ROGUE_CR_SOFT_RESET2_PIXEL_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SOFT_RESET2_PIXEL_EN 0x00000004U +#define ROGUE_CR_SOFT_RESET2_CDM_SHIFT 1U +#define ROGUE_CR_SOFT_RESET2_CDM_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SOFT_RESET2_CDM_EN 0x00000002U +#define ROGUE_CR_SOFT_RESET2_VERTEX_SHIFT 0U +#define ROGUE_CR_SOFT_RESET2_VERTEX_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SOFT_RESET2_VERTEX_EN 0x00000001U + +/* Register ROGUE_CR_EVENT_STATUS */ +#define ROGUE_CR_EVENT_STATUS 0x0130U +#define ROGUE_CR_EVENT_STATUS__ROGUEXE__MASKFULL 0x00000000E01DFFFFULL +#define ROGUE_CR_EVENT_STATUS__SIGNALS__MASKFULL 0x00000000E007FFFFULL +#define ROGUE_CR_EVENT_STATUS_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_SHIFT 31U +#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_CLRMSK 0x7FFFFFFFU +#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_EN 0x80000000U +#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_SHIFT 30U +#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_CLRMSK 0xBFFFFFFFU +#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_EN 0x40000000U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_SHIFT 29U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_EN 0x20000000U +#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_SHIFT 28U +#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_EN 0x10000000U +#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_SHIFT 27U +#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_CLRMSK 
0xF7FFFFFFU +#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_EN 0x08000000U +#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_SHIFT 26U +#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_CLRMSK 0xFBFFFFFFU +#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_EN 0x04000000U +#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_SHIFT 25U +#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_CLRMSK 0xFDFFFFFFU +#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_EN 0x02000000U +#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_SHIFT 24U +#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_CLRMSK 0xFEFFFFFFU +#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_EN 0x01000000U +#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_SHIFT 23U +#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_CLRMSK 0xFF7FFFFFU +#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_EN 0x00800000U +#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_SHIFT 22U +#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_CLRMSK 0xFFBFFFFFU +#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_EN 0x00400000U +#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_SHIFT 21U +#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_CLRMSK 0xFFDFFFFFU +#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_EN 0x00200000U +#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_SHIFT 20U +#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_EN 0x00100000U +#define ROGUE_CR_EVENT_STATUS_SAFETY_SHIFT 20U +#define ROGUE_CR_EVENT_STATUS_SAFETY_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_EVENT_STATUS_SAFETY_EN 0x00100000U +#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_SHIFT 19U +#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_CLRMSK 0xFFF7FFFFU +#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_EN 0x00080000U +#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_SHIFT 19U +#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_CLRMSK 0xFFF7FFFFU +#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_EN 0x00080000U +#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_SHIFT 18U +#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_CLRMSK 0xFFFBFFFFU +#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_EN 0x00040000U +#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_SHIFT 18U +#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_CLRMSK 0xFFFBFFFFU +#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_EN 0x00040000U +#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_SHIFT 17U +#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_CLRMSK 0xFFFDFFFFU +#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_EN 0x00020000U +#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_SHIFT 17U +#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_CLRMSK 0xFFFDFFFFU +#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_EN 0x00020000U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_SHIFT 16U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_CLRMSK 0xFFFEFFFFU +#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_EN 0x00010000U +#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_SHIFT 15U +#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_CLRMSK 0xFFFF7FFFU +#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_EN 0x00008000U +#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_SHIFT 14U +#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_CLRMSK 0xFFFFBFFFU +#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_EN 0x00004000U +#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_SHIFT 13U +#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_CLRMSK 0xFFFFDFFFU +#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_EN 0x00002000U +#define ROGUE_CR_EVENT_STATUS_GPIO_REQ_SHIFT 12U +#define ROGUE_CR_EVENT_STATUS_GPIO_REQ_CLRMSK 0xFFFFEFFFU +#define 
ROGUE_CR_EVENT_STATUS_GPIO_REQ_EN 0x00001000U +#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_SHIFT 11U +#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_CLRMSK 0xFFFFF7FFU +#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_EN 0x00000800U +#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_SHIFT 10U +#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_CLRMSK 0xFFFFFBFFU +#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_EN 0x00000400U +#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_SHIFT 9U +#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_CLRMSK 0xFFFFFDFFU +#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_EN 0x00000200U +#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_SHIFT 8U +#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_EN 0x00000100U +#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_SHIFT 7U +#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_EN 0x00000080U +#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_SHIFT 6U +#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_EN 0x00000040U +#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_SHIFT 5U +#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_EN 0x00000020U +#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_SHIFT 4U +#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_EN 0x00000010U +#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_SHIFT 3U +#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_EN 0x00000008U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_SHIFT 2U +#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_EN 0x00000004U +#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_SHIFT 1U +#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_EN 0x00000002U +#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_SHIFT 0U +#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_EN 0x00000001U + +/* Register ROGUE_CR_TIMER */ +#define ROGUE_CR_TIMER 0x0160U +#define ROGUE_CR_TIMER_MASKFULL 0x8000FFFFFFFFFFFFULL +#define ROGUE_CR_TIMER_BIT31_SHIFT 63U +#define ROGUE_CR_TIMER_BIT31_CLRMSK 0x7FFFFFFFFFFFFFFFULL +#define ROGUE_CR_TIMER_BIT31_EN 0x8000000000000000ULL +#define ROGUE_CR_TIMER_VALUE_SHIFT 0U +#define ROGUE_CR_TIMER_VALUE_CLRMSK 0xFFFF000000000000ULL + +/* Register ROGUE_CR_TLA_STATUS */ +#define ROGUE_CR_TLA_STATUS 0x0178U +#define ROGUE_CR_TLA_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_TLA_STATUS_BLIT_COUNT_SHIFT 39U +#define ROGUE_CR_TLA_STATUS_BLIT_COUNT_CLRMSK 0x0000007FFFFFFFFFULL +#define ROGUE_CR_TLA_STATUS_REQUEST_SHIFT 7U +#define ROGUE_CR_TLA_STATUS_REQUEST_CLRMSK 0xFFFFFF800000007FULL +#define ROGUE_CR_TLA_STATUS_FIFO_FULLNESS_SHIFT 1U +#define ROGUE_CR_TLA_STATUS_FIFO_FULLNESS_CLRMSK 0xFFFFFFFFFFFFFF81ULL +#define ROGUE_CR_TLA_STATUS_BUSY_SHIFT 0U +#define ROGUE_CR_TLA_STATUS_BUSY_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_TLA_STATUS_BUSY_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_PM_PARTIAL_RENDER_ENABLE */ +#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE 0x0338U +#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_SHIFT 0U +#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_CLRMSK 0xFFFFFFFEU 
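Every field in these register blocks follows the same convention: _SHIFT gives the field's least-significant bit position, _CLRMSK is an AND-mask with the field's bits cleared (so its complement isolates the field), _EN is the value of a single-bit field when set, and _MASKFULL covers every implemented bit of the register. A minimal sketch of how such definitions are typically consumed, assuming a hypothetical ioremap()ed register base `pvr_regs` and the generic readl()/readq() MMIO accessors (readq on 64-bit builds); the helper names are illustrative, not the driver's:

#include <linux/io.h>
#include <linux/types.h>

/* Extract the free-running counter from ROGUE_CR_TIMER (bits 47:0). */
static u64 rogue_cr_timer_value(void __iomem *pvr_regs)
{
	u64 reg = readq(pvr_regs + ROGUE_CR_TIMER);

	/* _CLRMSK clears the field, so its complement isolates it. */
	return (reg & ~ROGUE_CR_TIMER_VALUE_CLRMSK) >> ROGUE_CR_TIMER_VALUE_SHIFT;
}

/* Test a single-bit event using its _EN value. */
static bool rogue_mmu_page_faulted(void __iomem *pvr_regs)
{
	u32 ev = readl(pvr_regs + ROGUE_CR_EVENT_STATUS);

	return ev & ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_EN;
}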
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_EN 0x00000001U + +/* Register ROGUE_CR_SIDEKICK_IDLE */ +#define ROGUE_CR_SIDEKICK_IDLE 0x03C8U +#define ROGUE_CR_SIDEKICK_IDLE_MASKFULL 0x000000000000007FULL +#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_SHIFT 6U +#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_EN 0x00000040U +#define ROGUE_CR_SIDEKICK_IDLE_MMU_SHIFT 5U +#define ROGUE_CR_SIDEKICK_IDLE_MMU_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SIDEKICK_IDLE_MMU_EN 0x00000020U +#define ROGUE_CR_SIDEKICK_IDLE_BIF128_SHIFT 4U +#define ROGUE_CR_SIDEKICK_IDLE_BIF128_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SIDEKICK_IDLE_BIF128_EN 0x00000010U +#define ROGUE_CR_SIDEKICK_IDLE_TLA_SHIFT 3U +#define ROGUE_CR_SIDEKICK_IDLE_TLA_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SIDEKICK_IDLE_TLA_EN 0x00000008U +#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_SHIFT 2U +#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN 0x00000004U +#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_SHIFT 1U +#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_EN 0x00000002U +#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_SHIFT 0U +#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_EN 0x00000001U + +/* Register ROGUE_CR_MARS_IDLE */ +#define ROGUE_CR_MARS_IDLE 0x08F8U +#define ROGUE_CR_MARS_IDLE_MASKFULL 0x0000000000000007ULL +#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_SHIFT 2U +#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_EN 0x00000004U +#define ROGUE_CR_MARS_IDLE_CPU_SHIFT 1U +#define ROGUE_CR_MARS_IDLE_CPU_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_MARS_IDLE_CPU_EN 0x00000002U +#define ROGUE_CR_MARS_IDLE_SOCIF_SHIFT 0U +#define ROGUE_CR_MARS_IDLE_SOCIF_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MARS_IDLE_SOCIF_EN 0x00000001U + +/* Register ROGUE_CR_VDM_CONTEXT_STORE_STATUS */ +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS 0x0430U +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_MASKFULL 0x00000000000000F3ULL +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_LAST_PIPE_SHIFT 4U +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_LAST_PIPE_CLRMSK 0xFFFFFF0FU +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_SHIFT 1U +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_EN 0x00000002U +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_EN 0x00000001U + +/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK0 */ +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0 0x0438U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE1_SHIFT 32U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE1_CLRMSK 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE0_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE0_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK1 */ +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1 0x0440U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_PDS_STATE2_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_PDS_STATE2_CLRMSK 0x00000000U + +/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK2 */ +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2 0x0448U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define 
ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT2_SHIFT 32U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT2_CLRMSK 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT1_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT1_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK0 */ +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0 0x0450U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE1_SHIFT 32U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE1_CLRMSK 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE0_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE0_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK1 */ +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1 0x0458U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_PDS_STATE2_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_PDS_STATE2_CLRMSK 0x00000000U + +/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK2 */ +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2 0x0460U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT2_SHIFT 32U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT2_CLRMSK 0x00000000FFFFFFFFULL +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT1_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT1_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_CDM_CONTEXT_STORE_STATUS */ +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS 0x04A0U +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_SHIFT 1U +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_EN 0x00000002U +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_SHIFT 0U +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_EN 0x00000001U + +/* Register ROGUE_CR_CDM_CONTEXT_PDS0 */ +#define ROGUE_CR_CDM_CONTEXT_PDS0 0x04A8U +#define ROGUE_CR_CDM_CONTEXT_PDS0_MASKFULL 0xFFFFFFF0FFFFFFF0ULL +#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_SHIFT 36U +#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNSIZE 16U +#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_SHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL +#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNSIZE 16U + +/* Register ROGUE_CR_CDM_CONTEXT_PDS1 */ +#define ROGUE_CR_CDM_CONTEXT_PDS1 0x04B0U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_PDS1_MASKFULL 0x000000003FFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_SHIFT 29U +#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU 
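The _ALIGNSHIFT/_ALIGNSIZE annotations on the PDS address fields indicate that ROGUE_CR_CDM_CONTEXT_PDS0 stores 16-byte-aligned addresses with their four low bits dropped. A sketch of one plausible packing under that reading; the helper name and the assumption that both addresses fit their 28-bit fields are mine, not the header's:

#include <linux/types.h>

static u64 rogue_cdm_context_pds0(u64 data_addr, u64 code_addr)
{
	u64 reg = 0;

	/* Both addresses must be ALIGNSIZE (16-byte) aligned; the fields
	 * hold addr >> ALIGNSHIFT, per the annotations above.
	 */
	reg |= (data_addr >> ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNSHIFT)
	       << ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_SHIFT;
	reg |= (code_addr >> ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNSHIFT)
	       << ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_SHIFT;

	return reg & ROGUE_CR_CDM_CONTEXT_PDS0_MASKFULL;
}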
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_SHIFT 28U +#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_EN 0x10000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_SHIFT 28U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_SHIFT 27U +#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_CLRMSK 0xF7FFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_EN 0x08000000U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_UNIFIED_SIZE_SHIFT 21U +#define ROGUE_CR_CDM_CONTEXT_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U +#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_SHIFT 20U +#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_EN 0x00100000U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SIZE_SHIFT 11U +#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU +#define ROGUE_CR_CDM_CONTEXT_PDS1_TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_CONTEXT_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU +#define ROGUE_CR_CDM_CONTEXT_PDS1_DATA_SIZE_SHIFT 1U +#define ROGUE_CR_CDM_CONTEXT_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U +#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_SHIFT 0U +#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_EN 0x00000001U + +/* Register ROGUE_CR_CDM_TERMINATE_PDS */ +#define ROGUE_CR_CDM_TERMINATE_PDS 0x04B8U +#define ROGUE_CR_CDM_TERMINATE_PDS_MASKFULL 0xFFFFFFF0FFFFFFF0ULL +#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_SHIFT 36U +#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL +#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNSIZE 16U +#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_SHIFT 4U +#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL +#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNSIZE 16U + +/* Register ROGUE_CR_CDM_TERMINATE_PDS1 */ +#define ROGUE_CR_CDM_TERMINATE_PDS1 0x04C0U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL +#define ROGUE_CR_CDM_TERMINATE_PDS1_MASKFULL 0x000000003FFFFFFFULL +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_SHIFT 29U +#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U +#define 
ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_SHIFT 28U +#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_EN 0x10000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_SHIFT 28U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_SHIFT 27U +#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_CLRMSK 0xF7FFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_EN 0x08000000U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_UNIFIED_SIZE_SHIFT 21U +#define ROGUE_CR_CDM_TERMINATE_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U +#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_SHIFT 20U +#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_EN 0x00100000U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SIZE_SHIFT 11U +#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU +#define ROGUE_CR_CDM_TERMINATE_PDS1_TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_TERMINATE_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU +#define ROGUE_CR_CDM_TERMINATE_PDS1_DATA_SIZE_SHIFT 1U +#define ROGUE_CR_CDM_TERMINATE_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U +#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_SHIFT 0U +#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_EN 0x00000001U + +/* Register ROGUE_CR_CDM_CONTEXT_LOAD_PDS0 */ +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0 0x04D8U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_MASKFULL 0xFFFFFFF0FFFFFFF0ULL +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_SHIFT 36U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNSIZE 16U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_SHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNSIZE 16U + +/* Register ROGUE_CR_CDM_CONTEXT_LOAD_PDS1 */ +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1 0x04E0U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_MASKFULL 0x000000003FFFFFFFULL +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_SHIFT 29U +#define 
ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_SHIFT 28U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_EN 0x10000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_SHIFT 28U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_SHIFT 27U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_CLRMSK 0xF7FFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_EN 0x08000000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_UNIFIED_SIZE_SHIFT 21U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_SHIFT 20U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_EN 0x00100000U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SIZE_SHIFT 11U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_DATA_SIZE_SHIFT 1U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_SHIFT 0U +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_CONFIG */ +#define ROGUE_CR_MIPS_WRAPPER_CONFIG 0x0810U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_MASKFULL 0x000001030F01FFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_SHIFT 40U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_CLRMSK 0xFFFFFEFFFFFFFFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_EN 0x0000010000000000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_SHIFT 33U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_CLRMSK 0xFFFFFFFDFFFFFFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_EN 0x0000000200000000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_SHIFT 32U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_EN 0x0000000100000000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_OS_ID_SHIFT 25U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_OS_ID_CLRMSK 0xFFFFFFFFF1FFFFFFULL +#define 
ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_SHIFT 24U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_CLRMSK 0xFFFFFFFFFEFFFFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_EN 0x0000000001000000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_SHIFT 16U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_CLRMSK 0xFFFFFFFFFFFEFFFFULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MIPS32 0x0000000000000000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MICROMIPS 0x0000000000010000ULL +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_REGBANK_BASE_ADDR_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_CONFIG_REGBANK_BASE_ADDR_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1 */ +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1 0x0818U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MASKFULL 0x00000000FFFFF001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2 */ +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2 0x0820U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_OS_ID_SHIFT 6U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_SHIFT 5U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_EN 0x0000000000000020ULL +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_REGION_SIZE_POW2_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1 */ +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1 0x0828U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MASKFULL 0x00000000FFFFF001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2 */ +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2 0x0830U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_OS_ID_SHIFT 6U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_SHIFT 5U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_EN 0x0000000000000020ULL +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_REGION_SIZE_POW2_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1 */ +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1 0x0838U +#define 
ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MASKFULL 0x00000000FFFFF001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2 */ +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2 0x0840U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_OS_ID_SHIFT 6U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_SHIFT 5U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_EN 0x0000000000000020ULL +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_REGION_SIZE_POW2_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1 */ +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1 0x0848U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MASKFULL 0x00000000FFFFF001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2 */ +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2 0x0850U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_ADDR_OUT_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_OS_ID_SHIFT 6U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_SHIFT 5U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_EN 0x0000000000000020ULL +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_REGION_SIZE_POW2_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1 */ +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1 0x0858U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MASKFULL 0x00000000FFFFF001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2 */ +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2 0x0860U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL +#define 
ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_OS_ID_SHIFT 6U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_SHIFT 5U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_EN 0x0000000000000020ULL +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_REGION_SIZE_POW2_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS */ +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS 0x0868U +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_MASKFULL 0x00000001FFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_SHIFT 32U +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_EN 0x0000000100000000ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_ADDRESS_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_ADDRESS_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR */ +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR 0x0870U +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG */ +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG 0x0878U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MASKFULL 0xFFFFFFF7FFFFFFBFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ADDR_OUT_SHIFT 36U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ADDR_OUT_CLRMSK 0x0000000FFFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_OS_ID_SHIFT 32U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_OS_ID_CLRMSK 0xFFFFFFF8FFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_SHIFT 11U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_CLRMSK 0xFFFFFFFFFFFFF7FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_EN 0x0000000000000800ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_SHIFT 7U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_CLRMSK 0xFFFFFFFFFFFFF87FULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_4KB 0x0000000000000000ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_16KB 0x0000000000000080ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_64KB 0x0000000000000100ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_256KB 0x0000000000000180ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_1MB 0x0000000000000200ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_4MB 0x0000000000000280ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_16MB 0x0000000000000300ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_64MB 0x0000000000000380ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_256MB 0x0000000000000400ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ENTRY_SHIFT 1U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ENTRY_CLRMSK 0xFFFFFFFFFFFFFFC1ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_SHIFT 0U +#define 
ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ */ +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ 0x0880U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_MASKFULL 0x000000000000003FULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_ENTRY_SHIFT 1U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_ENTRY_CLRMSK 0xFFFFFFC1U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA */ +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA 0x0888U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MASKFULL 0xFFFFFFF7FFFFFF81ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_ADDR_OUT_SHIFT 36U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_ADDR_OUT_CLRMSK 0x0000000FFFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_OS_ID_SHIFT 32U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_OS_ID_CLRMSK 0xFFFFFFF8FFFFFFFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_BASE_ADDR_IN_SHIFT 12U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_SHIFT 11U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_CLRMSK 0xFFFFFFFFFFFFF7FFULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_EN 0x0000000000000800ULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_REGION_SIZE_SHIFT 7U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_REGION_SIZE_CLRMSK 0xFFFFFFFFFFFFF87FULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_SHIFT 0U +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE */ +#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE 0x08A0U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS */ +#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS 0x08A8U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR */ +#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR 0x08B0U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE */ +#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE 0x08B8U +#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_NMI_EVENT */ +#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT 0x08C0U +#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_MASKFULL 0x0000000000000001ULL +#define 
ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_DEBUG_CONFIG */ +#define ROGUE_CR_MIPS_DEBUG_CONFIG 0x08C8U +#define ROGUE_CR_MIPS_DEBUG_CONFIG_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_SHIFT 0U +#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_EXCEPTION_STATUS */ +#define ROGUE_CR_MIPS_EXCEPTION_STATUS 0x08D0U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_MASKFULL 0x000000000000003FULL +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_SHIFT 5U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_EN 0x00000020U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_SHIFT 4U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_EN 0x00000010U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_SHIFT 3U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_EN 0x00000008U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_SHIFT 2U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_EN 0x00000004U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_SHIFT 1U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_EN 0x00000002U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_SHIFT 0U +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_EN 0x00000001U + +/* Register ROGUE_CR_MIPS_WRAPPER_STATUS */ +#define ROGUE_CR_MIPS_WRAPPER_STATUS 0x08E8U +#define ROGUE_CR_MIPS_WRAPPER_STATUS_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_MIPS_WRAPPER_STATUS_OUTSTANDING_REQUESTS_SHIFT 0U +#define ROGUE_CR_MIPS_WRAPPER_STATUS_OUTSTANDING_REQUESTS_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_XPU_BROADCAST */ +#define ROGUE_CR_XPU_BROADCAST 0x0890U +#define ROGUE_CR_XPU_BROADCAST_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_XPU_BROADCAST_MASK_SHIFT 0U +#define ROGUE_CR_XPU_BROADCAST_MASK_CLRMSK 0xFFFFFE00U + +/* Register ROGUE_CR_META_SP_MSLVDATAX */ +#define ROGUE_CR_META_SP_MSLVDATAX 0x0A00U +#define ROGUE_CR_META_SP_MSLVDATAX_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_META_SP_MSLVDATAX_MSLVDATAX_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVDATAX_MSLVDATAX_CLRMSK 0x00000000U + +/* Register ROGUE_CR_META_SP_MSLVDATAT */ +#define ROGUE_CR_META_SP_MSLVDATAT 0x0A08U +#define ROGUE_CR_META_SP_MSLVDATAT_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_META_SP_MSLVDATAT_MSLVDATAT_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVDATAT_MSLVDATAT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_META_SP_MSLVCTRL0 */ +#define ROGUE_CR_META_SP_MSLVCTRL0 0x0A10U +#define ROGUE_CR_META_SP_MSLVCTRL0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_META_SP_MSLVCTRL0_ADDR_SHIFT 2U +#define ROGUE_CR_META_SP_MSLVCTRL0_ADDR_CLRMSK 0x00000003U +#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_SHIFT 1U +#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_EN 0x00000002U +#define ROGUE_CR_META_SP_MSLVCTRL0_RD_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVCTRL0_RD_CLRMSK 0xFFFFFFFEU +#define 
ROGUE_CR_META_SP_MSLVCTRL0_RD_EN 0x00000001U + +/* Register ROGUE_CR_META_SP_MSLVCTRL1 */ +#define ROGUE_CR_META_SP_MSLVCTRL1 0x0A18U +#define ROGUE_CR_META_SP_MSLVCTRL1_MASKFULL 0x00000000F7F4003FULL +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRTHREAD_SHIFT 30U +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRTHREAD_CLRMSK 0x3FFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_SHIFT 29U +#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_CLRMSK 0xDFFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_EN 0x20000000U +#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_SHIFT 28U +#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_CLRMSK 0xEFFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_EN 0x10000000U +#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_SHIFT 26U +#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_CLRMSK 0xFBFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN 0x04000000U +#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_SHIFT 25U +#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_CLRMSK 0xFDFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_EN 0x02000000U +#define ROGUE_CR_META_SP_MSLVCTRL1_READY_SHIFT 24U +#define ROGUE_CR_META_SP_MSLVCTRL1_READY_CLRMSK 0xFEFFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_READY_EN 0x01000000U +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRID_SHIFT 21U +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRID_CLRMSK 0xFF1FFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_SHIFT 20U +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_EN 0x00100000U +#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_SHIFT 18U +#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_CLRMSK 0xFFFBFFFFU +#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_EN 0x00040000U +#define ROGUE_CR_META_SP_MSLVCTRL1_THREAD_SHIFT 4U +#define ROGUE_CR_META_SP_MSLVCTRL1_THREAD_CLRMSK 0xFFFFFFCFU +#define ROGUE_CR_META_SP_MSLVCTRL1_TRANS_SIZE_SHIFT 2U +#define ROGUE_CR_META_SP_MSLVCTRL1_TRANS_SIZE_CLRMSK 0xFFFFFFF3U +#define ROGUE_CR_META_SP_MSLVCTRL1_BYTE_ROUND_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVCTRL1_BYTE_ROUND_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_META_SP_MSLVHANDSHKE */ +#define ROGUE_CR_META_SP_MSLVHANDSHKE 0x0A50U +#define ROGUE_CR_META_SP_MSLVHANDSHKE_MASKFULL 0x000000000000000FULL +#define ROGUE_CR_META_SP_MSLVHANDSHKE_INPUT_SHIFT 2U +#define ROGUE_CR_META_SP_MSLVHANDSHKE_INPUT_CLRMSK 0xFFFFFFF3U +#define ROGUE_CR_META_SP_MSLVHANDSHKE_OUTPUT_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVHANDSHKE_OUTPUT_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_META_SP_MSLVT0KICK */ +#define ROGUE_CR_META_SP_MSLVT0KICK 0x0A80U +#define ROGUE_CR_META_SP_MSLVT0KICK_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT0KICK_MSLVT0KICK_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT0KICK_MSLVT0KICK_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT0KICKI */ +#define ROGUE_CR_META_SP_MSLVT0KICKI 0x0A88U +#define ROGUE_CR_META_SP_MSLVT0KICKI_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT0KICKI_MSLVT0KICKI_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT0KICKI_MSLVT0KICKI_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT1KICK */ +#define ROGUE_CR_META_SP_MSLVT1KICK 0x0A90U +#define ROGUE_CR_META_SP_MSLVT1KICK_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT1KICK_MSLVT1KICK_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT1KICK_MSLVT1KICK_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT1KICKI */ +#define ROGUE_CR_META_SP_MSLVT1KICKI 0x0A98U +#define ROGUE_CR_META_SP_MSLVT1KICKI_MASKFULL 0x000000000000FFFFULL +#define 
ROGUE_CR_META_SP_MSLVT1KICKI_MSLVT1KICKI_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT1KICKI_MSLVT1KICKI_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT2KICK */ +#define ROGUE_CR_META_SP_MSLVT2KICK 0x0AA0U +#define ROGUE_CR_META_SP_MSLVT2KICK_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT2KICK_MSLVT2KICK_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT2KICK_MSLVT2KICK_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT2KICKI */ +#define ROGUE_CR_META_SP_MSLVT2KICKI 0x0AA8U +#define ROGUE_CR_META_SP_MSLVT2KICKI_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT2KICKI_MSLVT2KICKI_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT2KICKI_MSLVT2KICKI_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT3KICK */ +#define ROGUE_CR_META_SP_MSLVT3KICK 0x0AB0U +#define ROGUE_CR_META_SP_MSLVT3KICK_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT3KICK_MSLVT3KICK_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT3KICK_MSLVT3KICK_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVT3KICKI */ +#define ROGUE_CR_META_SP_MSLVT3KICKI 0x0AB8U +#define ROGUE_CR_META_SP_MSLVT3KICKI_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_META_SP_MSLVT3KICKI_MSLVT3KICKI_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVT3KICKI_MSLVT3KICKI_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_META_SP_MSLVRST */ +#define ROGUE_CR_META_SP_MSLVRST 0x0AC0U +#define ROGUE_CR_META_SP_MSLVRST_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_EN 0x00000001U + +/* Register ROGUE_CR_META_SP_MSLVIRQSTATUS */ +#define ROGUE_CR_META_SP_MSLVIRQSTATUS 0x0AC8U +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_MASKFULL 0x000000000000000CULL +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_SHIFT 3U +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_EN 0x00000008U +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_SHIFT 2U +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_EN 0x00000004U + +/* Register ROGUE_CR_META_SP_MSLVIRQENABLE */ +#define ROGUE_CR_META_SP_MSLVIRQENABLE 0x0AD0U +#define ROGUE_CR_META_SP_MSLVIRQENABLE_MASKFULL 0x000000000000000CULL +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_SHIFT 3U +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_EN 0x00000008U +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_SHIFT 2U +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_EN 0x00000004U + +/* Register ROGUE_CR_META_SP_MSLVIRQLEVEL */ +#define ROGUE_CR_META_SP_MSLVIRQLEVEL 0x0AD8U +#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_SHIFT 0U +#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_EN 0x00000001U + +/* Register ROGUE_CR_MTS_SCHEDULE */ +#define ROGUE_CR_MTS_SCHEDULE 0x0B00U +#define ROGUE_CR_MTS_SCHEDULE_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT0 0x00000000U +#define 
ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE1 */ +#define ROGUE_CR_MTS_SCHEDULE1 0x10B00U +#define ROGUE_CR_MTS_SCHEDULE1_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE1_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE1_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE1_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE1_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE1_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE1_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE1_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE1_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE1_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE1_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE1_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE2 */ +#define ROGUE_CR_MTS_SCHEDULE2 0x20B00U +#define ROGUE_CR_MTS_SCHEDULE2_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE2_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE2_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE2_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE2_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT3 0x000000C0U 
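A write to ROGUE_CR_MTS_SCHEDULE combines the HOST, PRIORITY, CONTEXT, TASK and DM fields above into a single kick word for the META thread scheduler. A minimal sketch, assuming the same hypothetical pvr_regs base as above, of kicking a counted task for data master 0 at priority 0 from the host:

#include <linux/io.h>
#include <linux/types.h>

static void rogue_mts_kick_dm0(void __iomem *pvr_regs)
{
	u32 kick = ROGUE_CR_MTS_SCHEDULE_HOST_HOST |
		   ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT0 |
		   ROGUE_CR_MTS_SCHEDULE_CONTEXT_INTCTX |
		   ROGUE_CR_MTS_SCHEDULE_TASK_COUNTED |
		   ROGUE_CR_MTS_SCHEDULE_DM_DM0;

	writel(kick, pvr_regs + ROGUE_CR_MTS_SCHEDULE);
}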
+#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE2_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE2_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE2_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE2_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE2_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE2_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE2_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE3 */ +#define ROGUE_CR_MTS_SCHEDULE3 0x30B00U +#define ROGUE_CR_MTS_SCHEDULE3_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE3_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE3_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE3_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE3_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE3_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE3_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE3_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE3_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE3_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE3_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE3_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE4 */ +#define ROGUE_CR_MTS_SCHEDULE4 0x40B00U +#define ROGUE_CR_MTS_SCHEDULE4_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE4_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE4_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE4_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE4_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_CLRMSK 0xFFFFFFDFU +#define 
ROGUE_CR_MTS_SCHEDULE4_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE4_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE4_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE4_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE4_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE4_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE4_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE4_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE5 */ +#define ROGUE_CR_MTS_SCHEDULE5 0x50B00U +#define ROGUE_CR_MTS_SCHEDULE5_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE5_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE5_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE5_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE5_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_INTCTX 0x00000020U +#define ROGUE_CR_MTS_SCHEDULE5_TASK_SHIFT 4U +#define ROGUE_CR_MTS_SCHEDULE5_TASK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SCHEDULE5_TASK_NON_COUNTED 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE5_TASK_COUNTED 0x00000010U +#define ROGUE_CR_MTS_SCHEDULE5_DM_SHIFT 0U +#define ROGUE_CR_MTS_SCHEDULE5_DM_CLRMSK 0xFFFFFFF0U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM1 0x00000001U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM2 0x00000002U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM3 0x00000003U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM4 0x00000004U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM5 0x00000005U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM6 0x00000006U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM7 0x00000007U +#define ROGUE_CR_MTS_SCHEDULE5_DM_DM_ALL 0x0000000FU + +/* Register ROGUE_CR_MTS_SCHEDULE6 */ +#define ROGUE_CR_MTS_SCHEDULE6 0x60B00U +#define ROGUE_CR_MTS_SCHEDULE6_MASKFULL 0x00000000000001FFULL +#define ROGUE_CR_MTS_SCHEDULE6_HOST_SHIFT 8U +#define ROGUE_CR_MTS_SCHEDULE6_HOST_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_MTS_SCHEDULE6_HOST_BG_TIMER 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE6_HOST_HOST 0x00000100U +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_SHIFT 6U +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT0 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT1 0x00000040U +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT2 0x00000080U +#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT3 0x000000C0U +#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_SHIFT 5U +#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_BGCTX 0x00000000U +#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_INTCTX 0x00000020U +#define 
+/* Register ROGUE_CR_MTS_SCHEDULE6 */
+#define ROGUE_CR_MTS_SCHEDULE6 0x60B00U
+#define ROGUE_CR_MTS_SCHEDULE6_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE7 */
+#define ROGUE_CR_MTS_SCHEDULE7 0x70B00U
+#define ROGUE_CR_MTS_SCHEDULE7_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC */
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC 0x0B30U
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC */
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC 0x0B38U
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC */
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC 0x0B40U
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC */
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC 0x0B48U
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG */
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG 0x0B50U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__MASKFULL 0x000FF0FFFFFFF701ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_MASKFULL 0x0000FFFFFFFFF001ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_SHIFT 44U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_CLRMSK 0xFFFF0FFFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__FENCE_PC_BASE_SHIFT 44U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__FENCE_PC_BASE_CLRMSK 0xFFF00FFFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_SHIFT 40U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_ADDR_SHIFT 12U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PERSISTENCE_SHIFT 9U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PERSISTENCE_CLRMSK 0xFFFFFFFFFFFFF9FFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_SHIFT 8U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_EN 0x0000000000000100ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_SHIFT 0U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META 0x0000000000000000ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_MTS 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE 0x0B58U
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE 0x0B60U
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE 0x0B68U
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE 0x0B70U
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE 0x0B78U
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
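Each field in these definitions is described by a SHIFT plus an inverted mask (CLRMSK), so a read-modify-write clears the field with CLRMSK and ORs in the shifted value. A standalone sketch against the 64-bit GARTEN_WRAPPER_CONFIG fields above; the helper name is an assumption:

#include <stdint.h>

/* Replace the FENCE_DM field of a GARTEN_WRAPPER_CONFIG value. */
static uint64_t garten_set_fence_dm(uint64_t cfg, uint64_t dm)
{
	cfg &= ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_CLRMSK;
	cfg |= (dm << ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_SHIFT) &
	       ~ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_CLRMSK;
	return cfg;
}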
+/* Register ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE 0x0B80U
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_INTCTX */
+#define ROGUE_CR_MTS_INTCTX 0x0B98U
+#define ROGUE_CR_MTS_INTCTX_MASKFULL 0x000000003FFFFFFFULL
+#define ROGUE_CR_MTS_INTCTX_DM_HOST_SCHEDULE_SHIFT 22U
+#define ROGUE_CR_MTS_INTCTX_DM_HOST_SCHEDULE_CLRMSK 0xC03FFFFFU
+#define ROGUE_CR_MTS_INTCTX_DM_PTR_SHIFT 18U
+#define ROGUE_CR_MTS_INTCTX_DM_PTR_CLRMSK 0xFFC3FFFFU
+#define ROGUE_CR_MTS_INTCTX_THREAD_ACTIVE_SHIFT 16U
+#define ROGUE_CR_MTS_INTCTX_THREAD_ACTIVE_CLRMSK 0xFFFCFFFFU
+#define ROGUE_CR_MTS_INTCTX_DM_TIMER_SCHEDULE_SHIFT 8U
+#define ROGUE_CR_MTS_INTCTX_DM_TIMER_SCHEDULE_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_MTS_INTCTX_DM_INTERRUPT_SCHEDULE_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_DM_INTERRUPT_SCHEDULE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MTS_BGCTX */
+#define ROGUE_CR_MTS_BGCTX 0x0BA0U
+#define ROGUE_CR_MTS_BGCTX_MASKFULL 0x0000000000003FFFULL
+#define ROGUE_CR_MTS_BGCTX_DM_PTR_SHIFT 10U
+#define ROGUE_CR_MTS_BGCTX_DM_PTR_CLRMSK 0xFFFFC3FFU
+#define ROGUE_CR_MTS_BGCTX_THREAD_ACTIVE_SHIFT 8U
+#define ROGUE_CR_MTS_BGCTX_THREAD_ACTIVE_CLRMSK 0xFFFFFCFFU
+#define ROGUE_CR_MTS_BGCTX_DM_NONCOUNTED_SCHEDULE_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_DM_NONCOUNTED_SCHEDULE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE */
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE 0x0BA8U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM7_SHIFT 56U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM7_CLRMSK 0x00FFFFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM6_SHIFT 48U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM6_CLRMSK 0xFF00FFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM5_SHIFT 40U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM5_CLRMSK 0xFFFF00FFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM4_SHIFT 32U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM4_CLRMSK 0xFFFFFF00FFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM3_SHIFT 24U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM3_CLRMSK 0xFFFFFFFF00FFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM2_SHIFT 16U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM2_CLRMSK 0xFFFFFFFFFF00FFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM1_SHIFT 8U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM1_CLRMSK 0xFFFFFFFFFFFF00FFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM0_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM0_CLRMSK 0xFFFFFFFFFFFFFF00ULL
+
+/* Register ROGUE_CR_MTS_GPU_INT_STATUS */
+#define ROGUE_CR_MTS_GPU_INT_STATUS 0x0BB0U
+#define ROGUE_CR_MTS_GPU_INT_STATUS_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_GPU_INT_STATUS_STATUS_SHIFT 0U
+#define ROGUE_CR_MTS_GPU_INT_STATUS_STATUS_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_SCHEDULE_ENABLE */
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE 0x0BC8U
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASK_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASK_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_IRQ_OS0_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS 0x0BD8U
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS0_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR 0x0BE8U
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS1_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS 0x10BD8U
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS1_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR 0x10BE8U
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS2_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS 0x20BD8U
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS2_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR 0x20BE8U
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS3_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS 0x30BD8U
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS3_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR 0x30BE8U
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS4_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS 0x40BD8U
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS4_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR 0x40BE8U
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
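The IRQ_OSn_EVENT_STATUS/CLEAR pairs land at a 0x10000 stride per OS ID (0x0BD8/0x0BE8 for OS0, 0x10BD8/0x10BE8 for OS1, and so on), so one handler can service any OS ID by offsetting from the OS0 macros. A standalone sketch; the stride name and the trivial MMIO helpers are illustrative assumptions:

#include <stdint.h>

#define ROGUE_CR_IRQ_OS_STRIDE 0x10000U /* hypothetical name, value read off the offsets above */

static inline uint32_t cr_read32(volatile uint32_t *regs, uint32_t offset)
{
	return regs[offset / 4];
}

static inline void cr_write32(volatile uint32_t *regs, uint32_t offset, uint32_t val)
{
	regs[offset / 4] = val;
}

/* Return 1 and acknowledge if the given OS ID has a pending GPU event. */
static int rogue_os_irq_pending_and_clear(volatile uint32_t *regs, uint32_t osid)
{
	uint32_t status_reg = ROGUE_CR_IRQ_OS0_EVENT_STATUS + osid * ROGUE_CR_IRQ_OS_STRIDE;
	uint32_t clear_reg = ROGUE_CR_IRQ_OS0_EVENT_CLEAR + osid * ROGUE_CR_IRQ_OS_STRIDE;

	if (!(cr_read32(regs, status_reg) & ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_EN))
		return 0;
	cr_write32(regs, clear_reg, ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_EN);
	return 1;
}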
+/* Register ROGUE_CR_IRQ_OS5_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS 0x50BD8U
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS5_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR 0x50BE8U
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS6_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS 0x60BD8U
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS6_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR 0x60BE8U
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS7_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS 0x70BD8U
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS7_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR 0x70BE8U
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_META_BOOT */
+#define ROGUE_CR_META_BOOT 0x0BF8U
+#define ROGUE_CR_META_BOOT_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_META_BOOT_MODE_SHIFT 0U
+#define ROGUE_CR_META_BOOT_MODE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_META_BOOT_MODE_EN 0x00000001U
+
+/* Register ROGUE_CR_GARTEN_SLC */
+#define ROGUE_CR_GARTEN_SLC 0x0BB8U
+#define ROGUE_CR_GARTEN_SLC_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_SHIFT 0U
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_EN 0x00000001U
+
+/* Register ROGUE_CR_PPP */
+#define ROGUE_CR_PPP 0x0CD0U
+#define ROGUE_CR_PPP_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PPP_CHECKSUM_SHIFT 0U
+#define ROGUE_CR_PPP_CHECKSUM_CLRMSK 0x00000000U
+
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_MASK 0x00000003U
+/* Top-left to bottom-right */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_TL2BR 0x00000000U
+/* Top-right to bottom-left */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_TR2BL 0x00000001U
+/* Bottom-left to top-right */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_BL2TR 0x00000002U
+/* Bottom-right to top-left */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_BR2TL 0x00000003U
+
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_MASK 0x00000003U
+/* Normal render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_NORM 0x00000000U
+/* Fast 2D render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_2D 0x00000002U
+/* Fast scale render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_SCALE 0x00000003U
+
+/* Register ROGUE_CR_ISP_RENDER */
+#define ROGUE_CR_ISP_RENDER 0x0F08U
+#define ROGUE_CR_ISP_RENDER_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_SHIFT 8U
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_EN 0x00000100U
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_SHIFT 7U
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_EN 0x00000080U
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_SHIFT 6U
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_EN 0x00000040U
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_SHIFT 5U
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_EN 0x00000020U
+#define ROGUE_CR_ISP_RENDER_RESUME_SHIFT 4U
+#define ROGUE_CR_ISP_RENDER_RESUME_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_ISP_RENDER_RESUME_EN 0x00000010U
+#define ROGUE_CR_ISP_RENDER_DIR_SHIFT 2U
+#define ROGUE_CR_ISP_RENDER_DIR_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_ISP_RENDER_DIR_TL2BR 0x00000000U
+#define ROGUE_CR_ISP_RENDER_DIR_TR2BL 0x00000004U
+#define ROGUE_CR_ISP_RENDER_DIR_BL2TR 0x00000008U
+#define ROGUE_CR_ISP_RENDER_DIR_BR2TL 0x0000000CU
+#define ROGUE_CR_ISP_RENDER_MODE_SHIFT 0U
+#define ROGUE_CR_ISP_RENDER_MODE_CLRMSK 0xFFFFFFFCU
+#define ROGUE_CR_ISP_RENDER_MODE_NORM 0x00000000U
+#define ROGUE_CR_ISP_RENDER_MODE_FAST_2D 0x00000002U
+#define ROGUE_CR_ISP_RENDER_MODE_FAST_SCALE 0x00000003U
+
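Note the unshifted ROGUE_CR_ISP_RENDER_DIR_TYPE_* and MODE_TYPE_* enums above are the same codes as the pre-shifted DIR_* and MODE_* field values in the ISP_RENDER block (for example TR2BL: 0x1 << 2 == 0x4). A standalone sketch with a compile-time check of that relation and a composed register value; the function is illustrative, not from this patch:

#include <assert.h>
#include <stdint.h>

static_assert((ROGUE_CR_ISP_RENDER_DIR_TYPE_TR2BL << ROGUE_CR_ISP_RENDER_DIR_SHIFT) ==
	      ROGUE_CR_ISP_RENDER_DIR_TR2BL,
	      "the DIR_TYPE enums are the DIR field values before shifting");

/* Resume a fast-scale render sweeping bottom-right to top-left. */
static uint32_t isp_render_resume_value(void)
{
	return ROGUE_CR_ISP_RENDER_RESUME_EN |
	       ROGUE_CR_ISP_RENDER_DIR_BR2TL |
	       ROGUE_CR_ISP_RENDER_MODE_FAST_SCALE;
}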
+/* Register ROGUE_CR_ISP_CTL */
+#define ROGUE_CR_ISP_CTL 0x0F38U
+#define ROGUE_CR_ISP_CTL_MASKFULL 0x00000000FFFFF3FFULL
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_SHIFT 31U
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_CLRMSK 0x7FFFFFFFU
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_EN 0x80000000U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_SHIFT 30U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_EN 0x40000000U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_SHIFT 29U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_EN 0x20000000U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_SHIFT 28U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_EN 0x10000000U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_SHIFT 27U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_EN 0x08000000U
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_SHIFT 26U
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_CLRMSK 0xFBFFFFFFU
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_EN 0x04000000U
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_SHIFT 25U
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_CLRMSK 0xFDFFFFFFU
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_EN 0x02000000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_SHIFT 23U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_CLRMSK 0xFE7FFFFFU
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_DX9 0x00000000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_DX10 0x00800000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_OGL 0x01000000U
+#define ROGUE_CR_ISP_CTL_NUM_TILES_PER_USC_SHIFT 21U
+#define ROGUE_CR_ISP_CTL_NUM_TILES_PER_USC_CLRMSK 0xFF9FFFFFU
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_SHIFT 20U
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_EN 0x00100000U
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_SHIFT 19U
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_CLRMSK 0xFFF7FFFFU
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_EN 0x00080000U
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_SHIFT 18U
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_EN 0x00040000U
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_SHIFT 17U
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_EN 0x00020000U
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_SHIFT 16U
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_CLRMSK 0xFFFEFFFFU
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_EN 0x00010000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_SHIFT 12U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_ONE 0x00000000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TWO 0x00001000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_THREE 0x00002000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FOUR 0x00003000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FIVE 0x00004000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SIX 0x00005000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SEVEN 0x00006000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_EIGHT 0x00007000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_NINE 0x00008000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TEN 0x00009000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_ELEVEN 0x0000A000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TWELVE 0x0000B000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_THIRTEEN 0x0000C000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FOURTEEN 0x0000D000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FIFTEEN 0x0000E000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SIXTEEN 0x0000F000U
+#define ROGUE_CR_ISP_CTL_VALID_ID_SHIFT 4U
+#define ROGUE_CR_ISP_CTL_VALID_ID_CLRMSK 0xFFFFFC0FU
+#define ROGUE_CR_ISP_CTL_UPASS_START_SHIFT 0U
+#define ROGUE_CR_ISP_CTL_UPASS_START_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_ISP_STATUS */
+#define ROGUE_CR_ISP_STATUS 0x1038U
+#define ROGUE_CR_ISP_STATUS_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_SHIFT 2U
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_EN 0x00000004U
+#define ROGUE_CR_ISP_STATUS_ACTIVE_SHIFT 1U
+#define ROGUE_CR_ISP_STATUS_ACTIVE_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_ISP_STATUS_ACTIVE_EN 0x00000002U
+#define ROGUE_CR_ISP_STATUS_EOR_SHIFT 0U
+#define ROGUE_CR_ISP_STATUS_EOR_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_ISP_STATUS_EOR_EN 0x00000001U
+
+/* Register group: ROGUE_CR_ISP_XTP_RESUME, with 64 repeats */
+#define ROGUE_CR_ISP_XTP_RESUME_REPEATCOUNT 64U
+/* Register ROGUE_CR_ISP_XTP_RESUME0 */
+#define ROGUE_CR_ISP_XTP_RESUME0 0x3A00U
+#define ROGUE_CR_ISP_XTP_RESUME0_MASKFULL 0x00000000003FF3FFULL
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_X_SHIFT 12U
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_X_CLRMSK 0xFFC00FFFU
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_Y_SHIFT 0U
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_Y_CLRMSK 0xFFFFFC00U
+
+/* Register group: ROGUE_CR_ISP_XTP_STORE, with 32 repeats */
+#define ROGUE_CR_ISP_XTP_STORE_REPEATCOUNT 32U
+/* Register ROGUE_CR_ISP_XTP_STORE0 */
+#define ROGUE_CR_ISP_XTP_STORE0 0x3C00U
+#define ROGUE_CR_ISP_XTP_STORE0_MASKFULL 0x000000007F3FF3FFULL
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_SHIFT 30U
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_EN 0x40000000U
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_SHIFT 29U
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_EN 0x20000000U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_SHIFT 28U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_EN 0x10000000U
+#define ROGUE_CR_ISP_XTP_STORE0_MT_SHIFT 24U
+#define ROGUE_CR_ISP_XTP_STORE0_MT_CLRMSK 0xF0FFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_X_SHIFT 12U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_X_CLRMSK 0xFFC00FFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_Y_SHIFT 0U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_Y_CLRMSK 0xFFFFFC00U
+
+/* Register group: ROGUE_CR_BIF_CAT_BASE, with 8 repeats */
+#define ROGUE_CR_BIF_CAT_BASE_REPEATCOUNT 8U
+/* Register ROGUE_CR_BIF_CAT_BASE0 */
+#define ROGUE_CR_BIF_CAT_BASE0 0x1200U
+#define ROGUE_CR_BIF_CAT_BASE0_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE1 */
+#define ROGUE_CR_BIF_CAT_BASE1 0x1208U
+#define ROGUE_CR_BIF_CAT_BASE1_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE2 */
+#define ROGUE_CR_BIF_CAT_BASE2 0x1210U
+#define ROGUE_CR_BIF_CAT_BASE2_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE3 */
+#define ROGUE_CR_BIF_CAT_BASE3 0x1218U
+#define ROGUE_CR_BIF_CAT_BASE3_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE4 */
+#define ROGUE_CR_BIF_CAT_BASE4 0x1220U
+#define ROGUE_CR_BIF_CAT_BASE4_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE5 */
+#define ROGUE_CR_BIF_CAT_BASE5 0x1228U
+#define ROGUE_CR_BIF_CAT_BASE5_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE6 */
+#define ROGUE_CR_BIF_CAT_BASE6 0x1230U
+#define ROGUE_CR_BIF_CAT_BASE6_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE7 */
+#define ROGUE_CR_BIF_CAT_BASE7 0x1238U
+#define ROGUE_CR_BIF_CAT_BASE7_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_ALIGNSIZE 4096U
+
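Register groups flagged "with N repeats" are contiguous arrays; the instance stride can be read off the listed offsets (BIF_CAT_BASE0..7 sit 8 bytes apart at 0x1200..0x1238). A standalone sketch of instance addressing plus composing a 4 KiB-aligned page-catalogue base value; the helper names are illustrative:

#include <stdint.h>

/* Address of the i-th BIF_CAT_BASE instance, i < ROGUE_CR_BIF_CAT_BASE_REPEATCOUNT. */
static inline uint32_t bif_cat_base_reg(uint32_t i)
{
	return ROGUE_CR_BIF_CAT_BASE0 + i * 8U; /* 0x1200, 0x1208, ..., 0x1238 */
}

/* pc_addr must be 4 KiB aligned; the field occupies register bits [39:12]. */
static inline uint64_t bif_cat_base_value(uint64_t pc_addr)
{
	return pc_addr & ~ROGUE_CR_BIF_CAT_BASE0_ADDR_CLRMSK;
}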
+/* Register ROGUE_CR_BIF_CAT_BASE_INDEX */
+#define ROGUE_CR_BIF_CAT_BASE_INDEX 0x1240U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_MASKFULL 0x00070707073F0707ULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RVTX_SHIFT 48U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RVTX_CLRMSK 0xFFF8FFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RAY_SHIFT 40U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RAY_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_HOST_SHIFT 32U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_HOST_CLRMSK 0xFFFFFFF8FFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TLA_SHIFT 24U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TLA_CLRMSK 0xFFFFFFFFF8FFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TDM_SHIFT 19U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TDM_CLRMSK 0xFFFFFFFFFFC7FFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_CDM_SHIFT 16U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_CDM_CLRMSK 0xFFFFFFFFFFF8FFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_PIXEL_SHIFT 8U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_PIXEL_CLRMSK 0xFFFFFFFFFFFFF8FFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TA_SHIFT 0U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TA_CLRMSK 0xFFFFFFFFFFFFFFF8ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_VCE0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0 0x1248U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_TE0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0 0x1250U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_ALIST0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0 0x1260U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_VCE1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1 0x1268U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_TE1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1 0x1270U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_ALIST1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1 0x1280U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_MMU_ENTRY_STATUS */
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS 0x1288U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_MASKFULL 0x000000FFFFFFF0F3ULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_ADDRESS_SHIFT 12U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_ADDRESS_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_CAT_BASE_SHIFT 4U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_CAT_BASE_CLRMSK 0xFFFFFFFFFFFFFF0FULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_DATA_TYPE_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_DATA_TYPE_CLRMSK 0xFFFFFFFFFFFFFFFCULL
+
+/* Register ROGUE_CR_BIF_MMU_ENTRY */
+#define ROGUE_CR_BIF_MMU_ENTRY 0x1290U
+#define ROGUE_CR_BIF_MMU_ENTRY_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_SHIFT 1U
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_EN 0x00000002U
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_EN 0x00000001U
+
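The BIF_PM_CAT_BASE_* registers above all share one layout (VALID, WRAP, a 4 KiB-aligned ADDR, and an INIT_PAGE count). A standalone sketch composing such a value from the VCE0 macros; the function and the exact meaning of INIT_PAGE are illustrative assumptions based only on the field names:

#include <stdint.h>

/* Compose a PM catalogue base: 4 KiB-aligned address, initial page, valid bit set. */
static uint64_t pm_cat_base_vce0_value(uint64_t addr, uint64_t init_page)
{
	uint64_t v = addr & ~ROGUE_CR_BIF_PM_CAT_BASE_VCE0_ADDR_CLRMSK;

	v |= (init_page << ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_SHIFT) &
	     ~ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_CLRMSK;
	v |= ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_EN;
	return v;
}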
+/* Register ROGUE_CR_BIF_CTRL_INVAL */
+#define ROGUE_CR_BIF_CTRL_INVAL 0x12A0U
+#define ROGUE_CR_BIF_CTRL_INVAL_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_SHIFT 3U
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_EN 0x00000008U
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_SHIFT 2U
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_EN 0x00000004U
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_SHIFT 1U
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_EN 0x00000002U
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_SHIFT 0U
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_CTRL */
+#define ROGUE_CR_BIF_CTRL 0x12A8U
+#define ROGUE_CR_BIF_CTRL__XE_MEM__MASKFULL 0x000000000000033FULL
+#define ROGUE_CR_BIF_CTRL_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_SHIFT 9U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_EN 0x00000200U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_SHIFT 8U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_EN 0x00000100U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_SHIFT 7U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_EN 0x00000080U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_SHIFT 6U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_EN 0x00000040U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_SHIFT 5U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_EN 0x00000020U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_SHIFT 4U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_EN 0x00000010U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_SHIFT 3U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_EN 0x00000008U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_SHIFT 2U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_EN 0x00000004U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_SHIFT 1U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_EN 0x00000002U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_SHIFT 0U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS 0x12B0U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS 0x12B8U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__MASKFULL 0x001FFFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_SHIFT 52U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_CLRMSK 0xFFEFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_EN 0x0010000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_SB_SHIFT 46U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_SB_CLRMSK 0xFFF03FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_ID_CLRMSK 0xFFFFC0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS 0x12C0U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_EN 0x00000001U
+
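Decoding a fault pairs the MMU_STATUS register (what faulted) with REQ_STATUS (which request). Since ADDRESS_ALIGNSHIFT equals ADDRESS_SHIFT here, the masked REQ_STATUS value is already the 16-byte-aligned byte address. A standalone sketch; the function is illustrative, not the driver's decoder:

#include <stdint.h>
#include <stdio.h>

static void bif_fault_bank0_decode(uint32_t mmu_status, uint64_t req_status)
{
	if (!(mmu_status & ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_EN))
		return;

	uint32_t cat = (mmu_status & ~ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_CLRMSK) >>
		       ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_SHIFT;
	uint64_t addr = req_status & ~ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_CLRMSK;
	int read = !!(req_status & ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_EN);

	printf("BIF fault: cat_base=%u addr=0x%llx %s\n",
	       cat, (unsigned long long)addr, read ? "read" : "write");
}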
+/* Register ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS 0x12C8U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_BIF_MMU_STATUS */
+#define ROGUE_CR_BIF_MMU_STATUS 0x12D0U
+#define ROGUE_CR_BIF_MMU_STATUS__XE_MEM__MASKFULL 0x000000001FFFFFF7ULL
+#define ROGUE_CR_BIF_MMU_STATUS_MASKFULL 0x000000001FFFFFF7ULL
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_SHIFT 28U
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_EN 0x10000000U
+#define ROGUE_CR_BIF_MMU_STATUS_PC_DATA_SHIFT 20U
+#define ROGUE_CR_BIF_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PD_DATA_SHIFT 12U
+#define ROGUE_CR_BIF_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PT_DATA_SHIFT 4U
+#define ROGUE_CR_BIF_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_SHIFT 2U
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_EN 0x00000004U
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_SHIFT 1U
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_EN 0x00000002U
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_EN 0x00000001U
+
+/* Register group: ROGUE_CR_BIF_TILING_CFG, with 8 repeats */
+#define ROGUE_CR_BIF_TILING_CFG_REPEATCOUNT 8U
+/* Register ROGUE_CR_BIF_TILING_CFG0 */
+#define ROGUE_CR_BIF_TILING_CFG0 0x12D8U
+#define ROGUE_CR_BIF_TILING_CFG0_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG0_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG1 */
+#define ROGUE_CR_BIF_TILING_CFG1 0x12E0U
+#define ROGUE_CR_BIF_TILING_CFG1_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG1_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG2 */
+#define ROGUE_CR_BIF_TILING_CFG2 0x12E8U
+#define ROGUE_CR_BIF_TILING_CFG2_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG2_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG3 */
+#define ROGUE_CR_BIF_TILING_CFG3 0x12F0U
+#define ROGUE_CR_BIF_TILING_CFG3_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG3_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG4 */
+#define ROGUE_CR_BIF_TILING_CFG4 0x12F8U
+#define ROGUE_CR_BIF_TILING_CFG4_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG4_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_ALIGNSIZE 4096U
+
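Each tiling range packs a 28-bit MIN and MAX field plus an enable and an x-stride. Under the usual ALIGNSHIFT reading (the field stores the byte address in 4 KiB units), the value composes as in this standalone sketch; the function and that interpretation are assumptions, not taken from this patch:

#include <stdint.h>

/* Compose an enabled tiling range covering [min_addr, max_addr], both 4 KiB aligned. */
static uint64_t bif_tiling_cfg_value(uint64_t min_addr, uint64_t max_addr, uint64_t xstride)
{
	uint64_t v = 0;

	v |= (min_addr >> ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_ALIGNSHIFT) <<
	     ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_SHIFT;
	v |= (max_addr >> ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_ALIGNSHIFT) <<
	     ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_SHIFT;
	v |= xstride << ROGUE_CR_BIF_TILING_CFG0_XSTRIDE_SHIFT;
	v |= ROGUE_CR_BIF_TILING_CFG0_ENABLE_EN;
	return v;
}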
+/* Register ROGUE_CR_BIF_TILING_CFG5 */
+#define ROGUE_CR_BIF_TILING_CFG5 0x1300U
+#define ROGUE_CR_BIF_TILING_CFG5_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG5_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG6 */
+#define ROGUE_CR_BIF_TILING_CFG6 0x1308U
+#define ROGUE_CR_BIF_TILING_CFG6_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG6_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG7 */
+#define ROGUE_CR_BIF_TILING_CFG7 0x1310U
+#define ROGUE_CR_BIF_TILING_CFG7_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG7_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_READS_EXT_STATUS */
+#define ROGUE_CR_BIF_READS_EXT_STATUS 0x1320U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MASKFULL 0x000000000FFFFFFFULL
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MMU_SHIFT 16U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MMU_CLRMSK 0xF000FFFFU
+#define ROGUE_CR_BIF_READS_EXT_STATUS_BANK1_SHIFT 0U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_BANK1_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIF_READS_INT_STATUS */
+#define ROGUE_CR_BIF_READS_INT_STATUS 0x1328U
+#define ROGUE_CR_BIF_READS_INT_STATUS_MASKFULL 0x0000000007FFFFFFULL
+#define ROGUE_CR_BIF_READS_INT_STATUS_MMU_SHIFT 16U
+#define ROGUE_CR_BIF_READS_INT_STATUS_MMU_CLRMSK 0xF800FFFFU
+#define ROGUE_CR_BIF_READS_INT_STATUS_BANK1_SHIFT 0U
+#define ROGUE_CR_BIF_READS_INT_STATUS_BANK1_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_READS_INT_STATUS */
+#define ROGUE_CR_BIFPM_READS_INT_STATUS 0x1330U
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_BANK0_SHIFT 0U
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_BANK0_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_READS_EXT_STATUS */
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS 0x1338U
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_BANK0_SHIFT 0U
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_BANK0_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_STATUS_MMU */
+#define ROGUE_CR_BIFPM_STATUS_MMU 0x1350U
+#define ROGUE_CR_BIFPM_STATUS_MMU_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIFPM_STATUS_MMU_REQUESTS_SHIFT 0U
+#define ROGUE_CR_BIFPM_STATUS_MMU_REQUESTS_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_BIF_STATUS_MMU */
+#define ROGUE_CR_BIF_STATUS_MMU 0x1358U
+#define ROGUE_CR_BIF_STATUS_MMU_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIF_STATUS_MMU_REQUESTS_SHIFT 0U
+#define ROGUE_CR_BIF_STATUS_MMU_REQUESTS_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_BIF_FAULT_READ */
+#define ROGUE_CR_BIF_FAULT_READ 0x13E0U
+#define ROGUE_CR_BIF_FAULT_READ_MASKFULL 0x000000FFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS */
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS 0x1430U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS */
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS 0x1438U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
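The TEXAS BIF fault bank above uses the same field layout (identical SHIFT and CLRMSK values) as the two main BIF banks, so one decoder can walk a small offset table. A standalone sketch; the table type and reuse of the earlier illustrative decoder are assumptions:

#include <stdint.h>

/* (mmu_status, req_status) register pairs sharing one fault-field layout. */
static const struct rogue_fault_bank {
	uint32_t mmu_status;
	uint32_t req_status;
} rogue_fault_banks[] = {
	{ ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS, ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS },
	{ ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS, ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS },
	{ ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS, ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS },
};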
+/* Register ROGUE_CR_MCU_FENCE */
+#define ROGUE_CR_MCU_FENCE 0x1740U
+#define ROGUE_CR_MCU_FENCE_MASKFULL 0x000007FFFFFFFFE0ULL
+#define ROGUE_CR_MCU_FENCE_DM_SHIFT 40U
+#define ROGUE_CR_MCU_FENCE_DM_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_MCU_FENCE_DM_VERTEX 0x0000000000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_PIXEL 0x0000010000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_COMPUTE 0x0000020000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_RAY_VERTEX 0x0000030000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_RAY 0x0000040000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_FASTRENDER 0x0000050000000000ULL
+#define ROGUE_CR_MCU_FENCE_ADDR_SHIFT 5U
+#define ROGUE_CR_MCU_FENCE_ADDR_CLRMSK 0xFFFFFF000000001FULL
+#define ROGUE_CR_MCU_FENCE_ADDR_ALIGNSHIFT 5U
+#define ROGUE_CR_MCU_FENCE_ADDR_ALIGNSIZE 32U
+
+/* Register group: ROGUE_CR_SCRATCH, with 16 repeats */
+#define ROGUE_CR_SCRATCH_REPEATCOUNT 16U
+/* Register ROGUE_CR_SCRATCH0 */
+#define ROGUE_CR_SCRATCH0 0x1A00U
+#define ROGUE_CR_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH1 */
+#define ROGUE_CR_SCRATCH1 0x1A08U
+#define ROGUE_CR_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH2 */
+#define ROGUE_CR_SCRATCH2 0x1A10U
+#define ROGUE_CR_SCRATCH2_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH2_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH3 */
+#define ROGUE_CR_SCRATCH3 0x1A18U
+#define ROGUE_CR_SCRATCH3_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH3_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH4 */
+#define ROGUE_CR_SCRATCH4 0x1A20U
+#define ROGUE_CR_SCRATCH4_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH4_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH4_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH5 */
+#define ROGUE_CR_SCRATCH5 0x1A28U
+#define ROGUE_CR_SCRATCH5_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH5_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH5_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH6 */
+#define ROGUE_CR_SCRATCH6 0x1A30U
+#define ROGUE_CR_SCRATCH6_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH6_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH6_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH7 */
+#define ROGUE_CR_SCRATCH7 0x1A38U
+#define ROGUE_CR_SCRATCH7_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH7_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH7_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH8 */
+#define ROGUE_CR_SCRATCH8 0x1A40U
+#define ROGUE_CR_SCRATCH8_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH8_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH8_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH9 */
+#define ROGUE_CR_SCRATCH9 0x1A48U
+#define ROGUE_CR_SCRATCH9_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH9_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH9_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH10 */
+#define ROGUE_CR_SCRATCH10 0x1A50U
+#define ROGUE_CR_SCRATCH10_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH10_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH10_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH11 */
+#define ROGUE_CR_SCRATCH11 0x1A58U
+#define ROGUE_CR_SCRATCH11_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH11_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH11_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH12 */
+#define ROGUE_CR_SCRATCH12 0x1A60U
+#define ROGUE_CR_SCRATCH12_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH12_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH12_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH13 */
+#define ROGUE_CR_SCRATCH13 0x1A68U
+#define ROGUE_CR_SCRATCH13_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH13_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH13_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH14 */
+#define ROGUE_CR_SCRATCH14 0x1A70U
+#define ROGUE_CR_SCRATCH14_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH14_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH14_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH15 */
+#define ROGUE_CR_SCRATCH15 0x1A78U
+#define ROGUE_CR_SCRATCH15_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH15_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH15_DATA_CLRMSK 0x00000000U
+
+/* Register group: ROGUE_CR_OS0_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS0_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS0_SCRATCH0 */
+#define ROGUE_CR_OS0_SCRATCH0 0x1A80U
+#define ROGUE_CR_OS0_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS0_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS0_SCRATCH1 */
+#define ROGUE_CR_OS0_SCRATCH1 0x1A88U
+#define ROGUE_CR_OS0_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS0_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS0_SCRATCH2 */
+#define ROGUE_CR_OS0_SCRATCH2 0x1A90U
+#define ROGUE_CR_OS0_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS0_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS0_SCRATCH3 */
+#define ROGUE_CR_OS0_SCRATCH3 0x1A98U
+#define ROGUE_CR_OS0_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS0_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS1_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS1_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS1_SCRATCH0 */
+#define ROGUE_CR_OS1_SCRATCH0 0x11A80U
+#define ROGUE_CR_OS1_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS1_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS1_SCRATCH1 */
+#define ROGUE_CR_OS1_SCRATCH1 0x11A88U
+#define ROGUE_CR_OS1_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS1_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS1_SCRATCH2 */
+#define ROGUE_CR_OS1_SCRATCH2 0x11A90U
+#define ROGUE_CR_OS1_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS1_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS1_SCRATCH3 */
+#define ROGUE_CR_OS1_SCRATCH3 0x11A98U
+#define ROGUE_CR_OS1_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS1_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS2_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS2_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS2_SCRATCH0 */
+#define ROGUE_CR_OS2_SCRATCH0 0x21A80U
+#define ROGUE_CR_OS2_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS2_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS2_SCRATCH1 */
+#define ROGUE_CR_OS2_SCRATCH1 0x21A88U
+#define ROGUE_CR_OS2_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS2_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS2_SCRATCH2 */
+#define ROGUE_CR_OS2_SCRATCH2 0x21A90U
+#define ROGUE_CR_OS2_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define
ROGUE_CR_OS2_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS2_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS2_SCRATCH3 */ +#define ROGUE_CR_OS2_SCRATCH3 0x21A98U +#define ROGUE_CR_OS2_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS2_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS2_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + +/* Register group: ROGUE_CR_OS3_SCRATCH, with 2 repeats */ +#define ROGUE_CR_OS3_SCRATCH_REPEATCOUNT 2U +/* Register ROGUE_CR_OS3_SCRATCH0 */ +#define ROGUE_CR_OS3_SCRATCH0 0x31A80U +#define ROGUE_CR_OS3_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS3_SCRATCH0_DATA_SHIFT 0U +#define ROGUE_CR_OS3_SCRATCH0_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS3_SCRATCH1 */ +#define ROGUE_CR_OS3_SCRATCH1 0x31A88U +#define ROGUE_CR_OS3_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS3_SCRATCH1_DATA_SHIFT 0U +#define ROGUE_CR_OS3_SCRATCH1_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS3_SCRATCH2 */ +#define ROGUE_CR_OS3_SCRATCH2 0x31A90U +#define ROGUE_CR_OS3_SCRATCH2_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS3_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS3_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS3_SCRATCH3 */ +#define ROGUE_CR_OS3_SCRATCH3 0x31A98U +#define ROGUE_CR_OS3_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS3_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS3_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + +/* Register group: ROGUE_CR_OS4_SCRATCH, with 2 repeats */ +#define ROGUE_CR_OS4_SCRATCH_REPEATCOUNT 2U +/* Register ROGUE_CR_OS4_SCRATCH0 */ +#define ROGUE_CR_OS4_SCRATCH0 0x41A80U +#define ROGUE_CR_OS4_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS4_SCRATCH0_DATA_SHIFT 0U +#define ROGUE_CR_OS4_SCRATCH0_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS4_SCRATCH1 */ +#define ROGUE_CR_OS4_SCRATCH1 0x41A88U +#define ROGUE_CR_OS4_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS4_SCRATCH1_DATA_SHIFT 0U +#define ROGUE_CR_OS4_SCRATCH1_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS4_SCRATCH2 */ +#define ROGUE_CR_OS4_SCRATCH2 0x41A90U +#define ROGUE_CR_OS4_SCRATCH2_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS4_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS4_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS4_SCRATCH3 */ +#define ROGUE_CR_OS4_SCRATCH3 0x41A98U +#define ROGUE_CR_OS4_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS4_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS4_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + +/* Register group: ROGUE_CR_OS5_SCRATCH, with 2 repeats */ +#define ROGUE_CR_OS5_SCRATCH_REPEATCOUNT 2U +/* Register ROGUE_CR_OS5_SCRATCH0 */ +#define ROGUE_CR_OS5_SCRATCH0 0x51A80U +#define ROGUE_CR_OS5_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS5_SCRATCH0_DATA_SHIFT 0U +#define ROGUE_CR_OS5_SCRATCH0_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS5_SCRATCH1 */ +#define ROGUE_CR_OS5_SCRATCH1 0x51A88U +#define ROGUE_CR_OS5_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS5_SCRATCH1_DATA_SHIFT 0U +#define ROGUE_CR_OS5_SCRATCH1_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS5_SCRATCH2 */ +#define ROGUE_CR_OS5_SCRATCH2 0x51A90U +#define ROGUE_CR_OS5_SCRATCH2_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS5_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS5_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS5_SCRATCH3 */ +#define ROGUE_CR_OS5_SCRATCH3 0x51A98U +#define ROGUE_CR_OS5_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS5_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS5_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + 
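[Editor's note: the OS0..OS5 scratch banks above illustrate the two conventions this header relies on throughout: per-field _SHIFT/_CLRMSK pairs, where _CLRMSK is the *inverted* field mask used to clear the field before inserting a value, and per-OS register banks that repeat at a 0x10000 stride (OS0_SCRATCH0 at 0x1A80, OS1_SCRATCH0 at 0x11A80, and so on). A minimal sketch of how such definitions are typically consumed follows; it assumes the ROGUE_CR_* macros defined above, and the helper names are hypothetical illustrations, not part of this header or of the pvr driver API.]

#include <stdint.h>

/* Hypothetical helper: offset of OS n's SCRATCH0 register, derived from
 * the 0x10000 stride between the per-OS banks defined above. */
static inline uint32_t rogue_cr_os_scratch0(uint32_t os_id)
{
	return ROGUE_CR_OS0_SCRATCH0 + os_id * 0x10000U;
}

/* Hypothetical helper: update the 8-bit DATA field of an OS*_SCRATCH2
 * register value. _CLRMSK clears the field in place; the new value is
 * shifted into position by _SHIFT and trimmed to the field width with
 * ~_CLRMSK. (DATA_SHIFT happens to be 0 here; the same pattern applies
 * unchanged to fields elsewhere in this file with nonzero shifts.) */
static inline uint32_t rogue_cr_scratch2_set_data(uint32_t reg, uint32_t data)
{
	reg &= ROGUE_CR_OS0_SCRATCH2_DATA_CLRMSK;
	reg |= (data << ROGUE_CR_OS0_SCRATCH2_DATA_SHIFT) &
	       ~ROGUE_CR_OS0_SCRATCH2_DATA_CLRMSK;
	return reg;
}

[The same pattern generalizes: single-bit fields additionally provide an _EN constant holding the bit already in position, and address-carrying fields provide _ALIGNSHIFT/_ALIGNSIZE to document the alignment the hardware assumes for the stored address.]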
+/* Register group: ROGUE_CR_OS6_SCRATCH, with 2 repeats */ +#define ROGUE_CR_OS6_SCRATCH_REPEATCOUNT 2U +/* Register ROGUE_CR_OS6_SCRATCH0 */ +#define ROGUE_CR_OS6_SCRATCH0 0x61A80U +#define ROGUE_CR_OS6_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS6_SCRATCH0_DATA_SHIFT 0U +#define ROGUE_CR_OS6_SCRATCH0_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS6_SCRATCH1 */ +#define ROGUE_CR_OS6_SCRATCH1 0x61A88U +#define ROGUE_CR_OS6_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS6_SCRATCH1_DATA_SHIFT 0U +#define ROGUE_CR_OS6_SCRATCH1_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS6_SCRATCH2 */ +#define ROGUE_CR_OS6_SCRATCH2 0x61A90U +#define ROGUE_CR_OS6_SCRATCH2_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS6_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS6_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS6_SCRATCH3 */ +#define ROGUE_CR_OS6_SCRATCH3 0x61A98U +#define ROGUE_CR_OS6_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS6_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS6_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + +/* Register group: ROGUE_CR_OS7_SCRATCH, with 2 repeats */ +#define ROGUE_CR_OS7_SCRATCH_REPEATCOUNT 2U +/* Register ROGUE_CR_OS7_SCRATCH0 */ +#define ROGUE_CR_OS7_SCRATCH0 0x71A80U +#define ROGUE_CR_OS7_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS7_SCRATCH0_DATA_SHIFT 0U +#define ROGUE_CR_OS7_SCRATCH0_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS7_SCRATCH1 */ +#define ROGUE_CR_OS7_SCRATCH1 0x71A88U +#define ROGUE_CR_OS7_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_OS7_SCRATCH1_DATA_SHIFT 0U +#define ROGUE_CR_OS7_SCRATCH1_DATA_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OS7_SCRATCH2 */ +#define ROGUE_CR_OS7_SCRATCH2 0x71A90U +#define ROGUE_CR_OS7_SCRATCH2_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS7_SCRATCH2_DATA_SHIFT 0U +#define ROGUE_CR_OS7_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_OS7_SCRATCH3 */ +#define ROGUE_CR_OS7_SCRATCH3 0x71A98U +#define ROGUE_CR_OS7_SCRATCH3_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_OS7_SCRATCH3_DATA_SHIFT 0U +#define ROGUE_CR_OS7_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_SPFILTER_SIGNAL_DESCR */ +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR 0x2700U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_SHIFT 0U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_CLRMSK 0xFFFF0000U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_ALIGNSHIFT 4U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_ALIGNSIZE 16U + +/* Register ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN */ +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN 0x2708U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_MASKFULL 0x000000FFFFFFFFF0ULL +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_SHIFT 4U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_CLRMSK 0xFFFFFF000000000FULL +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_ALIGNSHIFT 4U +#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_ALIGNSIZE 16U + +/* Register group: ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG, with 16 repeats */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG_REPEATCOUNT 16U +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0 0x3000U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_EN 0x4000000000000000ULL +#define 
ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1 0x3008U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2 0x3010U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_CBASE_CLRMSK 
0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3 0x3018U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4 0x3020U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5 0x3028U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_EN 0x4000000000000000ULL +#define 
ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6 0x3030U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7 0x3038U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_CBASE_CLRMSK 
0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8 0x3040U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9 0x3048U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10 0x3050U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_EN 0x4000000000000000ULL 
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11 0x3058U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12 0x3060U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_CBASE_SHIFT 40U +#define 
ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13 0x3068U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14 0x3070U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15 */ +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15 0x3078U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_MASKFULL 0x7FFFF7FFFFFFF000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_SHIFT 62U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_CLRMSK 
0xBFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_EN 0x4000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_SHIFT 61U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_EN 0x2000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_SHIFT 60U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_EN 0x1000000000000000ULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_SIZE_SHIFT 44U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_CBASE_SHIFT 40U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_BOOT */ +#define ROGUE_CR_FWCORE_BOOT 0x3090U +#define ROGUE_CR_FWCORE_BOOT_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_FWCORE_BOOT_ENABLE_SHIFT 0U +#define ROGUE_CR_FWCORE_BOOT_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_BOOT_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_RESET_ADDR */ +#define ROGUE_CR_FWCORE_RESET_ADDR 0x3098U +#define ROGUE_CR_FWCORE_RESET_ADDR_MASKFULL 0x00000000FFFFFFFEULL +#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_SHIFT 1U +#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_CLRMSK 0x00000001U +#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_ALIGNSHIFT 1U +#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_ALIGNSIZE 2U + +/* Register ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR */ +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR 0x30A0U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_MASKFULL 0x00000000FFFFFFFEULL +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_SHIFT 1U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_CLRMSK 0x00000001U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_ALIGNSHIFT 1U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_ALIGNSIZE 2U + +/* Register ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT */ +#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT 0x30A8U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_SHIFT 0U +#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS */ +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS 0x30B0U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_MASKFULL 0x000000000000F771ULL +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_CAT_BASE_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_PAGE_SIZE_SHIFT 8U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_DATA_TYPE_SHIFT 5U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_SHIFT 4U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_EN 0x00000010U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_SHIFT 0U +#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU +#define 
ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS */ +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS 0x30B8U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_MASKFULL 0x001FFFFFFFFFFFF0ULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_SHIFT 52U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_CLRMSK 0xFFEFFFFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_EN 0x0010000000000000ULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_SB_SHIFT 46U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_SB_CLRMSK 0xFFF03FFFFFFFFFFFULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_ID_SHIFT 40U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFC0FFFFFFFFFFULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_SHIFT 4U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U +#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_ALIGNSIZE 16U + +/* Register ROGUE_CR_FWCORE_MEM_CTRL_INVAL */ +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL 0x30C0U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_MASKFULL 0x000000000000000FULL +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_SHIFT 3U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_EN 0x00000008U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_SHIFT 2U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_EN 0x00000004U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_SHIFT 1U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_EN 0x00000002U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_SHIFT 0U +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_MEM_MMU_STATUS */ +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS 0x30C8U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_MASKFULL 0x000000000FFFFFF7ULL +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PC_DATA_SHIFT 20U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PD_DATA_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PT_DATA_SHIFT 4U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_SHIFT 2U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_EN 0x00000004U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_SHIFT 1U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_EN 0x00000002U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_SHIFT 0U +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS */ +#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS 0x30D8U +#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MASKFULL 0x0000000000000FFFULL +#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MMU_SHIFT 0U +#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MMU_CLRMSK 0xFFFFF000U + +/* Register ROGUE_CR_FWCORE_MEM_READS_INT_STATUS */ +#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS 0x30E0U +#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MASKFULL 0x00000000000007FFULL +#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MMU_SHIFT 0U +#define 
ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MMU_CLRMSK 0xFFFFF800U + +/* Register ROGUE_CR_FWCORE_WRAPPER_FENCE */ +#define ROGUE_CR_FWCORE_WRAPPER_FENCE 0x30E8U +#define ROGUE_CR_FWCORE_WRAPPER_FENCE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_SHIFT 0U +#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_EN 0x00000001U + +/* Register group: ROGUE_CR_FWCORE_MEM_CAT_BASE, with 8 repeats */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE_REPEATCOUNT 8U +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE0 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0 0x30F0U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE1 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1 0x30F8U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE2 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2 0x3100U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE3 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3 0x3108U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE4 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4 0x3110U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE5 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5 0x3118U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE6 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6 0x3120U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_MASKFULL 0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE7 */ +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7 0x3128U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_MASKFULL 
0x000000FFFFFFF000ULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_SHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_CLRMSK 0xFFFFFF0000000FFFULL +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_FWCORE_WDT_RESET */ +#define ROGUE_CR_FWCORE_WDT_RESET 0x3130U +#define ROGUE_CR_FWCORE_WDT_RESET_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_FWCORE_WDT_RESET_EN_SHIFT 0U +#define ROGUE_CR_FWCORE_WDT_RESET_EN_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_WDT_RESET_EN_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_WDT_CTRL */ +#define ROGUE_CR_FWCORE_WDT_CTRL 0x3138U +#define ROGUE_CR_FWCORE_WDT_CTRL_MASKFULL 0x00000000FFFF1F01ULL +#define ROGUE_CR_FWCORE_WDT_CTRL_PROT_SHIFT 16U +#define ROGUE_CR_FWCORE_WDT_CTRL_PROT_CLRMSK 0x0000FFFFU +#define ROGUE_CR_FWCORE_WDT_CTRL_THRESHOLD_SHIFT 8U +#define ROGUE_CR_FWCORE_WDT_CTRL_THRESHOLD_CLRMSK 0xFFFFE0FFU +#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_FWCORE_WDT_COUNT */ +#define ROGUE_CR_FWCORE_WDT_COUNT 0x3140U +#define ROGUE_CR_FWCORE_WDT_COUNT_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_FWCORE_WDT_COUNT_VALUE_SHIFT 0U +#define ROGUE_CR_FWCORE_WDT_COUNT_VALUE_CLRMSK 0x00000000U + +/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED0, with 4 repeats */ +#define ROGUE_CR_FWCORE_DMI_RESERVED0_REPEATCOUNT 4U +/* Register ROGUE_CR_FWCORE_DMI_RESERVED00 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED00 0x3400U +#define ROGUE_CR_FWCORE_DMI_RESERVED00_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED01 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED01 0x3408U +#define ROGUE_CR_FWCORE_DMI_RESERVED01_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED02 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED02 0x3410U +#define ROGUE_CR_FWCORE_DMI_RESERVED02_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED03 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED03 0x3418U +#define ROGUE_CR_FWCORE_DMI_RESERVED03_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_DATA0 */ +#define ROGUE_CR_FWCORE_DMI_DATA0 0x3420U +#define ROGUE_CR_FWCORE_DMI_DATA0_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_DATA1 */ +#define ROGUE_CR_FWCORE_DMI_DATA1 0x3428U +#define ROGUE_CR_FWCORE_DMI_DATA1_MASKFULL 0x0000000000000000ULL + +/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED1, with 5 repeats */ +#define ROGUE_CR_FWCORE_DMI_RESERVED1_REPEATCOUNT 5U +/* Register ROGUE_CR_FWCORE_DMI_RESERVED10 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED10 0x3430U +#define ROGUE_CR_FWCORE_DMI_RESERVED10_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED11 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED11 0x3438U +#define ROGUE_CR_FWCORE_DMI_RESERVED11_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED12 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED12 0x3440U +#define ROGUE_CR_FWCORE_DMI_RESERVED12_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED13 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED13 0x3448U +#define ROGUE_CR_FWCORE_DMI_RESERVED13_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED14 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED14 0x3450U +#define ROGUE_CR_FWCORE_DMI_RESERVED14_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_DMCONTROL */ +#define ROGUE_CR_FWCORE_DMI_DMCONTROL 0x3480U +#define 
ROGUE_CR_FWCORE_DMI_DMCONTROL_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_DMSTATUS */ +#define ROGUE_CR_FWCORE_DMI_DMSTATUS 0x3488U +#define ROGUE_CR_FWCORE_DMI_DMSTATUS_MASKFULL 0x0000000000000000ULL + +/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED2, with 4 repeats */ +#define ROGUE_CR_FWCORE_DMI_RESERVED2_REPEATCOUNT 4U +/* Register ROGUE_CR_FWCORE_DMI_RESERVED20 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED20 0x3490U +#define ROGUE_CR_FWCORE_DMI_RESERVED20_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED21 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED21 0x3498U +#define ROGUE_CR_FWCORE_DMI_RESERVED21_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED22 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED22 0x34A0U +#define ROGUE_CR_FWCORE_DMI_RESERVED22_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED23 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED23 0x34A8U +#define ROGUE_CR_FWCORE_DMI_RESERVED23_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_ABSTRACTCS */ +#define ROGUE_CR_FWCORE_DMI_ABSTRACTCS 0x34B0U +#define ROGUE_CR_FWCORE_DMI_ABSTRACTCS_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_COMMAND */ +#define ROGUE_CR_FWCORE_DMI_COMMAND 0x34B8U +#define ROGUE_CR_FWCORE_DMI_COMMAND_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_SBCS */ +#define ROGUE_CR_FWCORE_DMI_SBCS 0x35C0U +#define ROGUE_CR_FWCORE_DMI_SBCS_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_SBADDRESS0 */ +#define ROGUE_CR_FWCORE_DMI_SBADDRESS0 0x35C8U +#define ROGUE_CR_FWCORE_DMI_SBADDRESS0_MASKFULL 0x0000000000000000ULL + +/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED3, with 2 repeats */ +#define ROGUE_CR_FWCORE_DMI_RESERVED3_REPEATCOUNT 2U +/* Register ROGUE_CR_FWCORE_DMI_RESERVED30 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED30 0x34D0U +#define ROGUE_CR_FWCORE_DMI_RESERVED30_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_RESERVED31 */ +#define ROGUE_CR_FWCORE_DMI_RESERVED31 0x34D8U +#define ROGUE_CR_FWCORE_DMI_RESERVED31_MASKFULL 0x0000000000000000ULL + +/* Register group: ROGUE_CR_FWCORE_DMI_SBDATA, with 4 repeats */ +#define ROGUE_CR_FWCORE_DMI_SBDATA_REPEATCOUNT 4U +/* Register ROGUE_CR_FWCORE_DMI_SBDATA0 */ +#define ROGUE_CR_FWCORE_DMI_SBDATA0 0x35E0U +#define ROGUE_CR_FWCORE_DMI_SBDATA0_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_SBDATA1 */ +#define ROGUE_CR_FWCORE_DMI_SBDATA1 0x35E8U +#define ROGUE_CR_FWCORE_DMI_SBDATA1_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_SBDATA2 */ +#define ROGUE_CR_FWCORE_DMI_SBDATA2 0x35F0U +#define ROGUE_CR_FWCORE_DMI_SBDATA2_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_SBDATA3 */ +#define ROGUE_CR_FWCORE_DMI_SBDATA3 0x35F8U +#define ROGUE_CR_FWCORE_DMI_SBDATA3_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_FWCORE_DMI_HALTSUM0 */ +#define ROGUE_CR_FWCORE_DMI_HALTSUM0 0x3600U +#define ROGUE_CR_FWCORE_DMI_HALTSUM0_MASKFULL 0x0000000000000000ULL + +/* Register ROGUE_CR_SLC_CTRL_MISC */ +#define ROGUE_CR_SLC_CTRL_MISC 0x3800U +#define ROGUE_CR_SLC_CTRL_MISC_MASKFULL 0xFFFFFFFF01FF010FULL +#define ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS_SHIFT 32U +#define ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS_CLRMSK 0x00000000FFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_SHIFT 24U +#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_CLRMSK 0xFFFFFFFFFEFFFFFFULL +#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_EN 0x0000000001000000ULL +#define 
ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SHIFT 16U +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_CLRMSK 0xFFFFFFFFFF00FFFFULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_INTERLEAVED_64_BYTE 0x0000000000000000ULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_INTERLEAVED_128_BYTE 0x0000000000010000ULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SIMPLE_HASH1 0x0000000000100000ULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SIMPLE_HASH2 0x0000000000110000ULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH1 0x0000000000200000ULL +#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH2_SCRAMBLE 0x0000000000210000ULL +#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_SHIFT 8U +#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_CLRMSK 0xFFFFFFFFFFFFFEFFULL +#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_EN 0x0000000000000100ULL +#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_SHIFT 3U +#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_EN 0x0000000000000008ULL +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_SHIFT 2U +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_EN 0x0000000000000004ULL +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_SHIFT 1U +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_CLRMSK 0xFFFFFFFFFFFFFFFDULL +#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_EN 0x0000000000000002ULL +#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_SHIFT 0U +#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_SLC_CTRL_FLUSH_INVAL */ +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL 0x3818U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_MASKFULL 0x0000000080000FFFULL +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_SHIFT 31U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_CLRMSK 0x7FFFFFFFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_EN 0x80000000U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_SHIFT 11U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_CLRMSK 0xFFFFF7FFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_EN 0x00000800U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_SHIFT 10U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_CLRMSK 0xFFFFFBFFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_EN 0x00000400U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_SHIFT 9U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_CLRMSK 0xFFFFFDFFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_EN 0x00000200U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_SHIFT 8U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_EN 0x00000100U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_SHIFT 7U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_EN 0x00000080U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_SHIFT 6U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_EN 0x00000040U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_SHIFT 5U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_EN 0x00000020U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_SHIFT 4U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_EN 
0x00000010U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_SHIFT 3U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_EN 0x00000008U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_SHIFT 2U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_EN 0x00000004U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_SHIFT 1U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_EN 0x00000002U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_SHIFT 0U +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_EN 0x00000001U + +/* Register ROGUE_CR_SLC_STATUS0 */ +#define ROGUE_CR_SLC_STATUS0 0x3820U +#define ROGUE_CR_SLC_STATUS0_MASKFULL 0x0000000000000007ULL +#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_SHIFT 2U +#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_EN 0x00000004U +#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_SHIFT 1U +#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_EN 0x00000002U +#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_SHIFT 0U +#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_EN 0x00000001U + +/* Register ROGUE_CR_SLC_CTRL_BYPASS */ +#define ROGUE_CR_SLC_CTRL_BYPASS 0x3828U +#define ROGUE_CR_SLC_CTRL_BYPASS__XE_MEM__MASKFULL 0x0FFFFFFFFFFF7FFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_MASKFULL 0x000000000FFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_SHIFT 59U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_CLRMSK 0xF7FFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_EN 0x0800000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_SHIFT 58U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_CLRMSK 0xFBFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_EN 0x0400000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_SHIFT 57U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_CLRMSK 0xFDFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_EN 0x0200000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_SHIFT 56U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_CLRMSK 0xFEFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_EN 0x0100000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_SHIFT 55U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_CLRMSK 0xFF7FFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_EN 0x0080000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_SHIFT 54U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_CLRMSK 0xFFBFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_EN 0x0040000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_SHIFT 53U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_CLRMSK 0xFFDFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_EN 0x0020000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_SHIFT 52U +#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_CLRMSK 0xFFEFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_EN 0x0010000000000000ULL +#define 
ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_SHIFT 51U +#define ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_CLRMSK 0xFFF7FFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_EN 0x0008000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_SHIFT 50U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_CLRMSK 0xFFFBFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_EN 0x0004000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_SHIFT 49U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_CLRMSK 0xFFFDFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_EN 0x0002000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_SHIFT 48U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_CLRMSK 0xFFFEFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_EN 0x0001000000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_SHIFT 47U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_CLRMSK 0xFFFF7FFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_EN 0x0000800000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_SHIFT 46U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_CLRMSK 0xFFFFBFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_EN 0x0000400000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_SHIFT 45U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_CLRMSK 0xFFFFDFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_EN 0x0000200000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_SHIFT 44U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_CLRMSK 0xFFFFEFFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_EN 0x0000100000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_SHIFT 43U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_CLRMSK 0xFFFFF7FFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_EN 0x0000080000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_SHIFT 42U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_CLRMSK 0xFFFFFBFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_EN 0x0000040000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_SHIFT 41U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_CLRMSK 0xFFFFFDFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_EN 0x0000020000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_SHIFT 40U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_CLRMSK 0xFFFFFEFFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_EN 0x0000010000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_SHIFT 39U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_CLRMSK 0xFFFFFF7FFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_EN 0x0000008000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_SHIFT 38U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_CLRMSK 0xFFFFFFBFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_EN 0x0000004000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_SHIFT 37U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_CLRMSK 0xFFFFFFDFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_EN 0x0000002000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_SHIFT 36U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_CLRMSK 0xFFFFFFEFFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_EN 0x0000001000000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_SHIFT 35U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_CLRMSK 0xFFFFFFF7FFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_EN 0x0000000800000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_SHIFT 34U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_CLRMSK 0xFFFFFFFBFFFFFFFFULL +#define 
ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_EN 0x0000000400000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_SHIFT 33U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_CLRMSK 0xFFFFFFFDFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_EN 0x0000000200000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_SHIFT 32U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_EN 0x0000000100000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_SHIFT 31U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_EN 0x0000000080000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_SHIFT 30U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_CLRMSK 0xFFFFFFFFBFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_EN 0x0000000040000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_SHIFT 29U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_CLRMSK 0xFFFFFFFFDFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_EN 0x0000000020000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_SHIFT 28U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_CLRMSK 0xFFFFFFFFEFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_EN 0x0000000010000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_SHIFT 27U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_CLRMSK 0xFFFFFFFFF7FFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_EN 0x0000000008000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_SHIFT 26U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_CLRMSK 0xFFFFFFFFFBFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_EN 0x0000000004000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_SHIFT 25U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_CLRMSK 0xFFFFFFFFFDFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_EN 0x0000000002000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_SHIFT 24U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_CLRMSK 0xFFFFFFFFFEFFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_EN 0x0000000001000000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_SHIFT 23U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_CLRMSK 0xFFFFFFFFFF7FFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_EN 0x0000000000800000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_SHIFT 22U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_CLRMSK 0xFFFFFFFFFFBFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_EN 0x0000000000400000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_SHIFT 21U +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_EN 0x0000000000200000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_SHIFT 20U +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_CLRMSK 0xFFFFFFFFFFEFFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_EN 0x0000000000100000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_SHIFT 19U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_CLRMSK 0xFFFFFFFFFFF7FFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_EN 0x0000000000080000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_SHIFT 18U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_CLRMSK 0xFFFFFFFFFFFBFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_EN 0x0000000000040000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_SHIFT 17U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_CLRMSK 0xFFFFFFFFFFFDFFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_EN 0x0000000000020000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_SHIFT 16U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_CLRMSK 0xFFFFFFFFFFFEFFFFULL +#define 
ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_EN 0x0000000000010000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_SHIFT 15U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_CLRMSK 0xFFFFFFFFFFFF7FFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_EN 0x0000000000008000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_SHIFT 14U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_CLRMSK 0xFFFFFFFFFFFFBFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_EN 0x0000000000004000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_SHIFT 13U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_CLRMSK 0xFFFFFFFFFFFFDFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_EN 0x0000000000002000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_SHIFT 12U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_CLRMSK 0xFFFFFFFFFFFFEFFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_EN 0x0000000000001000ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_SHIFT 11U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_CLRMSK 0xFFFFFFFFFFFFF7FFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_EN 0x0000000000000800ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_SHIFT 10U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_EN 0x0000000000000400ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_SHIFT 9U +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_CLRMSK 0xFFFFFFFFFFFFFDFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_EN 0x0000000000000200ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_SHIFT 8U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_CLRMSK 0xFFFFFFFFFFFFFEFFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_EN 0x0000000000000100ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_SHIFT 7U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_CLRMSK 0xFFFFFFFFFFFFFF7FULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_EN 0x0000000000000080ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_SHIFT 6U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_CLRMSK 0xFFFFFFFFFFFFFFBFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_EN 0x0000000000000040ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_SHIFT 5U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_EN 0x0000000000000020ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_SHIFT 4U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_EN 0x0000000000000010ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_SHIFT 3U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_EN 0x0000000000000008ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_SHIFT 2U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_EN 0x0000000000000004ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_SHIFT 1U +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_CLRMSK 0xFFFFFFFFFFFFFFFDULL +#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_EN 0x0000000000000002ULL +#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_SHIFT 0U +#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_SLC_STATUS1 */ +#define ROGUE_CR_SLC_STATUS1 0x3870U +#define ROGUE_CR_SLC_STATUS1_MASKFULL 0x800003FF03FFFFFFULL +#define ROGUE_CR_SLC_STATUS1_PAUSED_SHIFT 63U +#define ROGUE_CR_SLC_STATUS1_PAUSED_CLRMSK 0x7FFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC_STATUS1_PAUSED_EN 0x8000000000000000ULL +#define ROGUE_CR_SLC_STATUS1_READS1_SHIFT 32U +#define ROGUE_CR_SLC_STATUS1_READS1_CLRMSK 
0xFFFFFC00FFFFFFFFULL +#define ROGUE_CR_SLC_STATUS1_READS0_SHIFT 16U +#define ROGUE_CR_SLC_STATUS1_READS0_CLRMSK 0xFFFFFFFFFC00FFFFULL +#define ROGUE_CR_SLC_STATUS1_READS1_EXT_SHIFT 8U +#define ROGUE_CR_SLC_STATUS1_READS1_EXT_CLRMSK 0xFFFFFFFFFFFF00FFULL +#define ROGUE_CR_SLC_STATUS1_READS0_EXT_SHIFT 0U +#define ROGUE_CR_SLC_STATUS1_READS0_EXT_CLRMSK 0xFFFFFFFFFFFFFF00ULL + +/* Register ROGUE_CR_SLC_IDLE */ +#define ROGUE_CR_SLC_IDLE 0x3898U +#define ROGUE_CR_SLC_IDLE__XE_MEM__MASKFULL 0x00000000000003FFULL +#define ROGUE_CR_SLC_IDLE_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_SHIFT 9U +#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_CLRMSK 0xFFFFFDFFU +#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_EN 0x00000200U +#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_SHIFT 8U +#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_EN 0x00000100U +#define ROGUE_CR_SLC_IDLE_IMGBV4_SHIFT 7U +#define ROGUE_CR_SLC_IDLE_IMGBV4_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_SLC_IDLE_IMGBV4_EN 0x00000080U +#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_SHIFT 6U +#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_EN 0x00000040U +#define ROGUE_CR_SLC_IDLE_RBOFIFO_SHIFT 5U +#define ROGUE_CR_SLC_IDLE_RBOFIFO_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SLC_IDLE_RBOFIFO_EN 0x00000020U +#define ROGUE_CR_SLC_IDLE_FRC_CONV_SHIFT 4U +#define ROGUE_CR_SLC_IDLE_FRC_CONV_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SLC_IDLE_FRC_CONV_EN 0x00000010U +#define ROGUE_CR_SLC_IDLE_VXE_CONV_SHIFT 3U +#define ROGUE_CR_SLC_IDLE_VXE_CONV_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SLC_IDLE_VXE_CONV_EN 0x00000008U +#define ROGUE_CR_SLC_IDLE_VXD_CONV_SHIFT 2U +#define ROGUE_CR_SLC_IDLE_VXD_CONV_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SLC_IDLE_VXD_CONV_EN 0x00000004U +#define ROGUE_CR_SLC_IDLE_BIF1_CONV_SHIFT 1U +#define ROGUE_CR_SLC_IDLE_BIF1_CONV_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SLC_IDLE_BIF1_CONV_EN 0x00000002U +#define ROGUE_CR_SLC_IDLE_CBAR_SHIFT 0U +#define ROGUE_CR_SLC_IDLE_CBAR_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SLC_IDLE_CBAR_EN 0x00000001U + +/* Register ROGUE_CR_SLC_STATUS2 */ +#define ROGUE_CR_SLC_STATUS2 0x3908U +#define ROGUE_CR_SLC_STATUS2_MASKFULL 0x000003FF03FFFFFFULL +#define ROGUE_CR_SLC_STATUS2_READS3_SHIFT 32U +#define ROGUE_CR_SLC_STATUS2_READS3_CLRMSK 0xFFFFFC00FFFFFFFFULL +#define ROGUE_CR_SLC_STATUS2_READS2_SHIFT 16U +#define ROGUE_CR_SLC_STATUS2_READS2_CLRMSK 0xFFFFFFFFFC00FFFFULL +#define ROGUE_CR_SLC_STATUS2_READS3_EXT_SHIFT 8U +#define ROGUE_CR_SLC_STATUS2_READS3_EXT_CLRMSK 0xFFFFFFFFFFFF00FFULL +#define ROGUE_CR_SLC_STATUS2_READS2_EXT_SHIFT 0U +#define ROGUE_CR_SLC_STATUS2_READS2_EXT_CLRMSK 0xFFFFFFFFFFFFFF00ULL + +/* Register ROGUE_CR_SLC_CTRL_MISC2 */ +#define ROGUE_CR_SLC_CTRL_MISC2 0x3930U +#define ROGUE_CR_SLC_CTRL_MISC2_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SLC_CTRL_MISC2_SCRAMBLE_BITS_SHIFT 0U +#define ROGUE_CR_SLC_CTRL_MISC2_SCRAMBLE_BITS_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE */ +#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE 0x3938U +#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_SHIFT 0U +#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_EN 0x00000001U + +/* Register ROGUE_CR_USC_UVS0_CHECKSUM */ +#define ROGUE_CR_USC_UVS0_CHECKSUM 0x5000U +#define ROGUE_CR_USC_UVS0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS0_CHECKSUM_VALUE_SHIFT 0U +#define 
ROGUE_CR_USC_UVS0_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVS1_CHECKSUM */ +#define ROGUE_CR_USC_UVS1_CHECKSUM 0x5008U +#define ROGUE_CR_USC_UVS1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS1_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVS1_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVS2_CHECKSUM */ +#define ROGUE_CR_USC_UVS2_CHECKSUM 0x5010U +#define ROGUE_CR_USC_UVS2_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS2_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVS2_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVS3_CHECKSUM */ +#define ROGUE_CR_USC_UVS3_CHECKSUM 0x5018U +#define ROGUE_CR_USC_UVS3_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS3_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVS3_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PPP_SIGNATURE */ +#define ROGUE_CR_PPP_SIGNATURE 0x5020U +#define ROGUE_CR_PPP_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PPP_SIGNATURE_VALUE_SHIFT 0U +#define ROGUE_CR_PPP_SIGNATURE_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TE_SIGNATURE */ +#define ROGUE_CR_TE_SIGNATURE 0x5028U +#define ROGUE_CR_TE_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TE_SIGNATURE_VALUE_SHIFT 0U +#define ROGUE_CR_TE_SIGNATURE_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TE_CHECKSUM */ +#define ROGUE_CR_TE_CHECKSUM 0x5110U +#define ROGUE_CR_TE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TE_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_TE_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVB_CHECKSUM */ +#define ROGUE_CR_USC_UVB_CHECKSUM 0x5118U +#define ROGUE_CR_USC_UVB_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVB_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVB_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_VCE_CHECKSUM */ +#define ROGUE_CR_VCE_CHECKSUM 0x5030U +#define ROGUE_CR_VCE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_VCE_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_VCE_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_ISP_PDS_CHECKSUM */ +#define ROGUE_CR_ISP_PDS_CHECKSUM 0x5038U +#define ROGUE_CR_ISP_PDS_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_ISP_PDS_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_ISP_PDS_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_ISP_TPF_CHECKSUM */ +#define ROGUE_CR_ISP_TPF_CHECKSUM 0x5040U +#define ROGUE_CR_ISP_TPF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_ISP_TPF_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_ISP_TPF_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TFPU_PLANE0_CHECKSUM */ +#define ROGUE_CR_TFPU_PLANE0_CHECKSUM 0x5048U +#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TFPU_PLANE1_CHECKSUM */ +#define ROGUE_CR_TFPU_PLANE1_CHECKSUM 0x5050U +#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PBE_CHECKSUM */ +#define ROGUE_CR_PBE_CHECKSUM 0x5058U +#define ROGUE_CR_PBE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PBE_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_PBE_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PDS_DOUTM_STM_SIGNATURE */ +#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE 0x5060U +#define 
ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_VALUE_SHIFT 0U +#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_IFPU_ISP_CHECKSUM */ +#define ROGUE_CR_IFPU_ISP_CHECKSUM 0x5068U +#define ROGUE_CR_IFPU_ISP_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_IFPU_ISP_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_IFPU_ISP_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVS4_CHECKSUM */ +#define ROGUE_CR_USC_UVS4_CHECKSUM 0x5100U +#define ROGUE_CR_USC_UVS4_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS4_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVS4_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_UVS5_CHECKSUM */ +#define ROGUE_CR_USC_UVS5_CHECKSUM 0x5108U +#define ROGUE_CR_USC_UVS5_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_UVS5_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_USC_UVS5_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PPP_CLIP_CHECKSUM */ +#define ROGUE_CR_PPP_CLIP_CHECKSUM 0x5120U +#define ROGUE_CR_PPP_CLIP_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PPP_CLIP_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_PPP_CLIP_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_TA_PHASE */ +#define ROGUE_CR_PERF_TA_PHASE 0x6008U +#define ROGUE_CR_PERF_TA_PHASE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_TA_PHASE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_TA_PHASE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_3D_PHASE */ +#define ROGUE_CR_PERF_3D_PHASE 0x6010U +#define ROGUE_CR_PERF_3D_PHASE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_3D_PHASE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_3D_PHASE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_COMPUTE_PHASE */ +#define ROGUE_CR_PERF_COMPUTE_PHASE 0x6018U +#define ROGUE_CR_PERF_COMPUTE_PHASE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_COMPUTE_PHASE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_COMPUTE_PHASE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_TA_CYCLE */ +#define ROGUE_CR_PERF_TA_CYCLE 0x6020U +#define ROGUE_CR_PERF_TA_CYCLE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_TA_CYCLE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_TA_CYCLE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_3D_CYCLE */ +#define ROGUE_CR_PERF_3D_CYCLE 0x6028U +#define ROGUE_CR_PERF_3D_CYCLE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_3D_CYCLE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_3D_CYCLE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_COMPUTE_CYCLE */ +#define ROGUE_CR_PERF_COMPUTE_CYCLE 0x6030U +#define ROGUE_CR_PERF_COMPUTE_CYCLE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_COMPUTE_CYCLE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_COMPUTE_CYCLE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_TA_OR_3D_CYCLE */ +#define ROGUE_CR_PERF_TA_OR_3D_CYCLE 0x6038U +#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_INITIAL_TA_CYCLE */ +#define ROGUE_CR_PERF_INITIAL_TA_CYCLE 0x6040U +#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC0_READ_STALL */ +#define ROGUE_CR_PERF_SLC0_READ_STALL 0x60B8U +#define ROGUE_CR_PERF_SLC0_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define 
ROGUE_CR_PERF_SLC0_READ_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC0_READ_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC0_WRITE_STALL */ +#define ROGUE_CR_PERF_SLC0_WRITE_STALL 0x60C0U +#define ROGUE_CR_PERF_SLC0_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC0_WRITE_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC0_WRITE_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC1_READ_STALL */ +#define ROGUE_CR_PERF_SLC1_READ_STALL 0x60E0U +#define ROGUE_CR_PERF_SLC1_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC1_READ_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC1_READ_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC1_WRITE_STALL */ +#define ROGUE_CR_PERF_SLC1_WRITE_STALL 0x60E8U +#define ROGUE_CR_PERF_SLC1_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC1_WRITE_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC1_WRITE_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC2_READ_STALL */ +#define ROGUE_CR_PERF_SLC2_READ_STALL 0x6158U +#define ROGUE_CR_PERF_SLC2_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC2_READ_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC2_READ_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC2_WRITE_STALL */ +#define ROGUE_CR_PERF_SLC2_WRITE_STALL 0x6160U +#define ROGUE_CR_PERF_SLC2_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC2_WRITE_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC2_WRITE_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC3_READ_STALL */ +#define ROGUE_CR_PERF_SLC3_READ_STALL 0x6180U +#define ROGUE_CR_PERF_SLC3_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC3_READ_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC3_READ_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_SLC3_WRITE_STALL */ +#define ROGUE_CR_PERF_SLC3_WRITE_STALL 0x6188U +#define ROGUE_CR_PERF_SLC3_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_SLC3_WRITE_STALL_COUNT_SHIFT 0U +#define ROGUE_CR_PERF_SLC3_WRITE_STALL_COUNT_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PERF_3D_SPINUP */ +#define ROGUE_CR_PERF_3D_SPINUP 0x6220U +#define ROGUE_CR_PERF_3D_SPINUP_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PERF_3D_SPINUP_CYCLES_SHIFT 0U +#define ROGUE_CR_PERF_3D_SPINUP_CYCLES_CLRMSK 0x00000000U + +/* Register ROGUE_CR_AXI_ACE_LITE_CONFIGURATION */ +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION 0x38C0U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_MASKFULL 0x00003FFFFFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_SHIFT 45U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_CLRMSK 0xFFFFDFFFFFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_EN 0x0000200000000000ULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_OSID_SECURITY_SHIFT 37U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_OSID_SECURITY_CLRMSK 0xFFFFE01FFFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_SHIFT 36U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_CLRMSK \ + 0xFFFFFFEFFFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_EN \ + 0x0000001000000000ULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_SHIFT 35U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_CLRMSK 0xFFFFFFF7FFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_EN 0x0000000800000000ULL +#define 
ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_SHIFT 34U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_CLRMSK 0xFFFFFFFBFFFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_EN 0x0000000400000000ULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_SHIFT 30U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_CLRMSK 0xFFFFFFFC3FFFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_SHIFT 26U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_CLRMSK 0xFFFFFFFFC3FFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_SHIFT 22U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_CLRMSK 0xFFFFFFFFFC3FFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_BARRIER_SHIFT 20U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_BARRIER_CLRMSK 0xFFFFFFFFFFCFFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_BARRIER_SHIFT 18U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_BARRIER_CLRMSK 0xFFFFFFFFFFF3FFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_SHIFT 16U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_CLRMSK 0xFFFFFFFFFFFCFFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_SHIFT 14U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_CLRMSK 0xFFFFFFFFFFFF3FFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_SHIFT 12U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_CLRMSK 0xFFFFFFFFFFFFCFFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_SHIFT 10U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFF3FFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_SHIFT 8U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFCFFULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_NON_SNOOPING_SHIFT 4U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFF0FULL +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_NON_SNOOPING_SHIFT 0U +#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFFF0ULL + +/* Register ROGUE_CR_POWER_ESTIMATE_RESULT */ +#define ROGUE_CR_POWER_ESTIMATE_RESULT 0x6328U +#define ROGUE_CR_POWER_ESTIMATE_RESULT_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_POWER_ESTIMATE_RESULT_VALUE_SHIFT 0U +#define ROGUE_CR_POWER_ESTIMATE_RESULT_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TA_PERF */ +#define ROGUE_CR_TA_PERF 0x7600U +#define ROGUE_CR_TA_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_TA_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_TA_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_TA_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_TA_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_TA_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_TA_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_TA_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_TA_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_TA_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_TA_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_TA_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_TA_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_TA_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_TA_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_TA_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_TA_PERF_SELECT0 */ +#define ROGUE_CR_TA_PERF_SELECT0 0x7608U +#define ROGUE_CR_TA_PERF_SELECT0_MASKFULL 
0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_TA_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TA_PERF_SELECT1 */ +#define ROGUE_CR_TA_PERF_SELECT1 0x7610U +#define ROGUE_CR_TA_PERF_SELECT1_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT1_MODE_SHIFT 21U +#define ROGUE_CR_TA_PERF_SELECT1_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT1_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TA_PERF_SELECT1_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TA_PERF_SELECT1_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TA_PERF_SELECT1_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TA_PERF_SELECT1_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TA_PERF_SELECT2 */ +#define ROGUE_CR_TA_PERF_SELECT2 0x7618U +#define ROGUE_CR_TA_PERF_SELECT2_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT2_MODE_SHIFT 21U +#define ROGUE_CR_TA_PERF_SELECT2_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT2_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TA_PERF_SELECT2_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TA_PERF_SELECT2_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TA_PERF_SELECT2_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TA_PERF_SELECT2_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TA_PERF_SELECT3 */ +#define ROGUE_CR_TA_PERF_SELECT3 0x7620U +#define ROGUE_CR_TA_PERF_SELECT3_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT3_MODE_SHIFT 21U +#define ROGUE_CR_TA_PERF_SELECT3_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TA_PERF_SELECT3_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TA_PERF_SELECT3_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TA_PERF_SELECT3_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TA_PERF_SELECT3_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TA_PERF_SELECT3_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TA_PERF_SELECTED_BITS */ +#define ROGUE_CR_TA_PERF_SELECTED_BITS 0x7648U +#define ROGUE_CR_TA_PERF_SELECTED_BITS_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG3_SHIFT 48U +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG3_CLRMSK 0x0000FFFFFFFFFFFFULL 
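
Every *_PERF counter block in this header (TA, rasterisation, HUB BIF PM cache, TPU MCU L0, USC, Tornado, Texas, Jones, Blackpearl, PBE) repeats one pattern: a control register with a CTRL_ENABLE bit and per-counter CLR_n strobes, SELECTn registers whose GROUP_SELECT/BIT_SELECT fields pick the event (plus BATCH_MIN/BATCH_MAX and MODE), and 32-bit COUNTER_n readback registers defined just below. A minimal sketch of the read-modify-write idiom the CLRMSK/SHIFT pairs are built for, using the TA instance; the pvr_cr_read64()/pvr_cr_write64() accessors are hypothetical stand-ins for the driver's real MMIO helpers, and the assumption that CLR_0 is a self-clearing strobe is not confirmed by the defines alone.

#include <linux/io.h>
#include <linux/types.h>

/* Assumed 64-bit register accessors; not part of this header. */
u64 pvr_cr_read64(void __iomem *regs, u32 reg);
void pvr_cr_write64(void __iomem *regs, u32 reg, u64 val);

static void rogue_ta_perf_counter0_start(void __iomem *regs, u64 group, u64 bits)
{
	u64 sel = pvr_cr_read64(regs, ROGUE_CR_TA_PERF_SELECT0);

	/* Clear each field with its CLRMSK, then OR in the shifted value. */
	sel &= ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_CLRMSK;
	sel |= group << ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_SHIFT;
	sel &= ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_CLRMSK;
	sel |= bits << ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_SHIFT;

	/* MASKFULL bounds the writable bits of the register. */
	pvr_cr_write64(regs, ROGUE_CR_TA_PERF_SELECT0,
		       sel & ROGUE_CR_TA_PERF_SELECT0_MASKFULL);

	/* Zero counter 0 (CLR_0 assumed self-clearing), then enable the block. */
	pvr_cr_write64(regs, ROGUE_CR_TA_PERF, ROGUE_CR_TA_PERF_CLR_0_EN);
	pvr_cr_write64(regs, ROGUE_CR_TA_PERF, ROGUE_CR_TA_PERF_CTRL_ENABLE_EN);
}

static u32 rogue_ta_perf_counter0_read(void __iomem *regs)
{
	/* COUNTER_0 is a 32-bit value per its MASKFULL. */
	return (u32)pvr_cr_read64(regs, ROGUE_CR_TA_PERF_COUNTER_0);
}

The same helper shape applies verbatim to the other instances by swapping the register prefix; only Texas and Blackpearl differ, with a 6-bit GROUP_SELECT and MODE at bit 31 instead of bit 21.
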
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG2_SHIFT 32U +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG2_CLRMSK 0xFFFF0000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG1_SHIFT 16U +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG1_CLRMSK 0xFFFFFFFF0000FFFFULL +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG0_SHIFT 0U +#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG0_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TA_PERF_COUNTER_0 */ +#define ROGUE_CR_TA_PERF_COUNTER_0 0x7650U +#define ROGUE_CR_TA_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_TA_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TA_PERF_COUNTER_1 */ +#define ROGUE_CR_TA_PERF_COUNTER_1 0x7658U +#define ROGUE_CR_TA_PERF_COUNTER_1_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_COUNTER_1_REG_SHIFT 0U +#define ROGUE_CR_TA_PERF_COUNTER_1_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TA_PERF_COUNTER_2 */ +#define ROGUE_CR_TA_PERF_COUNTER_2 0x7660U +#define ROGUE_CR_TA_PERF_COUNTER_2_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_COUNTER_2_REG_SHIFT 0U +#define ROGUE_CR_TA_PERF_COUNTER_2_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TA_PERF_COUNTER_3 */ +#define ROGUE_CR_TA_PERF_COUNTER_3 0x7668U +#define ROGUE_CR_TA_PERF_COUNTER_3_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TA_PERF_COUNTER_3_REG_SHIFT 0U +#define ROGUE_CR_TA_PERF_COUNTER_3_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_RASTERISATION_PERF */ +#define ROGUE_CR_RASTERISATION_PERF 0x7700U +#define ROGUE_CR_RASTERISATION_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_RASTERISATION_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_RASTERISATION_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_RASTERISATION_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_RASTERISATION_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_RASTERISATION_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_RASTERISATION_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_RASTERISATION_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_RASTERISATION_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_RASTERISATION_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_RASTERISATION_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_RASTERISATION_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_RASTERISATION_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_RASTERISATION_PERF_SELECT0 */ +#define ROGUE_CR_RASTERISATION_PERF_SELECT0 0x7708U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_RASTERISATION_PERF_COUNTER_0 */ +#define 
ROGUE_CR_RASTERISATION_PERF_COUNTER_0 0x7750U +#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF */ +#define ROGUE_CR_HUB_BIFPMCACHE_PERF 0x7800U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0 */ +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0 0x7808U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0 */ +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0 0x7850U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TPU_MCU_L0_PERF */ +#define ROGUE_CR_TPU_MCU_L0_PERF 0x7900U +#define ROGUE_CR_TPU_MCU_L0_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_SHIFT 0U +#define 
ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_TPU_MCU_L0_PERF_SELECT0 */ +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0 0x7908U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0 */ +#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0 0x7950U +#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_USC_PERF */ +#define ROGUE_CR_USC_PERF 0x8100U +#define ROGUE_CR_USC_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_USC_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_USC_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_USC_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_USC_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_USC_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_USC_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_USC_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_USC_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_USC_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_USC_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_USC_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_USC_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_USC_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_USC_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_USC_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_USC_PERF_SELECT0 */ +#define ROGUE_CR_USC_PERF_SELECT0 0x8108U +#define ROGUE_CR_USC_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_USC_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_USC_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_USC_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_USC_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_USC_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_USC_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_USC_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_USC_PERF_COUNTER_0 */ +#define ROGUE_CR_USC_PERF_COUNTER_0 0x8150U +#define ROGUE_CR_USC_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_USC_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_USC_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_JONES_IDLE */ +#define ROGUE_CR_JONES_IDLE 0x8328U +#define ROGUE_CR_JONES_IDLE_MASKFULL 0x0000000000007FFFULL +#define ROGUE_CR_JONES_IDLE_TDM_SHIFT 14U +#define 
ROGUE_CR_JONES_IDLE_TDM_CLRMSK 0xFFFFBFFFU +#define ROGUE_CR_JONES_IDLE_TDM_EN 0x00004000U +#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_SHIFT 13U +#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_CLRMSK 0xFFFFDFFFU +#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_EN 0x00002000U +#define ROGUE_CR_JONES_IDLE_FB_CDC_SHIFT 12U +#define ROGUE_CR_JONES_IDLE_FB_CDC_CLRMSK 0xFFFFEFFFU +#define ROGUE_CR_JONES_IDLE_FB_CDC_EN 0x00001000U +#define ROGUE_CR_JONES_IDLE_MMU_SHIFT 11U +#define ROGUE_CR_JONES_IDLE_MMU_CLRMSK 0xFFFFF7FFU +#define ROGUE_CR_JONES_IDLE_MMU_EN 0x00000800U +#define ROGUE_CR_JONES_IDLE_TLA_SHIFT 10U +#define ROGUE_CR_JONES_IDLE_TLA_CLRMSK 0xFFFFFBFFU +#define ROGUE_CR_JONES_IDLE_TLA_EN 0x00000400U +#define ROGUE_CR_JONES_IDLE_GARTEN_SHIFT 9U +#define ROGUE_CR_JONES_IDLE_GARTEN_CLRMSK 0xFFFFFDFFU +#define ROGUE_CR_JONES_IDLE_GARTEN_EN 0x00000200U +#define ROGUE_CR_JONES_IDLE_HOSTIF_SHIFT 8U +#define ROGUE_CR_JONES_IDLE_HOSTIF_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_JONES_IDLE_HOSTIF_EN 0x00000100U +#define ROGUE_CR_JONES_IDLE_SOCIF_SHIFT 7U +#define ROGUE_CR_JONES_IDLE_SOCIF_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_JONES_IDLE_SOCIF_EN 0x00000080U +#define ROGUE_CR_JONES_IDLE_TILING_SHIFT 6U +#define ROGUE_CR_JONES_IDLE_TILING_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_JONES_IDLE_TILING_EN 0x00000040U +#define ROGUE_CR_JONES_IDLE_IPP_SHIFT 5U +#define ROGUE_CR_JONES_IDLE_IPP_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_JONES_IDLE_IPP_EN 0x00000020U +#define ROGUE_CR_JONES_IDLE_USCS_SHIFT 4U +#define ROGUE_CR_JONES_IDLE_USCS_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_JONES_IDLE_USCS_EN 0x00000010U +#define ROGUE_CR_JONES_IDLE_PM_SHIFT 3U +#define ROGUE_CR_JONES_IDLE_PM_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_JONES_IDLE_PM_EN 0x00000008U +#define ROGUE_CR_JONES_IDLE_CDM_SHIFT 2U +#define ROGUE_CR_JONES_IDLE_CDM_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_JONES_IDLE_CDM_EN 0x00000004U +#define ROGUE_CR_JONES_IDLE_VDM_SHIFT 1U +#define ROGUE_CR_JONES_IDLE_VDM_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_JONES_IDLE_VDM_EN 0x00000002U +#define ROGUE_CR_JONES_IDLE_BIF_SHIFT 0U +#define ROGUE_CR_JONES_IDLE_BIF_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_JONES_IDLE_BIF_EN 0x00000001U + +/* Register ROGUE_CR_TORNADO_PERF */ +#define ROGUE_CR_TORNADO_PERF 0x8228U +#define ROGUE_CR_TORNADO_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_TORNADO_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_TORNADO_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_TORNADO_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_TORNADO_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_TORNADO_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_TORNADO_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_TORNADO_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_TORNADO_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_TORNADO_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_TORNADO_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_TORNADO_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_TORNADO_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_TORNADO_PERF_SELECT0 */ +#define ROGUE_CR_TORNADO_PERF_SELECT0 0x8230U +#define ROGUE_CR_TORNADO_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define 
ROGUE_CR_TORNADO_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_TORNADO_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_TORNADO_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_TORNADO_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TORNADO_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_TORNADO_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TORNADO_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TORNADO_PERF_COUNTER_0 */ +#define ROGUE_CR_TORNADO_PERF_COUNTER_0 0x8268U +#define ROGUE_CR_TORNADO_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TORNADO_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_TORNADO_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_TEXAS_PERF */ +#define ROGUE_CR_TEXAS_PERF 0x8290U +#define ROGUE_CR_TEXAS_PERF_MASKFULL 0x000000000000007FULL +#define ROGUE_CR_TEXAS_PERF_CLR_5_SHIFT 6U +#define ROGUE_CR_TEXAS_PERF_CLR_5_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_TEXAS_PERF_CLR_5_EN 0x00000040U +#define ROGUE_CR_TEXAS_PERF_CLR_4_SHIFT 5U +#define ROGUE_CR_TEXAS_PERF_CLR_4_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_TEXAS_PERF_CLR_4_EN 0x00000020U +#define ROGUE_CR_TEXAS_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_TEXAS_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_TEXAS_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_TEXAS_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_TEXAS_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_TEXAS_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_TEXAS_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_TEXAS_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_TEXAS_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_TEXAS_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_TEXAS_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_TEXAS_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_TEXAS_PERF_SELECT0 */ +#define ROGUE_CR_TEXAS_PERF_SELECT0 0x8298U +#define ROGUE_CR_TEXAS_PERF_SELECT0_MASKFULL 0x3FFF3FFF803FFFFFULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_SHIFT 31U +#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_EN 0x0000000080000000ULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_TEXAS_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFC0FFFFULL +#define ROGUE_CR_TEXAS_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_TEXAS_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_TEXAS_PERF_COUNTER_0 */ +#define ROGUE_CR_TEXAS_PERF_COUNTER_0 0x82D8U +#define ROGUE_CR_TEXAS_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_TEXAS_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_TEXAS_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_JONES_PERF */ +#define ROGUE_CR_JONES_PERF 0x8330U +#define ROGUE_CR_JONES_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_JONES_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_JONES_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_JONES_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_JONES_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_JONES_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_JONES_PERF_CLR_2_EN 0x00000008U +#define 
ROGUE_CR_JONES_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_JONES_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_JONES_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_JONES_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_JONES_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_JONES_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_JONES_PERF_SELECT0 */ +#define ROGUE_CR_JONES_PERF_SELECT0 0x8338U +#define ROGUE_CR_JONES_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_JONES_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_JONES_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_JONES_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_JONES_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_JONES_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_JONES_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_JONES_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_JONES_PERF_COUNTER_0 */ +#define ROGUE_CR_JONES_PERF_COUNTER_0 0x8368U +#define ROGUE_CR_JONES_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_JONES_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_JONES_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_BLACKPEARL_PERF */ +#define ROGUE_CR_BLACKPEARL_PERF 0x8400U +#define ROGUE_CR_BLACKPEARL_PERF_MASKFULL 0x000000000000007FULL +#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_SHIFT 6U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_EN 0x00000040U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_SHIFT 5U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_EN 0x00000020U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_BLACKPEARL_PERF_SELECT0 */ +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0 0x8408U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MASKFULL 0x3FFF3FFF803FFFFFULL +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_SHIFT 31U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define 
ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_EN 0x0000000080000000ULL +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFC0FFFFULL +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_BLACKPEARL_PERF_COUNTER_0 */ +#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0 0x8448U +#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_PBE_PERF */ +#define ROGUE_CR_PBE_PERF 0x8478U +#define ROGUE_CR_PBE_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_PBE_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_PBE_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_PBE_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_PBE_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_PBE_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_PBE_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_PBE_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_PBE_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_PBE_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_PBE_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_PBE_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_PBE_PERF_CLR_0_EN 0x00000002U +#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_PBE_PERF_SELECT0 */ +#define ROGUE_CR_PBE_PERF_SELECT0 0x8480U +#define ROGUE_CR_PBE_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_PBE_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_PBE_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_PBE_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_PBE_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_PBE_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_PBE_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_PBE_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_PBE_PERF_COUNTER_0 */ +#define ROGUE_CR_PBE_PERF_COUNTER_0 0x84B0U +#define ROGUE_CR_PBE_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_PBE_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_PBE_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_OCP_REVINFO */ +#define ROGUE_CR_OCP_REVINFO 0x9000U +#define ROGUE_CR_OCP_REVINFO_MASKFULL 0x00000007FFFFFFFFULL +#define ROGUE_CR_OCP_REVINFO_HWINFO_SYSBUS_SHIFT 33U +#define ROGUE_CR_OCP_REVINFO_HWINFO_SYSBUS_CLRMSK 0xFFFFFFF9FFFFFFFFULL +#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_SHIFT 32U +#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_EN 0x0000000100000000ULL +#define ROGUE_CR_OCP_REVINFO_REVISION_SHIFT 0U +#define ROGUE_CR_OCP_REVINFO_REVISION_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_OCP_SYSCONFIG */ +#define ROGUE_CR_OCP_SYSCONFIG 0x9010U +#define ROGUE_CR_OCP_SYSCONFIG_MASKFULL 0x0000000000000FFFULL +#define ROGUE_CR_OCP_SYSCONFIG_DUST2_STANDBY_MODE_SHIFT 10U +#define ROGUE_CR_OCP_SYSCONFIG_DUST2_STANDBY_MODE_CLRMSK 0xFFFFF3FFU +#define ROGUE_CR_OCP_SYSCONFIG_DUST1_STANDBY_MODE_SHIFT 
8U +#define ROGUE_CR_OCP_SYSCONFIG_DUST1_STANDBY_MODE_CLRMSK 0xFFFFFCFFU +#define ROGUE_CR_OCP_SYSCONFIG_DUST0_STANDBY_MODE_SHIFT 6U +#define ROGUE_CR_OCP_SYSCONFIG_DUST0_STANDBY_MODE_CLRMSK 0xFFFFFF3FU +#define ROGUE_CR_OCP_SYSCONFIG_RASCAL_STANDBYMODE_SHIFT 4U +#define ROGUE_CR_OCP_SYSCONFIG_RASCAL_STANDBYMODE_CLRMSK 0xFFFFFFCFU +#define ROGUE_CR_OCP_SYSCONFIG_STANDBY_MODE_SHIFT 2U +#define ROGUE_CR_OCP_SYSCONFIG_STANDBY_MODE_CLRMSK 0xFFFFFFF3U +#define ROGUE_CR_OCP_SYSCONFIG_IDLE_MODE_SHIFT 0U +#define ROGUE_CR_OCP_SYSCONFIG_IDLE_MODE_CLRMSK 0xFFFFFFFCU + +/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_0 */ +#define ROGUE_CR_OCP_IRQSTATUS_RAW_0 0x9020U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_1 */ +#define ROGUE_CR_OCP_IRQSTATUS_RAW_1 0x9028U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_2 */ +#define ROGUE_CR_OCP_IRQSTATUS_RAW_2 0x9030U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQSTATUS_0 */ +#define ROGUE_CR_OCP_IRQSTATUS_0 0x9038U +#define ROGUE_CR_OCP_IRQSTATUS_0_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQSTATUS_1 */ +#define ROGUE_CR_OCP_IRQSTATUS_1 0x9040U +#define ROGUE_CR_OCP_IRQSTATUS_1_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQSTATUS_2 */ +#define ROGUE_CR_OCP_IRQSTATUS_2 0x9048U +#define ROGUE_CR_OCP_IRQSTATUS_2_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_SHIFT 0U +#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQENABLE_SET_0 */ +#define ROGUE_CR_OCP_IRQENABLE_SET_0 0x9050U +#define ROGUE_CR_OCP_IRQENABLE_SET_0_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQENABLE_SET_1 */ +#define ROGUE_CR_OCP_IRQENABLE_SET_1 0x9058U +#define ROGUE_CR_OCP_IRQENABLE_SET_1_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_EN 0x00000001U + +/* Register 
ROGUE_CR_OCP_IRQENABLE_SET_2 */ +#define ROGUE_CR_OCP_IRQENABLE_SET_2 0x9060U +#define ROGUE_CR_OCP_IRQENABLE_SET_2_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQENABLE_CLR_0 */ +#define ROGUE_CR_OCP_IRQENABLE_CLR_0 0x9068U +#define ROGUE_CR_OCP_IRQENABLE_CLR_0_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQENABLE_CLR_1 */ +#define ROGUE_CR_OCP_IRQENABLE_CLR_1 0x9070U +#define ROGUE_CR_OCP_IRQENABLE_CLR_1_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQENABLE_CLR_2 */ +#define ROGUE_CR_OCP_IRQENABLE_CLR_2 0x9078U +#define ROGUE_CR_OCP_IRQENABLE_CLR_2_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_SHIFT 0U +#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_EN 0x00000001U + +/* Register ROGUE_CR_OCP_IRQ_EVENT */ +#define ROGUE_CR_OCP_IRQ_EVENT 0x9080U +#define ROGUE_CR_OCP_IRQ_EVENT_MASKFULL 0x00000000000FFFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_SHIFT 19U +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_CLRMSK 0xFFFFFFFFFFF7FFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_EN 0x0000000000080000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_SHIFT 18U +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_CLRMSK 0xFFFFFFFFFFFBFFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_EN 0x0000000000040000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_SHIFT 17U +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_CLRMSK 0xFFFFFFFFFFFDFFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_EN 0x0000000000020000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_SHIFT 16U +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_CLRMSK 0xFFFFFFFFFFFEFFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_EN 0x0000000000010000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_SHIFT 15U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFF7FFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000008000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_SHIFT 14U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFBFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_EN 0x0000000000004000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_SHIFT 13U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFDFFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_EN 0x0000000000002000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_SHIFT 12U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFEFFFULL +#define 
ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_EN 0x0000000000001000ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_SHIFT 11U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFF7FFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000800ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_SHIFT 10U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_EN 0x0000000000000400ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_SHIFT 9U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFDFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_EN 0x0000000000000200ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_SHIFT 8U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFEFFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_EN 0x0000000000000100ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_SHIFT 7U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFFF7FULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000080ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_SHIFT 6U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFFBFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_EN 0x0000000000000040ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_SHIFT 5U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_EN 0x0000000000000020ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_SHIFT 4U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_EN 0x0000000000000010ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_SHIFT 3U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000008ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_SHIFT 2U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_EN 0x0000000000000004ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_SHIFT 1U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFFFDULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_EN 0x0000000000000002ULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_SHIFT 0U +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_OCP_DEBUG_CONFIG */ +#define ROGUE_CR_OCP_DEBUG_CONFIG 0x9088U +#define ROGUE_CR_OCP_DEBUG_CONFIG_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_SHIFT 0U +#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_EN 0x00000001U + +/* Register ROGUE_CR_OCP_DEBUG_STATUS */ +#define ROGUE_CR_OCP_DEBUG_STATUS 0x9090U +#define ROGUE_CR_OCP_DEBUG_STATUS_MASKFULL 0x001F1F77FFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SDISCACK_SHIFT 51U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SDISCACK_CLRMSK 0xFFE7FFFFFFFFFFFFULL +#define 
ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_SHIFT 50U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_CLRMSK 0xFFFBFFFFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_EN 0x0004000000000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_MCONNECT_SHIFT 48U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_MCONNECT_CLRMSK 0xFFFCFFFFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SDISCACK_SHIFT 43U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SDISCACK_CLRMSK 0xFFFFE7FFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_SHIFT 42U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_CLRMSK 0xFFFFFBFFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_EN 0x0000040000000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_MCONNECT_SHIFT 40U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_MCONNECT_CLRMSK 0xFFFFFCFFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_SHIFT 38U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_CLRMSK 0xFFFFFFBFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_EN 0x0000004000000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_SHIFT 37U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_CLRMSK 0xFFFFFFDFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_EN 0x0000002000000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_SHIFT 36U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_CLRMSK 0xFFFFFFEFFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_EN 0x0000001000000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_SHIFT 34U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_CLRMSK 0xFFFFFFFBFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_EN 0x0000000400000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_SHIFT 33U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_CLRMSK 0xFFFFFFFDFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_EN 0x0000000200000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_SHIFT 32U +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_CLRMSK 0xFFFFFFFEFFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_EN 0x0000000100000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_SHIFT 31U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_CLRMSK 0xFFFFFFFF7FFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_EN 0x0000000080000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_SHIFT 30U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_CLRMSK 0xFFFFFFFFBFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_EN 0x0000000040000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_SHIFT 29U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_CLRMSK 0xFFFFFFFFDFFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_EN 0x0000000020000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCACK_SHIFT 27U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCACK_CLRMSK 0xFFFFFFFFE7FFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_SHIFT 26U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_CLRMSK 0xFFFFFFFFFBFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_EN 0x0000000004000000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MCONNECT_SHIFT 24U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MCONNECT_CLRMSK 0xFFFFFFFFFCFFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_SHIFT 23U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_CLRMSK 0xFFFFFFFFFF7FFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_EN 0x0000000000800000ULL +#define 
ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_SHIFT 22U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_CLRMSK 0xFFFFFFFFFFBFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_EN 0x0000000000400000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_SHIFT 21U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_EN 0x0000000000200000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCACK_SHIFT 19U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCACK_CLRMSK 0xFFFFFFFFFFE7FFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_SHIFT 18U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_CLRMSK 0xFFFFFFFFFFFBFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_EN 0x0000000000040000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MCONNECT_SHIFT 16U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MCONNECT_CLRMSK 0xFFFFFFFFFFFCFFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_SHIFT 15U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_CLRMSK 0xFFFFFFFFFFFF7FFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_EN 0x0000000000008000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_SHIFT 14U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_CLRMSK 0xFFFFFFFFFFFFBFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_EN 0x0000000000004000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_SHIFT 13U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_CLRMSK 0xFFFFFFFFFFFFDFFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_EN 0x0000000000002000ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCACK_SHIFT 11U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCACK_CLRMSK 0xFFFFFFFFFFFFE7FFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_SHIFT 10U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_CLRMSK 0xFFFFFFFFFFFFFBFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_EN 0x0000000000000400ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MCONNECT_SHIFT 8U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MCONNECT_CLRMSK 0xFFFFFFFFFFFFFCFFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_SHIFT 7U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_CLRMSK 0xFFFFFFFFFFFFFF7FULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_EN 0x0000000000000080ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_SHIFT 6U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_CLRMSK 0xFFFFFFFFFFFFFFBFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_EN 0x0000000000000040ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_SHIFT 5U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_CLRMSK 0xFFFFFFFFFFFFFFDFULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_EN 0x0000000000000020ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCACK_SHIFT 3U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCACK_CLRMSK 0xFFFFFFFFFFFFFFE7ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_SHIFT 2U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_EN 0x0000000000000004ULL +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MCONNECT_SHIFT 0U +#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MCONNECT_CLRMSK 0xFFFFFFFFFFFFFFFCULL + +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_SHIFT 6U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_EN 0x00000040U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_SHIFT 5U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_EN 0x00000020U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_META_SHIFT 4U +#define 
ROGUE_CR_BIF_TRUST_DM_TYPE_META_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_META_EN 0x00000010U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_SHIFT 3U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_EN 0x00000008U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_SHIFT 2U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_EN 0x00000004U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_SHIFT 1U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_EN 0x00000002U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_SHIFT 0U +#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_EN 0x00000001U + +#define ROGUE_CR_BIF_TRUST_DM_MASK 0x0000007FU + +/* Register ROGUE_CR_BIF_TRUST */ +#define ROGUE_CR_BIF_TRUST 0xA000U +#define ROGUE_CR_BIF_TRUST_MASKFULL 0x00000000001FFFFFULL +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_SHIFT 20U +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_CLRMSK 0xFFEFFFFFU +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_EN 0x00100000U +#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_SHIFT 19U +#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_CLRMSK 0xFFF7FFFFU +#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_EN 0x00080000U +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_SHIFT 18U +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_CLRMSK 0xFFFBFFFFU +#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_EN 0x00040000U +#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_SHIFT 17U +#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_CLRMSK 0xFFFDFFFFU +#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_EN 0x00020000U +#define ROGUE_CR_BIF_TRUST_ENABLE_SHIFT 16U +#define ROGUE_CR_BIF_TRUST_ENABLE_CLRMSK 0xFFFEFFFFU +#define ROGUE_CR_BIF_TRUST_ENABLE_EN 0x00010000U +#define ROGUE_CR_BIF_TRUST_DM_TRUSTED_SHIFT 9U +#define ROGUE_CR_BIF_TRUST_DM_TRUSTED_CLRMSK 0xFFFF01FFU +#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_SHIFT 8U +#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFEFFU +#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_EN 0x00000100U +#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_SHIFT 7U +#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFF7FU +#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_EN 0x00000080U +#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_SHIFT 6U +#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_EN 0x00000040U +#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_SHIFT 5U +#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_EN 0x00000020U +#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_SHIFT 4U +#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_EN 0x00000010U +#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_SHIFT 3U +#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_EN 0x00000008U +#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_SHIFT 2U +#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_EN 0x00000004U +#define ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_SHIFT 1U +#define ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFDU +#define 
ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_EN 0x00000002U +#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_SHIFT 0U +#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_EN 0x00000001U + +/* Register ROGUE_CR_SYS_BUS_SECURE */ +#define ROGUE_CR_SYS_BUS_SECURE 0xA100U +#define ROGUE_CR_SYS_BUS_SECURE__SECR__MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_SYS_BUS_SECURE_MASKFULL 0x0000000000000001ULL +#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_SHIFT 0U +#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_FBA_FC0_CHECKSUM */ +#define ROGUE_CR_FBA_FC0_CHECKSUM 0xD170U +#define ROGUE_CR_FBA_FC0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_FBA_FC0_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_FBA_FC0_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_FBA_FC1_CHECKSUM */ +#define ROGUE_CR_FBA_FC1_CHECKSUM 0xD178U +#define ROGUE_CR_FBA_FC1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_FBA_FC1_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_FBA_FC1_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_FBA_FC2_CHECKSUM */ +#define ROGUE_CR_FBA_FC2_CHECKSUM 0xD180U +#define ROGUE_CR_FBA_FC2_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_FBA_FC2_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_FBA_FC2_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_FBA_FC3_CHECKSUM */ +#define ROGUE_CR_FBA_FC3_CHECKSUM 0xD188U +#define ROGUE_CR_FBA_FC3_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_FBA_FC3_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_FBA_FC3_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_CLK_CTRL2 */ +#define ROGUE_CR_CLK_CTRL2 0xD200U +#define ROGUE_CR_CLK_CTRL2_MASKFULL 0x0000000000000F33ULL +#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_SHIFT 10U +#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_CLRMSK 0xFFFFFFFFFFFFF3FFULL +#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_ON 0x0000000000000400ULL +#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_AUTO 0x0000000000000800ULL +#define ROGUE_CR_CLK_CTRL2_VRDM_SHIFT 8U +#define ROGUE_CR_CLK_CTRL2_VRDM_CLRMSK 0xFFFFFFFFFFFFFCFFULL +#define ROGUE_CR_CLK_CTRL2_VRDM_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL2_VRDM_ON 0x0000000000000100ULL +#define ROGUE_CR_CLK_CTRL2_VRDM_AUTO 0x0000000000000200ULL +#define ROGUE_CR_CLK_CTRL2_SH_SHIFT 4U +#define ROGUE_CR_CLK_CTRL2_SH_CLRMSK 0xFFFFFFFFFFFFFFCFULL +#define ROGUE_CR_CLK_CTRL2_SH_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL2_SH_ON 0x0000000000000010ULL +#define ROGUE_CR_CLK_CTRL2_SH_AUTO 0x0000000000000020ULL +#define ROGUE_CR_CLK_CTRL2_FBA_SHIFT 0U +#define ROGUE_CR_CLK_CTRL2_FBA_CLRMSK 0xFFFFFFFFFFFFFFFCULL +#define ROGUE_CR_CLK_CTRL2_FBA_OFF 0x0000000000000000ULL +#define ROGUE_CR_CLK_CTRL2_FBA_ON 0x0000000000000001ULL +#define ROGUE_CR_CLK_CTRL2_FBA_AUTO 0x0000000000000002ULL + +/* Register ROGUE_CR_CLK_STATUS2 */ +#define ROGUE_CR_CLK_STATUS2 0xD208U +#define ROGUE_CR_CLK_STATUS2_MASKFULL 0x0000000000000015ULL +#define ROGUE_CR_CLK_STATUS2_VRDM_SHIFT 4U +#define ROGUE_CR_CLK_STATUS2_VRDM_CLRMSK 0xFFFFFFFFFFFFFFEFULL +#define ROGUE_CR_CLK_STATUS2_VRDM_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS2_VRDM_RUNNING 0x0000000000000010ULL +#define ROGUE_CR_CLK_STATUS2_SH_SHIFT 2U +#define ROGUE_CR_CLK_STATUS2_SH_CLRMSK 0xFFFFFFFFFFFFFFFBULL +#define ROGUE_CR_CLK_STATUS2_SH_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS2_SH_RUNNING 0x0000000000000004ULL +#define 
ROGUE_CR_CLK_STATUS2_FBA_SHIFT 0U +#define ROGUE_CR_CLK_STATUS2_FBA_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_CLK_STATUS2_FBA_GATED 0x0000000000000000ULL +#define ROGUE_CR_CLK_STATUS2_FBA_RUNNING 0x0000000000000001ULL + +/* Register ROGUE_CR_RPM_SHF_FPL */ +#define ROGUE_CR_RPM_SHF_FPL 0xD520U +#define ROGUE_CR_RPM_SHF_FPL_MASKFULL 0x3FFFFFFFFFFFFFFCULL +#define ROGUE_CR_RPM_SHF_FPL_SIZE_SHIFT 40U +#define ROGUE_CR_RPM_SHF_FPL_SIZE_CLRMSK 0xC00000FFFFFFFFFFULL +#define ROGUE_CR_RPM_SHF_FPL_BASE_SHIFT 2U +#define ROGUE_CR_RPM_SHF_FPL_BASE_CLRMSK 0xFFFFFF0000000003ULL +#define ROGUE_CR_RPM_SHF_FPL_BASE_ALIGNSHIFT 2U +#define ROGUE_CR_RPM_SHF_FPL_BASE_ALIGNSIZE 4U + +/* Register ROGUE_CR_RPM_SHF_FPL_READ */ +#define ROGUE_CR_RPM_SHF_FPL_READ 0xD528U +#define ROGUE_CR_RPM_SHF_FPL_READ_MASKFULL 0x00000000007FFFFFULL +#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_SHIFT 22U +#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_CLRMSK 0xFFBFFFFFU +#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_EN 0x00400000U +#define ROGUE_CR_RPM_SHF_FPL_READ_OFFSET_SHIFT 0U +#define ROGUE_CR_RPM_SHF_FPL_READ_OFFSET_CLRMSK 0xFFC00000U + +/* Register ROGUE_CR_RPM_SHF_FPL_WRITE */ +#define ROGUE_CR_RPM_SHF_FPL_WRITE 0xD530U +#define ROGUE_CR_RPM_SHF_FPL_WRITE_MASKFULL 0x00000000007FFFFFULL +#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_SHIFT 22U +#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_CLRMSK 0xFFBFFFFFU +#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_EN 0x00400000U +#define ROGUE_CR_RPM_SHF_FPL_WRITE_OFFSET_SHIFT 0U +#define ROGUE_CR_RPM_SHF_FPL_WRITE_OFFSET_CLRMSK 0xFFC00000U + +/* Register ROGUE_CR_RPM_SHG_FPL */ +#define ROGUE_CR_RPM_SHG_FPL 0xD538U +#define ROGUE_CR_RPM_SHG_FPL_MASKFULL 0x3FFFFFFFFFFFFFFCULL +#define ROGUE_CR_RPM_SHG_FPL_SIZE_SHIFT 40U +#define ROGUE_CR_RPM_SHG_FPL_SIZE_CLRMSK 0xC00000FFFFFFFFFFULL +#define ROGUE_CR_RPM_SHG_FPL_BASE_SHIFT 2U +#define ROGUE_CR_RPM_SHG_FPL_BASE_CLRMSK 0xFFFFFF0000000003ULL +#define ROGUE_CR_RPM_SHG_FPL_BASE_ALIGNSHIFT 2U +#define ROGUE_CR_RPM_SHG_FPL_BASE_ALIGNSIZE 4U + +/* Register ROGUE_CR_RPM_SHG_FPL_READ */ +#define ROGUE_CR_RPM_SHG_FPL_READ 0xD540U +#define ROGUE_CR_RPM_SHG_FPL_READ_MASKFULL 0x00000000007FFFFFULL +#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_SHIFT 22U +#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_CLRMSK 0xFFBFFFFFU +#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_EN 0x00400000U +#define ROGUE_CR_RPM_SHG_FPL_READ_OFFSET_SHIFT 0U +#define ROGUE_CR_RPM_SHG_FPL_READ_OFFSET_CLRMSK 0xFFC00000U + +/* Register ROGUE_CR_RPM_SHG_FPL_WRITE */ +#define ROGUE_CR_RPM_SHG_FPL_WRITE 0xD548U +#define ROGUE_CR_RPM_SHG_FPL_WRITE_MASKFULL 0x00000000007FFFFFULL +#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_SHIFT 22U +#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_CLRMSK 0xFFBFFFFFU +#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_EN 0x00400000U +#define ROGUE_CR_RPM_SHG_FPL_WRITE_OFFSET_SHIFT 0U +#define ROGUE_CR_RPM_SHG_FPL_WRITE_OFFSET_CLRMSK 0xFFC00000U + +/* Register ROGUE_CR_SH_PERF */ +#define ROGUE_CR_SH_PERF 0xD5F8U +#define ROGUE_CR_SH_PERF_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_SH_PERF_CLR_3_SHIFT 4U +#define ROGUE_CR_SH_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SH_PERF_CLR_3_EN 0x00000010U +#define ROGUE_CR_SH_PERF_CLR_2_SHIFT 3U +#define ROGUE_CR_SH_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SH_PERF_CLR_2_EN 0x00000008U +#define ROGUE_CR_SH_PERF_CLR_1_SHIFT 2U +#define ROGUE_CR_SH_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SH_PERF_CLR_1_EN 0x00000004U +#define ROGUE_CR_SH_PERF_CLR_0_SHIFT 1U +#define ROGUE_CR_SH_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SH_PERF_CLR_0_EN 
0x00000002U +#define ROGUE_CR_SH_PERF_CTRL_ENABLE_SHIFT 0U +#define ROGUE_CR_SH_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SH_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register ROGUE_CR_SH_PERF_SELECT0 */ +#define ROGUE_CR_SH_PERF_SELECT0 0xD600U +#define ROGUE_CR_SH_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define ROGUE_CR_SH_PERF_SELECT0_MODE_SHIFT 21U +#define ROGUE_CR_SH_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define ROGUE_CR_SH_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define ROGUE_CR_SH_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define ROGUE_CR_SH_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define ROGUE_CR_SH_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define ROGUE_CR_SH_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_SH_PERF_COUNTER_0 */ +#define ROGUE_CR_SH_PERF_COUNTER_0 0xD628U +#define ROGUE_CR_SH_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SH_PERF_COUNTER_0_REG_SHIFT 0U +#define ROGUE_CR_SH_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SHF_SHG_CHECKSUM */ +#define ROGUE_CR_SHF_SHG_CHECKSUM 0xD1C0U +#define ROGUE_CR_SHF_SHG_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SHF_SHG_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_SHF_SHG_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM */ +#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM 0xD1C8U +#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SHF_VARY_BIF_CHECKSUM */ +#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM 0xD1D0U +#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_RPM_BIF_CHECKSUM */ +#define ROGUE_CR_RPM_BIF_CHECKSUM 0xD1D8U +#define ROGUE_CR_RPM_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_RPM_BIF_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_RPM_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SHG_BIF_CHECKSUM */ +#define ROGUE_CR_SHG_BIF_CHECKSUM 0xD1E0U +#define ROGUE_CR_SHG_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SHG_BIF_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_SHG_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register ROGUE_CR_SHG_FE_BE_CHECKSUM */ +#define ROGUE_CR_SHG_FE_BE_CHECKSUM 0xD1E8U +#define ROGUE_CR_SHG_FE_BE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_SHG_FE_BE_CHECKSUM_VALUE_SHIFT 0U +#define ROGUE_CR_SHG_FE_BE_CHECKSUM_VALUE_CLRMSK 0x00000000U + +/* Register DPX_CR_BF_PERF */ +#define DPX_CR_BF_PERF 0xC458U +#define DPX_CR_BF_PERF_MASKFULL 0x000000000000001FULL +#define DPX_CR_BF_PERF_CLR_3_SHIFT 4U +#define DPX_CR_BF_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define DPX_CR_BF_PERF_CLR_3_EN 0x00000010U +#define DPX_CR_BF_PERF_CLR_2_SHIFT 3U +#define DPX_CR_BF_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define DPX_CR_BF_PERF_CLR_2_EN 0x00000008U +#define DPX_CR_BF_PERF_CLR_1_SHIFT 2U +#define DPX_CR_BF_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define DPX_CR_BF_PERF_CLR_1_EN 0x00000004U +#define DPX_CR_BF_PERF_CLR_0_SHIFT 1U +#define DPX_CR_BF_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define 
DPX_CR_BF_PERF_CLR_0_EN 0x00000002U +#define DPX_CR_BF_PERF_CTRL_ENABLE_SHIFT 0U +#define DPX_CR_BF_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define DPX_CR_BF_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register DPX_CR_BF_PERF_SELECT0 */ +#define DPX_CR_BF_PERF_SELECT0 0xC460U +#define DPX_CR_BF_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define DPX_CR_BF_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define DPX_CR_BF_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define DPX_CR_BF_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define DPX_CR_BF_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define DPX_CR_BF_PERF_SELECT0_MODE_SHIFT 21U +#define DPX_CR_BF_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define DPX_CR_BF_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define DPX_CR_BF_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define DPX_CR_BF_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define DPX_CR_BF_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define DPX_CR_BF_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register DPX_CR_BF_PERF_COUNTER_0 */ +#define DPX_CR_BF_PERF_COUNTER_0 0xC488U +#define DPX_CR_BF_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_BF_PERF_COUNTER_0_REG_SHIFT 0U +#define DPX_CR_BF_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register DPX_CR_BT_PERF */ +#define DPX_CR_BT_PERF 0xC3D0U +#define DPX_CR_BT_PERF_MASKFULL 0x000000000000001FULL +#define DPX_CR_BT_PERF_CLR_3_SHIFT 4U +#define DPX_CR_BT_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define DPX_CR_BT_PERF_CLR_3_EN 0x00000010U +#define DPX_CR_BT_PERF_CLR_2_SHIFT 3U +#define DPX_CR_BT_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define DPX_CR_BT_PERF_CLR_2_EN 0x00000008U +#define DPX_CR_BT_PERF_CLR_1_SHIFT 2U +#define DPX_CR_BT_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define DPX_CR_BT_PERF_CLR_1_EN 0x00000004U +#define DPX_CR_BT_PERF_CLR_0_SHIFT 1U +#define DPX_CR_BT_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define DPX_CR_BT_PERF_CLR_0_EN 0x00000002U +#define DPX_CR_BT_PERF_CTRL_ENABLE_SHIFT 0U +#define DPX_CR_BT_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define DPX_CR_BT_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register DPX_CR_BT_PERF_SELECT0 */ +#define DPX_CR_BT_PERF_SELECT0 0xC3D8U +#define DPX_CR_BT_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define DPX_CR_BT_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define DPX_CR_BT_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define DPX_CR_BT_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define DPX_CR_BT_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define DPX_CR_BT_PERF_SELECT0_MODE_SHIFT 21U +#define DPX_CR_BT_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define DPX_CR_BT_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define DPX_CR_BT_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define DPX_CR_BT_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define DPX_CR_BT_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define DPX_CR_BT_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register DPX_CR_BT_PERF_COUNTER_0 */ +#define DPX_CR_BT_PERF_COUNTER_0 0xC420U +#define DPX_CR_BT_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_BT_PERF_COUNTER_0_REG_SHIFT 0U +#define DPX_CR_BT_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register DPX_CR_RQ_USC_DEBUG */ +#define DPX_CR_RQ_USC_DEBUG 0xC110U +#define DPX_CR_RQ_USC_DEBUG_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_RQ_USC_DEBUG_CHECKSUM_SHIFT 0U +#define DPX_CR_RQ_USC_DEBUG_CHECKSUM_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register DPX_CR_BIF_FAULT_BANK_MMU_STATUS */ +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS 0xC5C8U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_MASKFULL 
0x000000000000F775ULL +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_CAT_BASE_SHIFT 12U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_PAGE_SIZE_SHIFT 8U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_DATA_TYPE_SHIFT 5U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_SHIFT 4U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_EN 0x00000010U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_SHIFT 0U +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU +#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_EN 0x00000001U + +/* Register DPX_CR_BIF_FAULT_BANK_REQ_STATUS */ +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS 0xC5D0U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_MASKFULL 0x03FFFFFFFFFFFFF0ULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_SHIFT 57U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_CLRMSK 0xFDFFFFFFFFFFFFFFULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_EN 0x0200000000000000ULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_SB_SHIFT 44U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_SB_CLRMSK 0xFE000FFFFFFFFFFFULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_ID_SHIFT 40U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_SHIFT 4U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U +#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_ALIGNSIZE 16U + +/* Register DPX_CR_BIF_MMU_STATUS */ +#define DPX_CR_BIF_MMU_STATUS 0xC5D8U +#define DPX_CR_BIF_MMU_STATUS_MASKFULL 0x000000000FFFFFF7ULL +#define DPX_CR_BIF_MMU_STATUS_PC_DATA_SHIFT 20U +#define DPX_CR_BIF_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU +#define DPX_CR_BIF_MMU_STATUS_PD_DATA_SHIFT 12U +#define DPX_CR_BIF_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU +#define DPX_CR_BIF_MMU_STATUS_PT_DATA_SHIFT 4U +#define DPX_CR_BIF_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU +#define DPX_CR_BIF_MMU_STATUS_STALLED_SHIFT 2U +#define DPX_CR_BIF_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU +#define DPX_CR_BIF_MMU_STATUS_STALLED_EN 0x00000004U +#define DPX_CR_BIF_MMU_STATUS_PAUSED_SHIFT 1U +#define DPX_CR_BIF_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU +#define DPX_CR_BIF_MMU_STATUS_PAUSED_EN 0x00000002U +#define DPX_CR_BIF_MMU_STATUS_BUSY_SHIFT 0U +#define DPX_CR_BIF_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU +#define DPX_CR_BIF_MMU_STATUS_BUSY_EN 0x00000001U + +/* Register DPX_CR_RT_PERF */ +#define DPX_CR_RT_PERF 0xC700U +#define DPX_CR_RT_PERF_MASKFULL 0x000000000000001FULL +#define DPX_CR_RT_PERF_CLR_3_SHIFT 4U +#define DPX_CR_RT_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define DPX_CR_RT_PERF_CLR_3_EN 0x00000010U +#define DPX_CR_RT_PERF_CLR_2_SHIFT 3U +#define DPX_CR_RT_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define DPX_CR_RT_PERF_CLR_2_EN 0x00000008U +#define DPX_CR_RT_PERF_CLR_1_SHIFT 2U +#define DPX_CR_RT_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define DPX_CR_RT_PERF_CLR_1_EN 0x00000004U +#define DPX_CR_RT_PERF_CLR_0_SHIFT 1U +#define DPX_CR_RT_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define DPX_CR_RT_PERF_CLR_0_EN 0x00000002U +#define 
DPX_CR_RT_PERF_CTRL_ENABLE_SHIFT 0U +#define DPX_CR_RT_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define DPX_CR_RT_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register DPX_CR_RT_PERF_SELECT0 */ +#define DPX_CR_RT_PERF_SELECT0 0xC708U +#define DPX_CR_RT_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define DPX_CR_RT_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define DPX_CR_RT_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define DPX_CR_RT_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define DPX_CR_RT_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define DPX_CR_RT_PERF_SELECT0_MODE_SHIFT 21U +#define DPX_CR_RT_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define DPX_CR_RT_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define DPX_CR_RT_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define DPX_CR_RT_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define DPX_CR_RT_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define DPX_CR_RT_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register DPX_CR_RT_PERF_COUNTER_0 */ +#define DPX_CR_RT_PERF_COUNTER_0 0xC730U +#define DPX_CR_RT_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_RT_PERF_COUNTER_0_REG_SHIFT 0U +#define DPX_CR_RT_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register DPX_CR_BX_TU_PERF */ +#define DPX_CR_BX_TU_PERF 0xC908U +#define DPX_CR_BX_TU_PERF_MASKFULL 0x000000000000001FULL +#define DPX_CR_BX_TU_PERF_CLR_3_SHIFT 4U +#define DPX_CR_BX_TU_PERF_CLR_3_CLRMSK 0xFFFFFFEFU +#define DPX_CR_BX_TU_PERF_CLR_3_EN 0x00000010U +#define DPX_CR_BX_TU_PERF_CLR_2_SHIFT 3U +#define DPX_CR_BX_TU_PERF_CLR_2_CLRMSK 0xFFFFFFF7U +#define DPX_CR_BX_TU_PERF_CLR_2_EN 0x00000008U +#define DPX_CR_BX_TU_PERF_CLR_1_SHIFT 2U +#define DPX_CR_BX_TU_PERF_CLR_1_CLRMSK 0xFFFFFFFBU +#define DPX_CR_BX_TU_PERF_CLR_1_EN 0x00000004U +#define DPX_CR_BX_TU_PERF_CLR_0_SHIFT 1U +#define DPX_CR_BX_TU_PERF_CLR_0_CLRMSK 0xFFFFFFFDU +#define DPX_CR_BX_TU_PERF_CLR_0_EN 0x00000002U +#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_SHIFT 0U +#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU +#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_EN 0x00000001U + +/* Register DPX_CR_BX_TU_PERF_SELECT0 */ +#define DPX_CR_BX_TU_PERF_SELECT0 0xC910U +#define DPX_CR_BX_TU_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL +#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MAX_SHIFT 48U +#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL +#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MIN_SHIFT 32U +#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL +#define DPX_CR_BX_TU_PERF_SELECT0_MODE_SHIFT 21U +#define DPX_CR_BX_TU_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL +#define DPX_CR_BX_TU_PERF_SELECT0_MODE_EN 0x0000000000200000ULL +#define DPX_CR_BX_TU_PERF_SELECT0_GROUP_SELECT_SHIFT 16U +#define DPX_CR_BX_TU_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL +#define DPX_CR_BX_TU_PERF_SELECT0_BIT_SELECT_SHIFT 0U +#define DPX_CR_BX_TU_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register DPX_CR_BX_TU_PERF_COUNTER_0 */ +#define DPX_CR_BX_TU_PERF_COUNTER_0 0xC938U +#define DPX_CR_BX_TU_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_BX_TU_PERF_COUNTER_0_REG_SHIFT 0U +#define DPX_CR_BX_TU_PERF_COUNTER_0_REG_CLRMSK 0x00000000U + +/* Register DPX_CR_RS_PDS_RR_CHECKSUM */ +#define DPX_CR_RS_PDS_RR_CHECKSUM 0xC0F0U +#define DPX_CR_RS_PDS_RR_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL +#define DPX_CR_RS_PDS_RR_CHECKSUM_VALUE_SHIFT 0U +#define DPX_CR_RS_PDS_RR_CHECKSUM_VALUE_CLRMSK 0xFFFFFFFF00000000ULL + +/* Register ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT */ +#define 
ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT 0xE140U +#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_MASKFULL 0x00000000000000FFULL +#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_ID_SHIFT 0U +#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_ID_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_MMU_CBASE_MAPPING */ +#define ROGUE_CR_MMU_CBASE_MAPPING 0xE148U +#define ROGUE_CR_MMU_CBASE_MAPPING_MASKFULL 0x000000000FFFFFFFULL +#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_SHIFT 0U +#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_CLRMSK 0xF0000000U +#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_ALIGNSHIFT 12U +#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_ALIGNSIZE 4096U + +/* Register ROGUE_CR_MMU_FAULT_STATUS */ +#define ROGUE_CR_MMU_FAULT_STATUS 0xE150U +#define ROGUE_CR_MMU_FAULT_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_ADDRESS_SHIFT 28U +#define ROGUE_CR_MMU_FAULT_STATUS_ADDRESS_CLRMSK 0x000000000FFFFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_CONTEXT_SHIFT 20U +#define ROGUE_CR_MMU_FAULT_STATUS_CONTEXT_CLRMSK 0xFFFFFFFFF00FFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_TAG_SB_SHIFT 12U +#define ROGUE_CR_MMU_FAULT_STATUS_TAG_SB_CLRMSK 0xFFFFFFFFFFF00FFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_REQ_ID_SHIFT 6U +#define ROGUE_CR_MMU_FAULT_STATUS_REQ_ID_CLRMSK 0xFFFFFFFFFFFFF03FULL +#define ROGUE_CR_MMU_FAULT_STATUS_LEVEL_SHIFT 4U +#define ROGUE_CR_MMU_FAULT_STATUS_LEVEL_CLRMSK 0xFFFFFFFFFFFFFFCFULL +#define ROGUE_CR_MMU_FAULT_STATUS_RNW_SHIFT 3U +#define ROGUE_CR_MMU_FAULT_STATUS_RNW_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_MMU_FAULT_STATUS_RNW_EN 0x0000000000000008ULL +#define ROGUE_CR_MMU_FAULT_STATUS_TYPE_SHIFT 1U +#define ROGUE_CR_MMU_FAULT_STATUS_TYPE_CLRMSK 0xFFFFFFFFFFFFFFF9ULL +#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_SHIFT 0U +#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_MMU_FAULT_STATUS_META */ +#define ROGUE_CR_MMU_FAULT_STATUS_META 0xE158U +#define ROGUE_CR_MMU_FAULT_STATUS_META_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_ADDRESS_SHIFT 28U +#define ROGUE_CR_MMU_FAULT_STATUS_META_ADDRESS_CLRMSK 0x000000000FFFFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_CONTEXT_SHIFT 20U +#define ROGUE_CR_MMU_FAULT_STATUS_META_CONTEXT_CLRMSK 0xFFFFFFFFF00FFFFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_TAG_SB_SHIFT 12U +#define ROGUE_CR_MMU_FAULT_STATUS_META_TAG_SB_CLRMSK 0xFFFFFFFFFFF00FFFULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_REQ_ID_SHIFT 6U +#define ROGUE_CR_MMU_FAULT_STATUS_META_REQ_ID_CLRMSK 0xFFFFFFFFFFFFF03FULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_LEVEL_SHIFT 4U +#define ROGUE_CR_MMU_FAULT_STATUS_META_LEVEL_CLRMSK 0xFFFFFFFFFFFFFFCFULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_SHIFT 3U +#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_CLRMSK 0xFFFFFFFFFFFFFFF7ULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_EN 0x0000000000000008ULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_TYPE_SHIFT 1U +#define ROGUE_CR_MMU_FAULT_STATUS_META_TYPE_CLRMSK 0xFFFFFFFFFFFFFFF9ULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_SHIFT 0U +#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_CLRMSK 0xFFFFFFFFFFFFFFFEULL +#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_EN 0x0000000000000001ULL + +/* Register ROGUE_CR_SLC3_CTRL_MISC */ +#define ROGUE_CR_SLC3_CTRL_MISC 0xE200U +#define ROGUE_CR_SLC3_CTRL_MISC_MASKFULL 0x0000000000000107ULL +#define ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_SHIFT 8U +#define ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_CLRMSK 0xFFFFFEFFU +#define 
ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_EN 0x00000100U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_SHIFT 0U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_CLRMSK 0xFFFFFFF8U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_LINEAR 0x00000000U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_IN_PAGE_HASH 0x00000001U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_FIXED_PVR_HASH 0x00000002U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_SCRAMBLE_PVR_HASH 0x00000003U +#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_WEAVED_HASH 0x00000004U + +/* Register ROGUE_CR_SLC3_SCRAMBLE */ +#define ROGUE_CR_SLC3_SCRAMBLE 0xE208U +#define ROGUE_CR_SLC3_SCRAMBLE_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_SCRAMBLE_BITS_SHIFT 0U +#define ROGUE_CR_SLC3_SCRAMBLE_BITS_CLRMSK 0x0000000000000000ULL + +/* Register ROGUE_CR_SLC3_SCRAMBLE2 */ +#define ROGUE_CR_SLC3_SCRAMBLE2 0xE210U +#define ROGUE_CR_SLC3_SCRAMBLE2_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_SCRAMBLE2_BITS_SHIFT 0U +#define ROGUE_CR_SLC3_SCRAMBLE2_BITS_CLRMSK 0x0000000000000000ULL + +/* Register ROGUE_CR_SLC3_SCRAMBLE3 */ +#define ROGUE_CR_SLC3_SCRAMBLE3 0xE218U +#define ROGUE_CR_SLC3_SCRAMBLE3_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_SCRAMBLE3_BITS_SHIFT 0U +#define ROGUE_CR_SLC3_SCRAMBLE3_BITS_CLRMSK 0x0000000000000000ULL + +/* Register ROGUE_CR_SLC3_SCRAMBLE4 */ +#define ROGUE_CR_SLC3_SCRAMBLE4 0xE260U +#define ROGUE_CR_SLC3_SCRAMBLE4_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_SCRAMBLE4_BITS_SHIFT 0U +#define ROGUE_CR_SLC3_SCRAMBLE4_BITS_CLRMSK 0x0000000000000000ULL + +/* Register ROGUE_CR_SLC3_STATUS */ +#define ROGUE_CR_SLC3_STATUS 0xE220U +#define ROGUE_CR_SLC3_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_STATUS_WRITES1_SHIFT 48U +#define ROGUE_CR_SLC3_STATUS_WRITES1_CLRMSK 0x0000FFFFFFFFFFFFULL +#define ROGUE_CR_SLC3_STATUS_WRITES0_SHIFT 32U +#define ROGUE_CR_SLC3_STATUS_WRITES0_CLRMSK 0xFFFF0000FFFFFFFFULL +#define ROGUE_CR_SLC3_STATUS_READS1_SHIFT 16U +#define ROGUE_CR_SLC3_STATUS_READS1_CLRMSK 0xFFFFFFFF0000FFFFULL +#define ROGUE_CR_SLC3_STATUS_READS0_SHIFT 0U +#define ROGUE_CR_SLC3_STATUS_READS0_CLRMSK 0xFFFFFFFFFFFF0000ULL + +/* Register ROGUE_CR_SLC3_IDLE */ +#define ROGUE_CR_SLC3_IDLE 0xE228U +#define ROGUE_CR_SLC3_IDLE_MASKFULL 0x00000000000FFFFFULL +#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST2_SHIFT 18U +#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST2_CLRMSK 0xFFF3FFFFU +#define ROGUE_CR_SLC3_IDLE_MMU_SHIFT 17U +#define ROGUE_CR_SLC3_IDLE_MMU_CLRMSK 0xFFFDFFFFU +#define ROGUE_CR_SLC3_IDLE_MMU_EN 0x00020000U +#define ROGUE_CR_SLC3_IDLE_RDI_SHIFT 16U +#define ROGUE_CR_SLC3_IDLE_RDI_CLRMSK 0xFFFEFFFFU +#define ROGUE_CR_SLC3_IDLE_RDI_EN 0x00010000U +#define ROGUE_CR_SLC3_IDLE_IMGBV4_SHIFT 12U +#define ROGUE_CR_SLC3_IDLE_IMGBV4_CLRMSK 0xFFFF0FFFU +#define ROGUE_CR_SLC3_IDLE_CACHE_BANKS_SHIFT 4U +#define ROGUE_CR_SLC3_IDLE_CACHE_BANKS_CLRMSK 0xFFFFF00FU +#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST_SHIFT 2U +#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST_CLRMSK 0xFFFFFFF3U +#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_SHIFT 1U +#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_EN 0x00000002U +#define ROGUE_CR_SLC3_IDLE_XBAR_SHIFT 0U +#define ROGUE_CR_SLC3_IDLE_XBAR_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SLC3_IDLE_XBAR_EN 0x00000001U + +/* Register ROGUE_CR_SLC3_FAULT_STOP_STATUS */ +#define ROGUE_CR_SLC3_FAULT_STOP_STATUS 0xE248U +#define ROGUE_CR_SLC3_FAULT_STOP_STATUS_MASKFULL 0x0000000000001FFFULL +#define 
ROGUE_CR_SLC3_FAULT_STOP_STATUS_BIF_SHIFT 0U +#define ROGUE_CR_SLC3_FAULT_STOP_STATUS_BIF_CLRMSK 0xFFFFE000U + +/* Register ROGUE_CR_VDM_CONTEXT_STORE_MODE */ +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE 0xF048U +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MASKFULL 0x0000000000000003ULL +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_SHIFT 0U +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_CLRMSK 0xFFFFFFFCU +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_INDEX 0x00000000U +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_INSTANCE 0x00000001U +#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_LIST 0x00000002U + +/* Register ROGUE_CR_CONTEXT_MAPPING0 */ +#define ROGUE_CR_CONTEXT_MAPPING0 0xF078U +#define ROGUE_CR_CONTEXT_MAPPING0_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING0_2D_SHIFT 24U +#define ROGUE_CR_CONTEXT_MAPPING0_2D_CLRMSK 0x00FFFFFFU +#define ROGUE_CR_CONTEXT_MAPPING0_CDM_SHIFT 16U +#define ROGUE_CR_CONTEXT_MAPPING0_CDM_CLRMSK 0xFF00FFFFU +#define ROGUE_CR_CONTEXT_MAPPING0_3D_SHIFT 8U +#define ROGUE_CR_CONTEXT_MAPPING0_3D_CLRMSK 0xFFFF00FFU +#define ROGUE_CR_CONTEXT_MAPPING0_TA_SHIFT 0U +#define ROGUE_CR_CONTEXT_MAPPING0_TA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_CONTEXT_MAPPING1 */ +#define ROGUE_CR_CONTEXT_MAPPING1 0xF080U +#define ROGUE_CR_CONTEXT_MAPPING1_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_CONTEXT_MAPPING1_HOST_SHIFT 8U +#define ROGUE_CR_CONTEXT_MAPPING1_HOST_CLRMSK 0xFFFF00FFU +#define ROGUE_CR_CONTEXT_MAPPING1_TLA_SHIFT 0U +#define ROGUE_CR_CONTEXT_MAPPING1_TLA_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_CONTEXT_MAPPING2 */ +#define ROGUE_CR_CONTEXT_MAPPING2 0xF088U +#define ROGUE_CR_CONTEXT_MAPPING2_MASKFULL 0x0000000000FFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING2_ALIST0_SHIFT 16U +#define ROGUE_CR_CONTEXT_MAPPING2_ALIST0_CLRMSK 0xFF00FFFFU +#define ROGUE_CR_CONTEXT_MAPPING2_TE0_SHIFT 8U +#define ROGUE_CR_CONTEXT_MAPPING2_TE0_CLRMSK 0xFFFF00FFU +#define ROGUE_CR_CONTEXT_MAPPING2_VCE0_SHIFT 0U +#define ROGUE_CR_CONTEXT_MAPPING2_VCE0_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_CONTEXT_MAPPING3 */ +#define ROGUE_CR_CONTEXT_MAPPING3 0xF090U +#define ROGUE_CR_CONTEXT_MAPPING3_MASKFULL 0x0000000000FFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING3_ALIST1_SHIFT 16U +#define ROGUE_CR_CONTEXT_MAPPING3_ALIST1_CLRMSK 0xFF00FFFFU +#define ROGUE_CR_CONTEXT_MAPPING3_TE1_SHIFT 8U +#define ROGUE_CR_CONTEXT_MAPPING3_TE1_CLRMSK 0xFFFF00FFU +#define ROGUE_CR_CONTEXT_MAPPING3_VCE1_SHIFT 0U +#define ROGUE_CR_CONTEXT_MAPPING3_VCE1_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_BIF_JONES_OUTSTANDING_READ */ +#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ 0xF098U +#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_COUNTER_SHIFT 0U +#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ */ +#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ 0xF0A0U +#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_COUNTER_SHIFT 0U +#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_BIF_DUST_OUTSTANDING_READ */ +#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ 0xF0A8U +#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL +#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_COUNTER_SHIFT 0U +#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U + +/* Register ROGUE_CR_CONTEXT_MAPPING4 */ +#define ROGUE_CR_CONTEXT_MAPPING4 0xF210U 
+#define ROGUE_CR_CONTEXT_MAPPING4_MASKFULL 0x0000FFFFFFFFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING4_3D_MMU_STACK_SHIFT 40U +#define ROGUE_CR_CONTEXT_MAPPING4_3D_MMU_STACK_CLRMSK 0xFFFF00FFFFFFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING4_3D_UFSTACK_SHIFT 32U +#define ROGUE_CR_CONTEXT_MAPPING4_3D_UFSTACK_CLRMSK 0xFFFFFF00FFFFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING4_3D_FSTACK_SHIFT 24U +#define ROGUE_CR_CONTEXT_MAPPING4_3D_FSTACK_CLRMSK 0xFFFFFFFF00FFFFFFULL +#define ROGUE_CR_CONTEXT_MAPPING4_TA_MMU_STACK_SHIFT 16U +#define ROGUE_CR_CONTEXT_MAPPING4_TA_MMU_STACK_CLRMSK 0xFFFFFFFFFF00FFFFULL +#define ROGUE_CR_CONTEXT_MAPPING4_TA_UFSTACK_SHIFT 8U +#define ROGUE_CR_CONTEXT_MAPPING4_TA_UFSTACK_CLRMSK 0xFFFFFFFFFFFF00FFULL +#define ROGUE_CR_CONTEXT_MAPPING4_TA_FSTACK_SHIFT 0U +#define ROGUE_CR_CONTEXT_MAPPING4_TA_FSTACK_CLRMSK 0xFFFFFFFFFFFFFF00ULL + +/* Register ROGUE_CR_MULTICORE_GPU */ +#define ROGUE_CR_MULTICORE_GPU 0xF300U +#define ROGUE_CR_MULTICORE_GPU_MASKFULL 0x000000000000007FULL +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_SHIFT 6U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_EN 0x00000040U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_SHIFT 5U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_EN 0x00000020U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_SHIFT 4U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_EN 0x00000010U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_SHIFT 3U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_EN 0x00000008U +#define ROGUE_CR_MULTICORE_GPU_ID_SHIFT 0U +#define ROGUE_CR_MULTICORE_GPU_ID_CLRMSK 0xFFFFFFF8U + +/* Register ROGUE_CR_MULTICORE_SYSTEM */ +#define ROGUE_CR_MULTICORE_SYSTEM 0xF308U +#define ROGUE_CR_MULTICORE_SYSTEM_MASKFULL 0x000000000000000FULL +#define ROGUE_CR_MULTICORE_SYSTEM_GPU_COUNT_SHIFT 0U +#define ROGUE_CR_MULTICORE_SYSTEM_GPU_COUNT_CLRMSK 0xFFFFFFF0U + +/* Register ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON */ +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON 0xF310U +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_GPU_ENABLE_SHIFT 0U +#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON */ +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON 0xF320U +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_GPU_ENABLE_SHIFT 0U +#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON */ 
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON 0xF330U +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_GPU_ENABLE_SHIFT 0U +#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U + +/* Register ROGUE_CR_ECC_RAM_ERR_INJ */ +#define ROGUE_CR_ECC_RAM_ERR_INJ 0xF340U +#define ROGUE_CR_ECC_RAM_ERR_INJ_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_SHIFT 4U +#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_EN 0x00000010U +#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_SHIFT 3U +#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_EN 0x00000008U +#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_SHIFT 2U +#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_EN 0x00000004U +#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_SHIFT 1U +#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_EN 0x00000002U +#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_SHIFT 0U +#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_EN 0x00000001U + +/* Register ROGUE_CR_ECC_RAM_INIT_KICK */ +#define ROGUE_CR_ECC_RAM_INIT_KICK 0xF348U +#define ROGUE_CR_ECC_RAM_INIT_KICK_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_SHIFT 4U +#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_EN 0x00000010U +#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_SHIFT 3U +#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_EN 0x00000008U +#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_SHIFT 2U +#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_EN 0x00000004U +#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_SHIFT 1U +#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_EN 0x00000002U +#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_SHIFT 0U +#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_EN 0x00000001U + +/* Register ROGUE_CR_ECC_RAM_INIT_DONE */ +#define ROGUE_CR_ECC_RAM_INIT_DONE 0xF350U +#define ROGUE_CR_ECC_RAM_INIT_DONE_MASKFULL 0x000000000000001FULL +#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_SHIFT 4U +#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_EN 0x00000010U +#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_SHIFT 3U +#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_EN 0x00000008U +#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_SHIFT 2U +#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_EN 0x00000004U +#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_SHIFT 1U +#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_EN 0x00000002U +#define ROGUE_CR_ECC_RAM_INIT_DONE_MARS_SHIFT 0U +#define 
ROGUE_CR_ECC_RAM_INIT_DONE_MARS_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_ECC_RAM_INIT_DONE_MARS_EN 0x00000001U + +/* Register ROGUE_CR_SAFETY_EVENT_ENABLE */ +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE 0xF390U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__MASKFULL 0x000000000000007FULL +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_SHIFT 3U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_EN 0x00000008U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_SHIFT 2U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_EN 0x00000004U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_SHIFT 1U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_EN 0x00000002U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U + +/* Register ROGUE_CR_SAFETY_EVENT_STATUS */ +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE 0xF398U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__MASKFULL 0x000000000000007FULL +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_SHIFT 3U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_EN 0x00000008U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_SHIFT 2U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_EN 0x00000004U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_SHIFT 1U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_EN 0x00000002U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 
0xFFFFFFFEU +#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U + +/* Register ROGUE_CR_SAFETY_EVENT_CLEAR */ +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE 0xF3A0U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__MASKFULL 0x000000000000007FULL +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_SHIFT 3U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_EN 0x00000008U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_SHIFT 2U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_EN 0x00000004U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_SHIFT 1U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_EN 0x00000002U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU +#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U + +/* Register ROGUE_CR_MTS_SAFETY_EVENT_ENABLE */ +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE 0xF3D8U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__MASKFULL 0x000000000000007FULL +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_SHIFT 3U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_EN 0x00000008U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_SHIFT 2U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_EN 0x00000004U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_SHIFT 1U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_EN 0x00000002U +#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U +#define 
ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U
+
+/* clang-format on */
+
+#endif /* PVR_ROGUE_CR_DEFS_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h
new file mode 100644
index 000000000000..46186b56effc
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_CR_DEFS_CLIENT_H
+#define PVR_ROGUE_CR_DEFS_CLIENT_H
+
+/* clang-format off */
+
+/*
+ * This register controls the anti-aliasing mode of the Tiling Co-Processor; independent control is
+ * provided in both the X & Y axes.
+ * This register needs to be set based on the ISP Samples Per Pixel a core supports.
+ *
+ * When ISP Samples Per Pixel = 1:
+ * 2xmsaa is achieved by enabling Y - TE does AA on Y plane only
+ * 4xmsaa is achieved by enabling Y and X - TE does AA on X and Y plane
+ * 8xmsaa not supported by XE cores
+ *
+ * When ISP Samples Per Pixel = 2:
+ * 2xmsaa is achieved by enabling X2 - does not affect TE
+ * 4xmsaa is achieved by enabling Y and X2 - TE does AA on Y plane only
+ * 8xmsaa is achieved by enabling Y, X and X2 - TE does AA on X and Y plane
+ *
+ * When ISP Samples Per Pixel = 4:
+ * 2xmsaa is achieved by enabling X2 - does not affect TE
+ * 4xmsaa is achieved by enabling Y2 and X2 - TE does AA on Y plane only
+ * 8xmsaa not supported by XE cores
+ */
+/* Register ROGUE_CR_TE_AA */
+#define ROGUE_CR_TE_AA 0x0C00U
+#define ROGUE_CR_TE_AA_MASKFULL 0x000000000000000Full
+/* Y2
+ * Indicates 4xmsaa when X2 and Y2 are set to 1. This does not affect TE and is only used within
+ * TPW.
+ */
+#define ROGUE_CR_TE_AA_Y2_SHIFT 3
+#define ROGUE_CR_TE_AA_Y2_CLRMSK 0xFFFFFFF7
+#define ROGUE_CR_TE_AA_Y2_EN 0x00000008
+/* Y
+ * Anti-Aliasing in Y Plane Enabled
+ */
+#define ROGUE_CR_TE_AA_Y_SHIFT 2
+#define ROGUE_CR_TE_AA_Y_CLRMSK 0xFFFFFFFB
+#define ROGUE_CR_TE_AA_Y_EN 0x00000004
+/* X
+ * Anti-Aliasing in X Plane Enabled
+ */
+#define ROGUE_CR_TE_AA_X_SHIFT 1
+#define ROGUE_CR_TE_AA_X_CLRMSK 0xFFFFFFFD
+#define ROGUE_CR_TE_AA_X_EN 0x00000002
+/* X2
+ * 2x Anti-Aliasing Enabled, affects PPP only
+ */
+#define ROGUE_CR_TE_AA_X2_SHIFT (0U)
+#define ROGUE_CR_TE_AA_X2_CLRMSK (0xFFFFFFFEU)
+#define ROGUE_CR_TE_AA_X2_EN (0x00000001U)
+
+/* MacroTile Boundaries X Plane */
+/* Register ROGUE_CR_TE_MTILE1 */
+#define ROGUE_CR_TE_MTILE1 0x0C08
+#define ROGUE_CR_TE_MTILE1_MASKFULL 0x0000000007FFFFFFull
+/* X1 default: 0x00000004
+ * X1 MacroTile boundary, left tile X for second column of macrotiles (16MT mode) - 32 pixels across
+ * tile
+ */
+#define ROGUE_CR_TE_MTILE1_X1_SHIFT 18
+#define ROGUE_CR_TE_MTILE1_X1_CLRMSK 0xF803FFFF
+/* X2 default: 0x00000008
+ * X2 MacroTile boundary, left tile X for third (16MT) column of macrotiles - 32 pixels across tile
+ */
+#define ROGUE_CR_TE_MTILE1_X2_SHIFT 9U
+#define ROGUE_CR_TE_MTILE1_X2_CLRMSK 0xFFFC01FF
+/* X3 default: 0x0000000c
+ * X3 MacroTile boundary, left tile X for fourth column of macrotiles (16MT) - 32 pixels across tile
+ */
+#define ROGUE_CR_TE_MTILE1_X3_SHIFT 0
+#define ROGUE_CR_TE_MTILE1_X3_CLRMSK 0xFFFFFE00
+
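
The TE_AA table above reduces to a small lookup once a core's ISP samples per
pixel is known. A minimal sketch for the single-sample case (the helper name
and error convention are illustrative, not part of this header):

	#include <linux/errno.h>

	/* ROGUE_CR_TE_AA value for the requested MSAA sample count on a core
	 * with ISP Samples Per Pixel = 1, per the table above. */
	static int rogue_te_aa_for_samples(unsigned int samples)
	{
		switch (samples) {
		case 1:
			return 0;                   /* no AA bits set */
		case 2:
			return ROGUE_CR_TE_AA_Y_EN; /* TE does AA on Y only */
		case 4:
			return ROGUE_CR_TE_AA_Y_EN | ROGUE_CR_TE_AA_X_EN;
		default:
			return -EINVAL;             /* 8x not supported on XE */
		}
	}
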
+/* MacroTile Boundaries Y Plane */
+/* Register ROGUE_CR_TE_MTILE2 */
+#define ROGUE_CR_TE_MTILE2 0x0C10
+#define ROGUE_CR_TE_MTILE2_MASKFULL 0x0000000007FFFFFFull
+/* Y1 default: 0x00000004
+ * Y1 MacroTile boundary, top tile Y for second column of macrotiles (16MT mode) - 32 pixels tile
+ * height
+ */
+#define ROGUE_CR_TE_MTILE2_Y1_SHIFT 18
+#define ROGUE_CR_TE_MTILE2_Y1_CLRMSK 0xF803FFFF
+/* Y2 default: 0x00000008
+ * Y2 MacroTile boundary, top tile Y for third (16MT) column of macrotiles - 32 pixels tile height
+ */
+#define ROGUE_CR_TE_MTILE2_Y2_SHIFT 9
+#define ROGUE_CR_TE_MTILE2_Y2_CLRMSK 0xFFFC01FF
+/* Y3 default: 0x0000000c
+ * Y3 MacroTile boundary, top tile Y for fourth column of macrotiles (16MT) - 32 pixels tile height
+ */
+#define ROGUE_CR_TE_MTILE2_Y3_SHIFT 0
+#define ROGUE_CR_TE_MTILE2_Y3_CLRMSK 0xFFFFFE00
+
+/*
+ * In order to perform the tiling operation and generate the display list the maximum screen size
+ * must be configured in terms of the number of tiles in the X & Y axes.
+ */
+
+/* Register ROGUE_CR_TE_SCREEN */
+#define ROGUE_CR_TE_SCREEN 0x0C18U
+#define ROGUE_CR_TE_SCREEN_MASKFULL 0x00000000001FF1FFull
+/* YMAX default: 0x00000010
+ * Maximum Y tile address visible on screen, 32 pixel tile height, 16Kx16K max screen size
+ */
+#define ROGUE_CR_TE_SCREEN_YMAX_SHIFT 12
+#define ROGUE_CR_TE_SCREEN_YMAX_CLRMSK 0xFFE00FFF
+/* XMAX default: 0x00000010
+ * Maximum X tile address visible on screen, 32 pixel tile width, 16Kx16K max screen size
+ */
+#define ROGUE_CR_TE_SCREEN_XMAX_SHIFT 0
+#define ROGUE_CR_TE_SCREEN_XMAX_CLRMSK 0xFFFFFE00
+
+/*
+ * In order to perform the tiling operation and generate the display list the maximum screen size
+ * must be configured in terms of the number of pixels in the X & Y axes, since this may not be the
+ * same as the number of tiles defined in the ROGUE_CR_TE_SCREEN register.
+ */
+/* Register ROGUE_CR_PPP_SCREEN */
+#define ROGUE_CR_PPP_SCREEN 0x0C98
+#define ROGUE_CR_PPP_SCREEN_MASKFULL 0x000000007FFF7FFFull
+/* PIXYMAX
+ * Screen height in pixels. (16K x 16K max screen size)
+ */
+#define ROGUE_CR_PPP_SCREEN_PIXYMAX_SHIFT 16
+#define ROGUE_CR_PPP_SCREEN_PIXYMAX_CLRMSK 0x8000FFFF
+/* PIXXMAX
+ * Screen width in pixels. (16K x 16K max screen size)
+ */
+#define ROGUE_CR_PPP_SCREEN_PIXXMAX_SHIFT 0
+#define ROGUE_CR_PPP_SCREEN_PIXXMAX_CLRMSK 0xFFFF8000
+
+/* Register ROGUE_CR_ISP_MTILE_SIZE */
+#define ROGUE_CR_ISP_MTILE_SIZE 0x0F18
+#define ROGUE_CR_ISP_MTILE_SIZE_MASKFULL 0x0000000003FF03FFull
+/* X
+ * Macrotile width, in tiles. A value of zero corresponds to the maximum size
+ */
+#define ROGUE_CR_ISP_MTILE_SIZE_X_SHIFT 16
+#define ROGUE_CR_ISP_MTILE_SIZE_X_CLRMSK 0xFC00FFFF
+#define ROGUE_CR_ISP_MTILE_SIZE_X_ALIGNSHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_X_ALIGNSIZE 1
+/* Y
+ * Macrotile height, in tiles. A value of zero corresponds to the maximum size
+ */
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_SHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_CLRMSK 0xFFFFFC00
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_ALIGNSHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_ALIGNSIZE 1
+
+/* clang-format on */
+
+#endif /* PVR_ROGUE_CR_DEFS_CLIENT_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_defs.h
new file mode 100644
index 000000000000..932b01686008
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_defs.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd.
*/ + +#ifndef PVR_ROGUE_DEFS_H +#define PVR_ROGUE_DEFS_H + +#include "pvr_rogue_cr_defs.h" + +#include <linux/bits.h> + +/* + ****************************************************************************** + * ROGUE Defines + ****************************************************************************** + */ + +#define ROGUE_FW_MAX_NUM_OS (8U) +#define ROGUE_FW_HOST_OS (0U) +#define ROGUE_FW_GUEST_OSID_START (1U) + +#define ROGUE_FW_THREAD_0 (0U) +#define ROGUE_FW_THREAD_1 (1U) + +#define GET_ROGUE_CACHE_LINE_SIZE(x) ((((s32)(x)) > 0) ? ((x) / 8) : (0)) + +#define MAX_HW_GEOM_FRAG_CONTEXTS 2U + +#define ROGUE_CR_CLK_CTRL_ALL_ON \ + (0x5555555555555555ull & ROGUE_CR_CLK_CTRL_MASKFULL) +#define ROGUE_CR_CLK_CTRL_ALL_AUTO \ + (0xaaaaaaaaaaaaaaaaull & ROGUE_CR_CLK_CTRL_MASKFULL) +#define ROGUE_CR_CLK_CTRL2_ALL_ON \ + (0x5555555555555555ull & ROGUE_CR_CLK_CTRL2_MASKFULL) +#define ROGUE_CR_CLK_CTRL2_ALL_AUTO \ + (0xaaaaaaaaaaaaaaaaull & ROGUE_CR_CLK_CTRL2_MASKFULL) + +#define ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN \ + (ROGUE_CR_SOFT_RESET_DUST_A_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_B_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_C_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_D_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_E_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_F_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_G_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_H_CORE_EN) + +/* SOFT_RESET Rascal and DUSTs bits */ +#define ROGUE_CR_SOFT_RESET_RASCALDUSTS_EN \ + (ROGUE_CR_SOFT_RESET_RASCAL_CORE_EN | \ + ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN) + +/* SOFT_RESET steps as defined in the TRM */ +#define ROGUE_S7_SOFT_RESET_DUSTS (ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN) + +#define ROGUE_S7_SOFT_RESET_JONES \ + (ROGUE_CR_SOFT_RESET_PM_EN | ROGUE_CR_SOFT_RESET_VDM_EN | \ + ROGUE_CR_SOFT_RESET_ISP_EN) + +#define ROGUE_S7_SOFT_RESET_JONES_ALL \ + (ROGUE_S7_SOFT_RESET_JONES | ROGUE_CR_SOFT_RESET_BIF_EN | \ + ROGUE_CR_SOFT_RESET_SLC_EN | ROGUE_CR_SOFT_RESET_GARTEN_EN) + +#define ROGUE_S7_SOFT_RESET2 \ + (ROGUE_CR_SOFT_RESET2_BLACKPEARL_EN | ROGUE_CR_SOFT_RESET2_PIXEL_EN | \ + ROGUE_CR_SOFT_RESET2_CDM_EN | ROGUE_CR_SOFT_RESET2_VERTEX_EN) + +#define ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT (12U) +#define ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE \ + BIT(ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT) + +#define ROGUE_BIF_PM_VIRTUAL_PAGE_ALIGNSHIFT (14U) +#define ROGUE_BIF_PM_VIRTUAL_PAGE_SIZE BIT(ROGUE_BIF_PM_VIRTUAL_PAGE_ALIGNSHIFT) + +#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE (16U) + +/* + * To get the number of required Dusts, divide the number of + * clusters by 2 and round up + */ +#define ROGUE_REQ_NUM_DUSTS(CLUSTERS) (((CLUSTERS) + 1U) / 2U) + +/* + * To get the number of required Bernado/Phantom(s), divide + * the number of clusters by 4 and round up + */ +#define ROGUE_REQ_NUM_PHANTOMS(CLUSTERS) (((CLUSTERS) + 3U) / 4U) +#define ROGUE_REQ_NUM_BERNADOS(CLUSTERS) (((CLUSTERS) + 3U) / 4U) +#define ROGUE_REQ_NUM_BLACKPEARLS(CLUSTERS) (((CLUSTERS) + 3U) / 4U) + +/* + * FW MMU contexts + */ +#define MMU_CONTEXT_MAPPING_FWPRIV (0x0) /* FW code/private data */ +#define MMU_CONTEXT_MAPPING_FWIF (0x0) /* Host/FW data */ + +/* + * Utility macros to calculate CAT_BASE register addresses + */ +#define BIF_CAT_BASEX(n) \ + (ROGUE_CR_BIF_CAT_BASE0 + \ + (n) * (ROGUE_CR_BIF_CAT_BASE1 - ROGUE_CR_BIF_CAT_BASE0)) + +#define FWCORE_MEM_CAT_BASEX(n) \ + (ROGUE_CR_FWCORE_MEM_CAT_BASE0 + \ + (n) * (ROGUE_CR_FWCORE_MEM_CAT_BASE1 - \ + ROGUE_CR_FWCORE_MEM_CAT_BASE0)) + +/* + * FWCORE wrapper register defines + */ +#define FWCORE_ADDR_REMAP_CONFIG0_MMU_CONTEXT_SHIFT \ + 
ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_SHIFT +#define FWCORE_ADDR_REMAP_CONFIG0_MMU_CONTEXT_CLRMSK \ + ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_CLRMSK +#define FWCORE_ADDR_REMAP_CONFIG0_SIZE_ALIGNSHIFT (12U) + +#define ROGUE_MAX_COMPUTE_SHARED_REGISTERS (2 * 1024) +#define ROGUE_MAX_VERTEX_SHARED_REGISTERS 1024 +#define ROGUE_MAX_PIXEL_SHARED_REGISTERS 1024 +#define ROGUE_CSRM_LINE_SIZE_IN_DWORDS (64 * 4 * 4) + +#define ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE 64 +#define ROGUE_CDMCTRL_USC_COMMON_SIZE_UPPER 256 + +/* + * The maximum amount of local memory which can be allocated by a single kernel + * (in dwords/32-bit registers). + * + * ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE is in bytes so we divide by four. + */ +#define ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS ((ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE * \ + ROGUE_CDMCTRL_USC_COMMON_SIZE_UPPER) >> 2) + +/* + ****************************************************************************** + * WA HWBRNs + ****************************************************************************** + */ + +/* GPU CR timer tick in GPU cycles */ +#define ROGUE_CRTIME_TICK_IN_CYCLES (256U) + +/* for nohw multicore return max cores possible to client */ +#define ROGUE_MULTICORE_MAX_NOHW_CORES (4U) + +/* + * If the size of the SLC is less than this value then the TPU bypasses the SLC. + */ +#define ROGUE_TPU_CACHED_SLC_SIZE_THRESHOLD (128U * 1024U) + +/* + * If the size of the SLC is bigger than this value then the TCU must not be + * bypassed in the SLC. + * In XE_MEMORY_HIERARCHY cores, the TCU is bypassed by default. + */ +#define ROGUE_TCU_CACHED_SLC_SIZE_THRESHOLD (32U * 1024U) + +/* + * Register used by the FW to track the current boot stage (not used in MIPS) + */ +#define ROGUE_FW_BOOT_STAGE_REGISTER (ROGUE_CR_POWER_ESTIMATE_RESULT) + +/* + * Virtualisation definitions + */ +#define ROGUE_VIRTUALISATION_REG_SIZE_PER_OS \ + (ROGUE_CR_MTS_SCHEDULE1 - ROGUE_CR_MTS_SCHEDULE) + +/* + * Macro used to indicate which version of HWPerf is active + */ +#define ROGUE_FEATURE_HWPERF_ROGUE + +/* + * Maximum number of cores supported by TRP + */ +#define ROGUE_TRP_MAX_NUM_CORES (4U) + +#endif /* PVR_ROGUE_DEFS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif.h new file mode 100644 index 000000000000..172886be4c82 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif.h @@ -0,0 +1,2188 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. 
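
The round-up divisions in the ROGUE_REQ_NUM_* macros and the byte-to-dword
conversion behind ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS are easy to
spot-check at compile time; a sketch (the checks themselves are illustrative):

	#include <linux/build_bug.h>

	/* 6 clusters need 3 Dusts (2 clusters each) and 2 Phantoms (4 each). */
	static_assert(ROGUE_REQ_NUM_DUSTS(6) == 3);
	static_assert(ROGUE_REQ_NUM_PHANTOMS(6) == 2);

	/* 64 bytes/line * 256 lines = 16384 bytes, i.e. 4096 dwords. */
	static_assert(ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS == 4096);
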
*/ + +#ifndef PVR_ROGUE_FWIF_H +#define PVR_ROGUE_FWIF_H + +#include <linux/bits.h> +#include <linux/build_bug.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "pvr_rogue_defs.h" +#include "pvr_rogue_fwif_common.h" +#include "pvr_rogue_fwif_shared.h" + +/* + **************************************************************************** + * Logging type + **************************************************************************** + */ +#define ROGUE_FWIF_LOG_TYPE_NONE 0x00000000U +#define ROGUE_FWIF_LOG_TYPE_TRACE 0x00000001U +#define ROGUE_FWIF_LOG_TYPE_GROUP_MAIN 0x00000002U +#define ROGUE_FWIF_LOG_TYPE_GROUP_MTS 0x00000004U +#define ROGUE_FWIF_LOG_TYPE_GROUP_CLEANUP 0x00000008U +#define ROGUE_FWIF_LOG_TYPE_GROUP_CSW 0x00000010U +#define ROGUE_FWIF_LOG_TYPE_GROUP_BIF 0x00000020U +#define ROGUE_FWIF_LOG_TYPE_GROUP_PM 0x00000040U +#define ROGUE_FWIF_LOG_TYPE_GROUP_RTD 0x00000080U +#define ROGUE_FWIF_LOG_TYPE_GROUP_SPM 0x00000100U +#define ROGUE_FWIF_LOG_TYPE_GROUP_POW 0x00000200U +#define ROGUE_FWIF_LOG_TYPE_GROUP_HWR 0x00000400U +#define ROGUE_FWIF_LOG_TYPE_GROUP_HWP 0x00000800U +#define ROGUE_FWIF_LOG_TYPE_GROUP_RPM 0x00001000U +#define ROGUE_FWIF_LOG_TYPE_GROUP_DMA 0x00002000U +#define ROGUE_FWIF_LOG_TYPE_GROUP_MISC 0x00004000U +#define ROGUE_FWIF_LOG_TYPE_GROUP_DEBUG 0x80000000U +#define ROGUE_FWIF_LOG_TYPE_GROUP_MASK 0x80007FFEU +#define ROGUE_FWIF_LOG_TYPE_MASK 0x80007FFFU + +/* String used in pvrdebug -h output */ +#define ROGUE_FWIF_LOG_GROUPS_STRING_LIST \ + "main,mts,cleanup,csw,bif,pm,rtd,spm,pow,hwr,hwp,rpm,dma,misc,debug" + +/* Table entry to map log group strings to log type value */ +struct rogue_fwif_log_group_map_entry { + const char *log_group_name; + u32 log_group_type; +}; + +/* + **************************************************************************** + * ROGUE FW signature checks + **************************************************************************** + */ +#define ROGUE_FW_SIG_BUFFER_SIZE_MIN (8192) + +#define ROGUE_FWIF_TIMEDIFF_ID ((0x1UL << 28) | ROGUE_CR_TIMER) + +/* + **************************************************************************** + * Trace Buffer + **************************************************************************** + */ + +/* Default size of ROGUE_FWIF_TRACEBUF_SPACE in DWords */ +#define ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS 12000U +#define ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE 200U +#define ROGUE_FW_THREAD_NUM 1U +#define ROGUE_FW_THREAD_MAX 2U + +#define ROGUE_FW_POLL_TYPE_SET 0x80000000U + +struct rogue_fwif_file_info_buf { + char path[ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE]; + char info[ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE]; + u32 line_num; + u32 padding; +} __aligned(8); + +struct rogue_fwif_tracebuf_space { + u32 trace_pointer; + + u32 trace_buffer_fw_addr; + + /* To be used by host when reading from trace buffer */ + u32 *trace_buffer; + + struct rogue_fwif_file_info_buf assert_buf; +} __aligned(8); + +/* Total number of FW fault logs stored */ +#define ROGUE_FWIF_FWFAULTINFO_MAX (8U) + +struct rogue_fw_fault_info { + aligned_u64 cr_timer; + aligned_u64 os_timer; + + u32 data __aligned(8); + u32 reserved; + struct rogue_fwif_file_info_buf fault_buf; +} __aligned(8); + +enum rogue_fwif_pow_state { + ROGUE_FWIF_POW_OFF, /* idle and ready to full power down */ + ROGUE_FWIF_POW_ON, /* running HW commands */ + ROGUE_FWIF_POW_FORCED_IDLE, /* forced idle */ + ROGUE_FWIF_POW_IDLE, /* idle waiting for host handshake */ +}; + +/* Firmware HWR states */ +/* The HW state is ok or locked up */ 
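
The firmware reports recovery status as a bitmask assembled from the HWR flags
defined just below. A sketch of a host-side check (the helper and the chosen
bits are illustrative; the driver's actual policy may differ):

	/* True when the FW itself has faulted or asked to be restarted. */
	static bool rogue_fw_needs_restart(u32 hwr_state_flags)
	{
		return hwr_state_flags & (ROGUE_FWIF_HWR_FW_FAULT |
					  ROGUE_FWIF_HWR_RESTART_REQUESTED);
	}
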
+#define ROGUE_FWIF_HWR_HARDWARE_OK BIT(0) +/* Tells if a HWR reset is in progress */ +#define ROGUE_FWIF_HWR_RESET_IN_PROGRESS BIT(1) +/* A DM unrelated lockup has been detected */ +#define ROGUE_FWIF_HWR_GENERAL_LOCKUP BIT(3) +/* At least one DM is running without being close to a lockup */ +#define ROGUE_FWIF_HWR_DM_RUNNING_OK BIT(4) +/* At least one DM is close to lockup */ +#define ROGUE_FWIF_HWR_DM_STALLING BIT(5) +/* The FW has faulted and needs to restart */ +#define ROGUE_FWIF_HWR_FW_FAULT BIT(6) +/* The FW has requested the host to restart it */ +#define ROGUE_FWIF_HWR_RESTART_REQUESTED BIT(7) + +#define ROGUE_FWIF_PHR_STATE_SHIFT (8U) +/* The FW has requested the host to restart it, per PHR configuration */ +#define ROGUE_FWIF_PHR_RESTART_REQUESTED ((1) << ROGUE_FWIF_PHR_STATE_SHIFT) +/* A PHR triggered GPU reset has just finished */ +#define ROGUE_FWIF_PHR_RESTART_FINISHED ((2) << ROGUE_FWIF_PHR_STATE_SHIFT) +#define ROGUE_FWIF_PHR_RESTART_MASK \ + (ROGUE_FWIF_PHR_RESTART_REQUESTED | ROGUE_FWIF_PHR_RESTART_FINISHED) + +#define ROGUE_FWIF_PHR_MODE_OFF (0UL) +#define ROGUE_FWIF_PHR_MODE_RD_RESET (1UL) +#define ROGUE_FWIF_PHR_MODE_FULL_RESET (2UL) + +/* Firmware per-DM HWR states */ +/* DM is working if all flags are cleared */ +#define ROGUE_FWIF_DM_STATE_WORKING (0) +/* DM is idle and ready for HWR */ +#define ROGUE_FWIF_DM_STATE_READY_FOR_HWR BIT(0) +/* DM need to skip to next cmd before resuming processing */ +#define ROGUE_FWIF_DM_STATE_NEEDS_SKIP BIT(2) +/* DM need partial render cleanup before resuming processing */ +#define ROGUE_FWIF_DM_STATE_NEEDS_PR_CLEANUP BIT(3) +/* DM need to increment Recovery Count once fully recovered */ +#define ROGUE_FWIF_DM_STATE_NEEDS_TRACE_CLEAR BIT(4) +/* DM was identified as locking up and causing HWR */ +#define ROGUE_FWIF_DM_STATE_GUILTY_LOCKUP BIT(5) +/* DM was innocently affected by another lockup which caused HWR */ +#define ROGUE_FWIF_DM_STATE_INNOCENT_LOCKUP BIT(6) +/* DM was identified as over-running and causing HWR */ +#define ROGUE_FWIF_DM_STATE_GUILTY_OVERRUNING BIT(7) +/* DM was innocently affected by another DM over-running which caused HWR */ +#define ROGUE_FWIF_DM_STATE_INNOCENT_OVERRUNING BIT(8) +/* DM was forced into HWR as it delayed more important workloads */ +#define ROGUE_FWIF_DM_STATE_HARD_CONTEXT_SWITCH BIT(9) +/* DM was forced into HWR due to an uncorrected GPU ECC error */ +#define ROGUE_FWIF_DM_STATE_GPU_ECC_HWR BIT(10) + +/* Firmware's connection state */ +enum rogue_fwif_connection_fw_state { + /* Firmware is offline */ + ROGUE_FW_CONNECTION_FW_OFFLINE = 0, + /* Firmware is initialised */ + ROGUE_FW_CONNECTION_FW_READY, + /* Firmware connection is fully established */ + ROGUE_FW_CONNECTION_FW_ACTIVE, + /* Firmware is clearing up connection data*/ + ROGUE_FW_CONNECTION_FW_OFFLOADING, + ROGUE_FW_CONNECTION_FW_STATE_COUNT +}; + +/* OS' connection state */ +enum rogue_fwif_connection_os_state { + /* OS is offline */ + ROGUE_FW_CONNECTION_OS_OFFLINE = 0, + /* OS's KM driver is setup and waiting */ + ROGUE_FW_CONNECTION_OS_READY, + /* OS connection is fully established */ + ROGUE_FW_CONNECTION_OS_ACTIVE, + ROGUE_FW_CONNECTION_OS_STATE_COUNT +}; + +struct rogue_fwif_os_runtime_flags { + unsigned int os_state : 3; + unsigned int fl_ok : 1; + unsigned int fl_grow_pending : 1; + unsigned int isolated_os : 1; + unsigned int reserved : 26; +}; + +#define PVR_SLR_LOG_ENTRIES 10 +/* MAX_CLIENT_CCB_NAME not visible to this header */ +#define PVR_SLR_LOG_STRLEN 30 + +struct rogue_fwif_slr_entry { + aligned_u64 timestamp; + u32 
fw_ctx_addr; + u32 num_ufos; + char ccb_name[PVR_SLR_LOG_STRLEN]; + char padding[2]; +} __aligned(8); + +#define MAX_THREAD_NUM 2 + +/* firmware trace control data */ +struct rogue_fwif_tracebuf { + u32 log_type; + struct rogue_fwif_tracebuf_space tracebuf[MAX_THREAD_NUM]; + /* + * Member initialised only when sTraceBuf is actually allocated (in + * ROGUETraceBufferInitOnDemandResources) + */ + u32 tracebuf_size_in_dwords; + /* Compatibility and other flags */ + u32 tracebuf_flags; +} __aligned(8); + +/* firmware system data shared with the Host driver */ +struct rogue_fwif_sysdata { + /* Configuration flags from host */ + u32 config_flags; + /* Extended configuration flags from host */ + u32 config_flags_ext; + enum rogue_fwif_pow_state pow_state; + u32 hw_perf_ridx; + u32 hw_perf_widx; + u32 hw_perf_wrap_count; + /* Constant after setup, needed in FW */ + u32 hw_perf_size; + /* The number of times the FW drops a packet due to buffer full */ + u32 hw_perf_drop_count; + + /* + * ui32HWPerfUt, ui32FirstDropOrdinal, ui32LastDropOrdinal only valid + * when FW is built with ROGUE_HWPERF_UTILIZATION & + * ROGUE_HWPERF_DROP_TRACKING defined in rogue_fw_hwperf.c + */ + /* Buffer utilisation, high watermark of bytes in use */ + u32 hw_perf_ut; + /* The ordinal of the first packet the FW dropped */ + u32 first_drop_ordinal; + /* The ordinal of the last packet the FW dropped */ + u32 last_drop_ordinal; + /* State flags for each Operating System mirrored from Fw coremem */ + struct rogue_fwif_os_runtime_flags + os_runtime_flags_mirror[ROGUE_FW_MAX_NUM_OS]; + + struct rogue_fw_fault_info fault_info[ROGUE_FWIF_FWFAULTINFO_MAX]; + u32 fw_faults; + u32 cr_poll_addr[MAX_THREAD_NUM]; + u32 cr_poll_mask[MAX_THREAD_NUM]; + u32 cr_poll_count[MAX_THREAD_NUM]; + aligned_u64 start_idle_time; + +#if defined(SUPPORT_ROGUE_FW_STATS_FRAMEWORK) +# define ROGUE_FWIF_STATS_FRAMEWORK_LINESIZE (8) +# define ROGUE_FWIF_STATS_FRAMEWORK_MAX \ + (2048 * ROGUE_FWIF_STATS_FRAMEWORK_LINESIZE) + u32 fw_stats_buf[ROGUE_FWIF_STATS_FRAMEWORK_MAX] __aligned(8); +#endif + u32 hwr_state_flags; + u32 hwr_recovery_flags[PVR_FWIF_DM_MAX]; + /* Compatibility and other flags */ + u32 fw_sys_data_flags; + /* Identify whether MC config is P-P or P-S */ + u32 mc_config; +} __aligned(8); + +/* per-os firmware shared data */ +struct rogue_fwif_osdata { + /* Configuration flags from an OS */ + u32 fw_os_config_flags; + /* Markers to signal that the host should perform a full sync check */ + u32 fw_sync_check_mark; + u32 host_sync_check_mark; + + u32 forced_updates_requested; + u8 slr_log_wp; + struct rogue_fwif_slr_entry slr_log_first; + struct rogue_fwif_slr_entry slr_log[PVR_SLR_LOG_ENTRIES]; + aligned_u64 last_forced_update_time; + + /* Interrupt count from Threads > */ + u32 interrupt_count[MAX_THREAD_NUM]; + u32 kccb_cmds_executed; + u32 power_sync_fw_addr; + /* Compatibility and other flags */ + u32 fw_os_data_flags; + u32 padding; +} __aligned(8); + +/* Firmware trace time-stamp field breakup */ + +/* ROGUE_CR_TIMER register read (48 bits) value*/ +#define ROGUE_FWT_TIMESTAMP_TIME_SHIFT (0U) +#define ROGUE_FWT_TIMESTAMP_TIME_CLRMSK (0xFFFF000000000000ull) + +/* Extra debug-info (16 bits) */ +#define ROGUE_FWT_TIMESTAMP_DEBUG_INFO_SHIFT (48U) +#define ROGUE_FWT_TIMESTAMP_DEBUG_INFO_CLRMSK ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK + +/* Debug-info sub-fields */ +/* + * Bit 0: ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT bit from ROGUE_CR_EVENT_STATUS + * register + */ +#define ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SHIFT (0U) +#define 
ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SET \ + BIT(ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SHIFT) + +/* Bit 1: ROGUE_CR_BIF_MMU_ENTRY_PENDING bit from ROGUE_CR_BIF_MMU_ENTRY register */ +#define ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SHIFT (1U) +#define ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SET \ + BIT(ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SHIFT) + +/* Bit 2: ROGUE_CR_SLAVE_EVENT register is non-zero */ +#define ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SHIFT (2U) +#define ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SET \ + BIT(ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SHIFT) + +/* Bit 3-15: Unused bits */ + +#define ROGUE_FWT_DEBUG_INFO_STR_MAXLEN 64 +#define ROGUE_FWT_DEBUG_INFO_STR_PREPEND " (debug info: " +#define ROGUE_FWT_DEBUG_INFO_STR_APPEND ")" + +/* + ****************************************************************************** + * HWR Data + ****************************************************************************** + */ +enum rogue_hwrtype { + ROGUE_HWRTYPE_UNKNOWNFAILURE = 0, + ROGUE_HWRTYPE_OVERRUN = 1, + ROGUE_HWRTYPE_POLLFAILURE = 2, + ROGUE_HWRTYPE_BIF0FAULT = 3, + ROGUE_HWRTYPE_BIF1FAULT = 4, + ROGUE_HWRTYPE_TEXASBIF0FAULT = 5, + ROGUE_HWRTYPE_MMUFAULT = 6, + ROGUE_HWRTYPE_MMUMETAFAULT = 7, + ROGUE_HWRTYPE_MIPSTLBFAULT = 8, + ROGUE_HWRTYPE_ECCFAULT = 9, + ROGUE_HWRTYPE_MMURISCVFAULT = 10, +}; + +#define ROGUE_FWIF_HWRTYPE_BIF_BANK_GET(hwr_type) \ + (((hwr_type) == ROGUE_HWRTYPE_BIF0FAULT) ? 0 : 1) + +#define ROGUE_FWIF_HWRTYPE_PAGE_FAULT_GET(hwr_type) \ + ((((hwr_type) == ROGUE_HWRTYPE_BIF0FAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_BIF1FAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_TEXASBIF0FAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_MMUFAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_MMUMETAFAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_MIPSTLBFAULT) || \ + ((hwr_type) == ROGUE_HWRTYPE_MMURISCVFAULT)) \ + ? 
true \ + : false) + +struct rogue_bifinfo { + aligned_u64 bif_req_status; + aligned_u64 bif_mmu_status; + aligned_u64 pc_address; /* phys address of the page catalogue */ + aligned_u64 reserved; +}; + +struct rogue_eccinfo { + u32 fault_gpu; +}; + +struct rogue_mmuinfo { + aligned_u64 mmu_status[2]; + aligned_u64 pc_address; /* phys address of the page catalogue */ + aligned_u64 reserved; +}; + +struct rogue_pollinfo { + u32 thread_num; + u32 cr_poll_addr; + u32 cr_poll_mask; + u32 cr_poll_last_value; + aligned_u64 reserved; +} __aligned(8); + +struct rogue_tlbinfo { + u32 bad_addr; + u32 entry_lo; +}; + +struct rogue_hwrinfo { + union { + struct rogue_bifinfo bif_info; + struct rogue_mmuinfo mmu_info; + struct rogue_pollinfo poll_info; + struct rogue_tlbinfo tlb_info; + struct rogue_eccinfo ecc_info; + } hwr_data; + + aligned_u64 cr_timer; + aligned_u64 os_timer; + u32 frame_num; + u32 pid; + u32 active_hwrt_data; + u32 hwr_number; + u32 event_status; + u32 hwr_recovery_flags; + enum rogue_hwrtype hwr_type; + u32 dm; + u32 core_id; + aligned_u64 cr_time_of_kick; + aligned_u64 cr_time_hw_reset_start; + aligned_u64 cr_time_hw_reset_finish; + aligned_u64 cr_time_freelist_ready; + aligned_u64 reserved[2]; +} __aligned(8); + +/* Number of first HWR logs recorded (never overwritten by newer logs) */ +#define ROGUE_FWIF_HWINFO_MAX_FIRST 8U +/* Number of latest HWR logs (older logs are overwritten by newer logs) */ +#define ROGUE_FWIF_HWINFO_MAX_LAST 8U +/* Total number of HWR logs stored in a buffer */ +#define ROGUE_FWIF_HWINFO_MAX \ + (ROGUE_FWIF_HWINFO_MAX_FIRST + ROGUE_FWIF_HWINFO_MAX_LAST) +/* Index of the last log in the HWR log buffer */ +#define ROGUE_FWIF_HWINFO_LAST_INDEX (ROGUE_FWIF_HWINFO_MAX - 1U) + +struct rogue_fwif_hwrinfobuf { + struct rogue_hwrinfo hwr_info[ROGUE_FWIF_HWINFO_MAX]; + u32 hwr_counter; + u32 write_index; + u32 dd_req_count; + u32 hwr_info_buf_flags; /* Compatibility and other flags */ + u32 hwr_dm_locked_up_count[PVR_FWIF_DM_MAX]; + u32 hwr_dm_overran_count[PVR_FWIF_DM_MAX]; + u32 hwr_dm_recovered_count[PVR_FWIF_DM_MAX]; + u32 hwr_dm_false_detect_count[PVR_FWIF_DM_MAX]; +} __aligned(8); + +#define ROGUE_FWIF_CTXSWITCH_PROFILE_FAST_EN (1) +#define ROGUE_FWIF_CTXSWITCH_PROFILE_MEDIUM_EN (2) +#define ROGUE_FWIF_CTXSWITCH_PROFILE_SLOW_EN (3) +#define ROGUE_FWIF_CTXSWITCH_PROFILE_NODELAY_EN (4) + +#define ROGUE_FWIF_CDM_ARBITRATION_TASK_DEMAND_EN (1) +#define ROGUE_FWIF_CDM_ARBITRATION_ROUND_ROBIN_EN (2) + +#define ROGUE_FWIF_ISP_SCHEDMODE_VER1_IPP (1) +#define ROGUE_FWIF_ISP_SCHEDMODE_VER2_ISP (2) +/* + ****************************************************************************** + * ROGUE firmware Init Config Data + ****************************************************************************** + */ + +/* Flag definitions affecting the firmware globally */ +#define ROGUE_FWIF_INICFG_CTXSWITCH_MODE_RAND BIT(0) +#define ROGUE_FWIF_INICFG_CTXSWITCH_SRESET_EN BIT(1) +#define ROGUE_FWIF_INICFG_HWPERF_EN BIT(2) +#define ROGUE_FWIF_INICFG_DM_KILL_MODE_RAND_EN BIT(3) +#define ROGUE_FWIF_INICFG_POW_RASCALDUST BIT(4) +/* Bit 5 is reserved. */ +#define ROGUE_FWIF_INICFG_FBCDC_V3_1_EN BIT(6) +#define ROGUE_FWIF_INICFG_CHECK_MLIST_EN BIT(7) +#define ROGUE_FWIF_INICFG_DISABLE_CLKGATING_EN BIT(8) +/* Bit 9 is reserved. */ +/* Bit 10 is reserved. */ +/* Bit 11 is reserved. */ +#define ROGUE_FWIF_INICFG_REGCONFIG_EN BIT(12) +#define ROGUE_FWIF_INICFG_ASSERT_ON_OUTOFMEMORY BIT(13) +#define ROGUE_FWIF_INICFG_HWP_DISABLE_FILTER BIT(14) +/* Bit 15 is reserved. 
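
The ROGUE_FWIF_HWINFO_MAX_FIRST / ROGUE_FWIF_HWINFO_MAX_LAST split above
implies a write-index policy for the HWR log buffer: the first eight entries
are written once and then preserved, while the index wraps only within the
"last" half. A sketch of that advance (the helper is illustrative, not the
firmware's code):

	static u32 rogue_hwrinfo_next_index(u32 write_index)
	{
		if (write_index < ROGUE_FWIF_HWINFO_MAX_FIRST)
			return write_index + 1;
		return ROGUE_FWIF_HWINFO_MAX_FIRST +
		       ((write_index + 1 - ROGUE_FWIF_HWINFO_MAX_FIRST) %
			ROGUE_FWIF_HWINFO_MAX_LAST);
	}
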
*/ +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT (16) +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_FAST \ + (ROGUE_FWIF_CTXSWITCH_PROFILE_FAST_EN \ + << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT) +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_MEDIUM \ + (ROGUE_FWIF_CTXSWITCH_PROFILE_MEDIUM_EN \ + << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT) +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SLOW \ + (ROGUE_FWIF_CTXSWITCH_PROFILE_SLOW_EN \ + << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT) +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_NODELAY \ + (ROGUE_FWIF_CTXSWITCH_PROFILE_NODELAY_EN \ + << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT) +#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_MASK \ + (7 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT) +#define ROGUE_FWIF_INICFG_DISABLE_DM_OVERLAP BIT(19) +#define ROGUE_FWIF_INICFG_ASSERT_ON_HWR_TRIGGER BIT(20) +#define ROGUE_FWIF_INICFG_FABRIC_COHERENCY_ENABLED BIT(21) +#define ROGUE_FWIF_INICFG_VALIDATE_IRQ BIT(22) +#define ROGUE_FWIF_INICFG_DISABLE_PDP_EN BIT(23) +#define ROGUE_FWIF_INICFG_SPU_POWER_STATE_MASK_CHANGE_EN BIT(24) +#define ROGUE_FWIF_INICFG_WORKEST BIT(25) +#define ROGUE_FWIF_INICFG_PDVFS BIT(26) +#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT (27) +#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_TASK_DEMAND \ + (ROGUE_FWIF_CDM_ARBITRATION_TASK_DEMAND_EN \ + << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT) +#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_ROUND_ROBIN \ + (ROGUE_FWIF_CDM_ARBITRATION_ROUND_ROBIN_EN \ + << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT) +#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_MASK \ + (3 << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT) +#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT (29) +#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_NONE (0) +#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER1_IPP \ + (ROGUE_FWIF_ISP_SCHEDMODE_VER1_IPP \ + << ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT) +#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER2_ISP \ + (ROGUE_FWIF_ISP_SCHEDMODE_VER2_ISP \ + << ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT) +#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_MASK \ + (ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER1_IPP | \ + ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER2_ISP) +#define ROGUE_FWIF_INICFG_VALIDATE_SOCUSC_TIMER BIT(31) + +#define ROGUE_FWIF_INICFG_ALL (0xFFFFFFFFU) + +/* Extended Flag definitions affecting the firmware globally */ +#define ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_SHIFT (0) +/* [7] YUV10 override + * [6:4] Quality + * [3] Quality enable + * [2:1] Compression scheme + * [0] Lossy group + */ +#define ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_MASK (0xFF) +#define ROGUE_FWIF_INICFG_EXT_ALL (ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_MASK) + +/* Flag definitions affecting only workloads submitted by a particular OS */ +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_TDM_EN BIT(0) +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_GEOM_EN BIT(1) +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_FRAG_EN BIT(2) +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_CDM_EN BIT(3) + +#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_TDM BIT(4) +#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_GEOM BIT(5) +#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_FRAG BIT(6) +#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_CDM BIT(7) + +#define ROGUE_FWIF_INICFG_OS_ALL (0xFF) + +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_DM_ALL \ + (ROGUE_FWIF_INICFG_OS_CTXSWITCH_TDM_EN | \ + ROGUE_FWIF_INICFG_OS_CTXSWITCH_GEOM_EN | \ + ROGUE_FWIF_INICFG_OS_CTXSWITCH_FRAG_EN | \ + ROGUE_FWIF_INICFG_OS_CTXSWITCH_CDM_EN) + +#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_CLRMSK \ + ~(ROGUE_FWIF_INICFG_OS_CTXSWITCH_DM_ALL) + +#define ROGUE_FWIF_FILTCFG_TRUNCATE_HALF BIT(3) +#define 
ROGUE_FWIF_FILTCFG_TRUNCATE_INT BIT(2)
+#define ROGUE_FWIF_FILTCFG_NEW_FILTER_MODE BIT(1)
+
+enum rogue_activepm_conf {
+	ROGUE_ACTIVEPM_FORCE_OFF = 0,
+	ROGUE_ACTIVEPM_FORCE_ON = 1,
+	ROGUE_ACTIVEPM_DEFAULT = 2
+};
+
+enum rogue_rd_power_island_conf {
+	ROGUE_RD_POWER_ISLAND_FORCE_OFF = 0,
+	ROGUE_RD_POWER_ISLAND_FORCE_ON = 1,
+	ROGUE_RD_POWER_ISLAND_DEFAULT = 2
+};
+
+struct rogue_fw_register_list {
+	/* Register number */
+	u16 reg_num;
+	/* Indirect register number (or 0 if not used) */
+	u16 indirect_reg_num;
+	/* Start value for indirect register */
+	u16 indirect_start_val;
+	/* End value for indirect register */
+	u16 indirect_end_val;
+};
+
+struct rogue_fwif_dllist_node {
+	u32 p;
+	u32 n;
+};
+
+/*
+ * This number is used to represent an invalid page catalogue physical address
+ */
+#define ROGUE_FWIF_INVALID_PC_PHYADDR 0xFFFFFFFFFFFFFFFFLLU
+
+/* This number is used to represent an unallocated page catalog base register */
+#define ROGUE_FW_BIF_INVALID_PCSET 0xFFFFFFFFU
+
+/* Firmware memory context. */
+struct rogue_fwif_fwmemcontext {
+	/* device physical address of context's page catalogue */
+	aligned_u64 pc_dev_paddr;
+	/*
+	 * associated page catalog base register (ROGUE_FW_BIF_INVALID_PCSET ==
+	 * unallocated)
+	 */
+	u32 page_cat_base_reg_set;
+	/* breakpoint address */
+	u32 breakpoint_addr;
+	/* breakpoint handler address */
+	u32 bp_handler_addr;
+	/* DM and enable control for BP */
+	u32 breakpoint_ctl;
+	/* Compatibility and other flags */
+	u32 fw_mem_ctx_flags;
+	u32 padding;
+} __aligned(8);
+
+/*
+ * FW context state flags
+ */
+#define ROGUE_FWIF_CONTEXT_FLAGS_NEED_RESUME (0x00000001U)
+#define ROGUE_FWIF_CONTEXT_FLAGS_MC_NEED_RESUME_MASKFULL (0x000000FFU)
+#define ROGUE_FWIF_CONTEXT_FLAGS_TDM_HEADER_STALE (0x00000100U)
+#define ROGUE_FWIF_CONTEXT_FLAGS_LAST_KICK_SECURE (0x00000200U)
+
+#define ROGUE_NUM_GEOM_CORES_MAX 4
+
+/*
+ * FW-accessible TA state which must be written out to memory on context store
+ */
+struct rogue_fwif_geom_ctx_state_per_geom {
+	/* To store in mid-TA */
+	aligned_u64 geom_reg_vdm_call_stack_pointer;
+	/* Initial value (in case it is 'lost' due to a lock-up) */
+	aligned_u64 geom_reg_vdm_call_stack_pointer_init;
+	u32 geom_reg_vbs_so_prim[4];
+	u16 geom_current_idx;
+	u16 padding[3];
+} __aligned(8);
+
+struct rogue_fwif_geom_ctx_state {
+	/* FW-accessible TA state which must be written out to memory on context store */
+	struct rogue_fwif_geom_ctx_state_per_geom geom_core[ROGUE_NUM_GEOM_CORES_MAX];
+} __aligned(8);
+
+/*
+ * FW-accessible ISP state which must be written out to memory on context store
+ */
+struct rogue_fwif_frag_ctx_state {
+	u32 frag_reg_pm_deallocated_mask_status;
+	u32 frag_reg_dm_pds_mtilefree_status;
+	/* Compatibility and other flags */
+	u32 ctx_state_flags;
+	/*
+	 * frag_reg_isp_store should be the last element of the structure as this
+	 * is an array whose size is determined at runtime after detecting the
+	 * ROGUE core
+	 */
+	u32 frag_reg_isp_store[];
+} __aligned(8);
+
+#define ROGUE_FWIF_CTX_USING_BUFFER_A (0)
+#define ROGUE_FWIF_CTX_USING_BUFFER_B (1U)
+
+struct rogue_fwif_compute_ctx_state {
+	u32 ctx_state_flags; /* Target buffer and other flags */
+};
+
+struct rogue_fwif_fwcommoncontext {
+	/* CCB details for this firmware context */
+	u32 ccbctl_fw_addr; /* CCB control */
+	u32 ccb_fw_addr; /* CCB base */
+	struct rogue_fwif_dma_addr ccb_meta_dma_addr;
+
+	/* Context suspend state */
+	/* geom/frag context suspend state, read/written by FW */
+	u32 context_state_addr __aligned(8);
+
+	/* Flags e.g.
for context switching */ + u32 fw_com_ctx_flags; + u32 priority; + u32 priority_seq_num; + + /* Framework state */ + /* Register updates for Framework */ + u32 rf_cmd_addr __aligned(8); + + /* Statistic updates waiting to be passed back to the host... */ + /* True when some stats are pending */ + bool stats_pending __aligned(4); + /* Number of stores on this context since last update */ + s32 stats_num_stores; + /* Number of OOMs on this context since last update */ + s32 stats_num_out_of_memory; + /* Number of PRs on this context since last update */ + s32 stats_num_partial_renders; + /* Data Master type */ + u32 dm; + /* Device Virtual Address of the signal the context is waiting on */ + aligned_u64 wait_signal_address; + /* List entry for the wait-signal list */ + struct rogue_fwif_dllist_node wait_signal_node __aligned(8); + /* List entry for the buffer stalled list */ + struct rogue_fwif_dllist_node buf_stalled_node __aligned(8); + /* Address of the circular buffer queue pointers */ + aligned_u64 cbuf_queue_ctrl_addr; + + aligned_u64 robustness_address; + /* Max HWR deadline limit in ms */ + u32 max_deadline_ms; + /* Following HWR circular buffer read-offset needs resetting */ + bool read_offset_needs_reset; + + /* List entry for the waiting list */ + struct rogue_fwif_dllist_node waiting_node __aligned(8); + /* List entry for the run list */ + struct rogue_fwif_dllist_node run_node __aligned(8); + /* UFO that last failed (or NULL) */ + struct rogue_fwif_ufo last_failed_ufo; + + /* Memory context */ + u32 fw_mem_context_fw_addr; + + /* References to the host side originators */ + /* the Server Common Context */ + u32 server_common_context_id; + /* associated process ID */ + u32 pid; + + /* True when Geom DM OOM is not allowed */ + bool geom_oom_disabled __aligned(4); +} __aligned(8); + +/* Firmware render context. */ +struct rogue_fwif_fwrendercontext { + /* Geometry firmware context. */ + struct rogue_fwif_fwcommoncontext geom_context; + /* Fragment firmware context. */ + struct rogue_fwif_fwcommoncontext frag_context; + + struct rogue_fwif_static_rendercontext_state static_render_context_state; + + /* Number of commands submitted to the WorkEst FW CCB */ + u32 work_est_ccb_submitted; + + /* Compatibility and other flags */ + u32 fw_render_ctx_flags; +} __aligned(8); + +/* Firmware compute context. */ +struct rogue_fwif_fwcomputecontext { + /* Firmware context for the CDM */ + struct rogue_fwif_fwcommoncontext cdm_context; + + struct rogue_fwif_static_computecontext_state + static_compute_context_state; + + /* Number of commands submitted to the WorkEst FW CCB */ + u32 work_est_ccb_submitted; + + /* Compatibility and other flags */ + u32 compute_ctx_flags; + + u32 wgp_state; + u32 wgp_checksum; + u32 core_mask_a; + u32 core_mask_b; +} __aligned(8); + +/* Firmware TDM context. */ +struct rogue_fwif_fwtdmcontext { + /* Firmware context for the TDM */ + struct rogue_fwif_fwcommoncontext tdm_context; + + /* Number of commands submitted to the WorkEst FW CCB */ + u32 work_est_ccb_submitted; +} __aligned(8); + +/* Firmware TQ3D context. */ +struct rogue_fwif_fwtransfercontext { + /* Firmware context for TQ3D. 
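
Each product-specific context above embeds one or more
struct rogue_fwif_fwcommoncontext instances, so host code holding a pointer to
an embedded member can recover the outer context with container_of(). A
minimal sketch (the helper name is illustrative):

	#include <linux/container_of.h>

	static struct rogue_fwif_fwrendercontext *
	to_render_ctx(struct rogue_fwif_fwcommoncontext *geom)
	{
		return container_of(geom, struct rogue_fwif_fwrendercontext,
				    geom_context);
	}
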
*/ + struct rogue_fwif_fwcommoncontext tq_context; +} __aligned(8); + +/* + ****************************************************************************** + * Defines for CMD_TYPE corruption detection and forward compatibility check + ****************************************************************************** + */ + +/* + * CMD_TYPE 32bit contains: + * 31:16 Reserved for magic value to detect corruption (16 bits) + * 15 Reserved for ROGUE_CCB_TYPE_TASK (1 bit) + * 14:0 Bits available for CMD_TYPEs (15 bits) + */ + +/* Magic value to detect corruption */ +#define ROGUE_CMD_MAGIC_DWORD (0x2ABC) +#define ROGUE_CMD_MAGIC_DWORD_MASK (0xFFFF0000U) +#define ROGUE_CMD_MAGIC_DWORD_SHIFT (16U) +#define ROGUE_CMD_MAGIC_DWORD_SHIFTED \ + (ROGUE_CMD_MAGIC_DWORD << ROGUE_CMD_MAGIC_DWORD_SHIFT) + +/* Kernel CCB control for ROGUE */ +struct rogue_fwif_ccb_ctl { + /* write offset into array of commands (MUST be aligned to 16 bytes!) */ + u32 write_offset; + /* Padding to ensure read and write offsets are in separate cache lines. */ + u8 padding[128 - sizeof(u32)]; + /* read offset into array of commands */ + u32 read_offset; + /* Offset wrapping mask (Total capacity of the CCB - 1) */ + u32 wrap_mask; + /* size of each command in bytes */ + u32 cmd_size; + u32 padding2; +} __aligned(8); + +/* Kernel CCB command structure for ROGUE */ + +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT (0x1U) /* MMU_CTRL_INVAL_PT_EN */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD (0x2U) /* MMU_CTRL_INVAL_PD_EN */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC (0x4U) /* MMU_CTRL_INVAL_PC_EN */ + +/* + * can't use PM_TLB0 bit from BIFPM_CTRL reg because it collides with PT + * bit from BIF_CTRL reg + */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PMTLB (0x10) +/* BIF_CTRL_INVAL_TLB1_EN */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB \ + (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PMTLB | 0x8) +/* MMU_CTRL_INVAL_ALL_CONTEXTS_EN */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_CTX_ALL (0x800) + +/* indicates FW should interrupt the host */ +#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT (0x4000000U) + +struct rogue_fwif_mmucachedata { + u32 cache_flags; + u32 mmu_cache_sync_fw_addr; + u32 mmu_cache_sync_update_value; +}; + +#define ROGUE_FWIF_BPDATA_FLAGS_ENABLE BIT(0) +#define ROGUE_FWIF_BPDATA_FLAGS_WRITE BIT(1) +#define ROGUE_FWIF_BPDATA_FLAGS_CTL BIT(2) +#define ROGUE_FWIF_BPDATA_FLAGS_REGS BIT(3) + +struct rogue_fwif_bpdata { + /* Memory context */ + u32 fw_mem_context_fw_addr; + /* Breakpoint address */ + u32 bp_addr; + /* Breakpoint handler */ + u32 bp_handler_addr; + /* Breakpoint control */ + u32 bp_dm; + u32 bp_data_flags; + /* Number of temporary registers to overallocate */ + u32 temp_regs; + /* Number of shared registers to overallocate */ + u32 shared_regs; + /* DM associated with the breakpoint */ + u32 dm; +}; + +#define ROGUE_FWIF_KCCB_CMD_KICK_DATA_MAX_NUM_CLEANUP_CTLS \ + (ROGUE_FWIF_PRBUFFER_MAXSUPPORTED + 1U) /* +1 is RTDATASET cleanup */ + +struct rogue_fwif_kccb_cmd_kick_data { + /* address of the firmware context */ + u32 context_fw_addr; + /* Client CCB woff update */ + u32 client_woff_update; + /* Client CCB wrap mask update after CCCB growth */ + u32 client_wrap_mask_update; + /* number of CleanupCtl pointers attached */ + u32 num_cleanup_ctl; + /* CleanupCtl structures associated with command */ + u32 cleanup_ctl_fw_addr + [ROGUE_FWIF_KCCB_CMD_KICK_DATA_MAX_NUM_CLEANUP_CTLS]; + /* + * offset to the CmdHeader which houses the workload estimation kick + * data. 
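
The CMD_TYPE layout described above makes corruption cheap to detect: a
well-formed command type always carries the magic word in its top 16 bits. A
sketch of the check (the helper is illustrative):

	static bool rogue_cmd_type_valid(u32 cmd_type)
	{
		return (cmd_type & ROGUE_CMD_MAGIC_DWORD_MASK) ==
		       ROGUE_CMD_MAGIC_DWORD_SHIFTED;
	}
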
+ */ + u32 work_est_cmd_header_offset; +}; + +struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data { + struct rogue_fwif_kccb_cmd_kick_data geom_cmd_kick_data; + struct rogue_fwif_kccb_cmd_kick_data frag_cmd_kick_data; +}; + +struct rogue_fwif_kccb_cmd_force_update_data { + /* address of the firmware context */ + u32 context_fw_addr; + /* Client CCB fence offset */ + u32 ccb_fence_offset; +}; + +enum rogue_fwif_cleanup_type { + /* FW common context cleanup */ + ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT, + /* FW HW RT data cleanup */ + ROGUE_FWIF_CLEANUP_HWRTDATA, + /* FW freelist cleanup */ + ROGUE_FWIF_CLEANUP_FREELIST, + /* FW ZS Buffer cleanup */ + ROGUE_FWIF_CLEANUP_ZSBUFFER, +}; + +struct rogue_fwif_cleanup_request { + /* Cleanup type */ + enum rogue_fwif_cleanup_type cleanup_type; + union { + /* FW common context to cleanup */ + u32 context_fw_addr; + /* HW RT to cleanup */ + u32 hwrt_data_fw_addr; + /* Freelist to cleanup */ + u32 freelist_fw_addr; + /* ZS Buffer to cleanup */ + u32 zs_buffer_fw_addr; + } cleanup_data; +}; + +enum rogue_fwif_power_type { + ROGUE_FWIF_POW_OFF_REQ = 1, + ROGUE_FWIF_POW_FORCED_IDLE_REQ, + ROGUE_FWIF_POW_NUM_UNITS_CHANGE, + ROGUE_FWIF_POW_APM_LATENCY_CHANGE +}; + +enum rogue_fwif_power_force_idle_type { + ROGUE_FWIF_POWER_FORCE_IDLE = 1, + ROGUE_FWIF_POWER_CANCEL_FORCED_IDLE, + ROGUE_FWIF_POWER_HOST_TIMEOUT, +}; + +struct rogue_fwif_power_request { + /* Type of power request */ + enum rogue_fwif_power_type pow_type; + union { + /* Number of active Dusts */ + u32 num_of_dusts; + /* If the operation is mandatory */ + bool forced __aligned(4); + /* + * Type of Request. Consolidating Force Idle, Cancel Forced + * Idle, Host Timeout + */ + enum rogue_fwif_power_force_idle_type pow_request_type; + } power_req_data; +}; + +struct rogue_fwif_slcflushinvaldata { + /* Context to fence on (only useful when bDMContext == TRUE) */ + u32 context_fw_addr; + /* Invalidate the cache as well as flushing */ + bool inval __aligned(4); + /* The data to flush/invalidate belongs to a specific DM context */ + bool dm_context __aligned(4); + /* Optional address of range (only useful when bDMContext == FALSE) */ + aligned_u64 address; + /* Optional size of range (only useful when bDMContext == FALSE) */ + aligned_u64 size; +}; + +enum rogue_fwif_hwperf_update_config { + ROGUE_FWIF_HWPERF_CTRL_TOGGLE = 0, + ROGUE_FWIF_HWPERF_CTRL_SET = 1, + ROGUE_FWIF_HWPERF_CTRL_EMIT_FEATURES_EV = 2 +}; + +struct rogue_fwif_hwperf_ctrl { + enum rogue_fwif_hwperf_update_config opcode; /* Control operation code */ + aligned_u64 mask; /* Mask of events to toggle */ +}; + +struct rogue_fwif_hwperf_config_enable_blks { + /* Number of ROGUE_HWPERF_CONFIG_MUX_CNTBLK in the array */ + u32 num_blocks; + /* Address of the ROGUE_HWPERF_CONFIG_MUX_CNTBLK array */ + u32 block_configs_fw_addr; +}; + +struct rogue_fwif_hwperf_config_da_blks { + /* Number of ROGUE_HWPERF_CONFIG_CNTBLK in the array */ + u32 num_blocks; + /* Address of the ROGUE_HWPERF_CONFIG_CNTBLK array */ + u32 block_configs_fw_addr; +}; + +struct rogue_fwif_coreclkspeedchange_data { + u32 new_clock_speed; /* New clock speed */ +}; + +#define ROGUE_FWIF_HWPERF_CTRL_BLKS_MAX 16 + +struct rogue_fwif_hwperf_ctrl_blks { + bool enable; + /* Number of block IDs in the array */ + u32 num_blocks; + /* Array of ROGUE_HWPERF_CNTBLK_ID values */ + u16 block_ids[ROGUE_FWIF_HWPERF_CTRL_BLKS_MAX]; +}; + +struct rogue_fwif_hwperf_select_custom_cntrs { + u16 custom_block; + u16 num_counters; + u32 custom_counter_ids_fw_addr; +}; + +struct 
rogue_fwif_zsbuffer_backing_data {
+	u32 zs_buffer_fw_addr; /* ZS-Buffer FW address */
+
+	bool done __aligned(4); /* action backing/unbacking succeeded */
+};
+
+struct rogue_fwif_freelist_gs_data {
+	/* Freelist FW address */
+	u32 freelist_fw_addr;
+	/* Amount of the Freelist change */
+	u32 delta_pages;
+	/* New amount of pages on the freelist (including ready pages) */
+	u32 new_pages;
+	/* Number of ready pages to be held in reserve until OOM */
+	u32 ready_pages;
+};
+
+#define MAX_FREELISTS_SIZE 3
+#define MAX_HW_GEOM_FRAG_CONTEXTS_SIZE 3
+
+#define ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT \
+	(MAX_HW_GEOM_FRAG_CONTEXTS_SIZE * MAX_FREELISTS_SIZE * 2U)
+#define ROGUE_FWIF_FREELISTS_RECONSTRUCTION_FAILED_FLAG 0x80000000U
+
+struct rogue_fwif_freelists_reconstruction_data {
+	u32 freelist_count;
+	u32 freelist_ids[ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT];
+};
+
+struct rogue_fwif_write_offset_update_data {
+	/*
+	 * Context that may need to be resumed following the write offset update
+	 */
+	u32 context_fw_addr;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Proactive DVFS Structures
+ ******************************************************************************
+ */
+#define NUM_OPP_VALUES 16
+
+struct pdvfs_opp {
+	u32 volt; /* V */
+	u32 freq; /* Hz */
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_opp {
+	struct pdvfs_opp opp_values[NUM_OPP_VALUES];
+	u32 min_opp_point;
+	u32 max_opp_point;
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_max_freq_data {
+	u32 max_opp_point;
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_min_freq_data {
+	u32 min_opp_point;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Register configuration structures
+ ******************************************************************************
+ */
+
+#define ROGUE_FWIF_REG_CFG_MAX_SIZE 512
+
+enum rogue_fwif_regdata_cmd_type {
+	ROGUE_FWIF_REGCFG_CMD_ADD = 101,
+	ROGUE_FWIF_REGCFG_CMD_CLEAR = 102,
+	ROGUE_FWIF_REGCFG_CMD_ENABLE = 103,
+	ROGUE_FWIF_REGCFG_CMD_DISABLE = 104
+};
+
+enum rogue_fwif_reg_cfg_type {
+	/* Sidekick power event */
+	ROGUE_FWIF_REG_CFG_TYPE_PWR_ON = 0,
+	/* Rascal / dust power event */
+	ROGUE_FWIF_REG_CFG_TYPE_DUST_CHANGE,
+	/* Geometry kick */
+	ROGUE_FWIF_REG_CFG_TYPE_GEOM,
+	/* Fragment kick */
+	ROGUE_FWIF_REG_CFG_TYPE_FRAG,
+	/* Compute kick */
+	ROGUE_FWIF_REG_CFG_TYPE_CDM,
+	/* TLA kick */
+	ROGUE_FWIF_REG_CFG_TYPE_TLA,
+	/* TDM kick */
+	ROGUE_FWIF_REG_CFG_TYPE_TDM,
+	/* Applies to all types. Keep as last element */
+	ROGUE_FWIF_REG_CFG_TYPE_ALL
+};
+
+struct rogue_fwif_reg_cfg_rec {
+	u64 sddr;
+	u64 mask;
+	u64 value;
+};
+
+struct rogue_fwif_regconfig_data {
+	enum rogue_fwif_regdata_cmd_type cmd_type;
+	enum rogue_fwif_reg_cfg_type reg_config_type;
+	struct rogue_fwif_reg_cfg_rec reg_config __aligned(8);
+};
+
+struct rogue_fwif_reg_cfg {
+	/*
+	 * PDump WRW command write granularity is 32 bits.
+	 * Add padding to ensure array size is 32 bit granular.
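
A consumer of the OPP table above might pick the highest operating point whose
frequency stays at or below a requested ceiling. A sketch, assuming
min_opp_point/max_opp_point index into opp_values[] (the helper is
illustrative):

	static u32 rogue_pdvfs_opp_for_ceiling(const struct rogue_fwif_pdvfs_opp *t,
					       u32 max_hz)
	{
		u32 i, best = t->min_opp_point;

		for (i = t->min_opp_point; i <= t->max_opp_point; i++)
			if (t->opp_values[i].freq <= max_hz)
				best = i;
		return best;
	}
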
+ */ + u8 num_regs_type[ALIGN((u32)ROGUE_FWIF_REG_CFG_TYPE_ALL, + sizeof(u32))] __aligned(8); + struct rogue_fwif_reg_cfg_rec + reg_configs[ROGUE_FWIF_REG_CFG_MAX_SIZE] __aligned(8); +} __aligned(8); + +enum rogue_fwif_os_state_change { + ROGUE_FWIF_OS_ONLINE = 1, + ROGUE_FWIF_OS_OFFLINE +}; + +struct rogue_fwif_os_state_change_data { + u32 osid; + enum rogue_fwif_os_state_change new_os_state; +} __aligned(8); + +enum rogue_fwif_counter_dump_request { + ROGUE_FWIF_PWR_COUNTER_DUMP_START = 1, + ROGUE_FWIF_PWR_COUNTER_DUMP_STOP, + ROGUE_FWIF_PWR_COUNTER_DUMP_SAMPLE, +}; + +struct rogue_fwif_counter_dump_data { + enum rogue_fwif_counter_dump_request counter_dump_request; +} __aligned(8); + +enum rogue_fwif_kccb_cmd_type { + /* Common commands */ + ROGUE_FWIF_KCCB_CMD_KICK = 101U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + ROGUE_FWIF_KCCB_CMD_MMUCACHE = 102U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + ROGUE_FWIF_KCCB_CMD_BP = 103U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* SLC flush and invalidation request */ + ROGUE_FWIF_KCCB_CMD_SLCFLUSHINVAL = 105U | + ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* + * Requests cleanup of a FW resource (type specified in the command + * data) + */ + ROGUE_FWIF_KCCB_CMD_CLEANUP = 106U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Power request */ + ROGUE_FWIF_KCCB_CMD_POW = 107U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Backing for on-demand ZS-Buffer done */ + ROGUE_FWIF_KCCB_CMD_ZSBUFFER_BACKING_UPDATE = + 108U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Unbacking for on-demand ZS-Buffer done */ + ROGUE_FWIF_KCCB_CMD_ZSBUFFER_UNBACKING_UPDATE = + 109U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Freelist Grow done */ + ROGUE_FWIF_KCCB_CMD_FREELIST_GROW_UPDATE = + 110U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Freelists Reconstruction done */ + ROGUE_FWIF_KCCB_CMD_FREELISTS_RECONSTRUCTION_UPDATE = + 112U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* + * Informs the firmware that the host has added more data to a CDM2 + * Circular Buffer + */ + ROGUE_FWIF_KCCB_CMD_NOTIFY_WRITE_OFFSET_UPDATE = + 114U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Health check request */ + ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK = 115U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Forcing signalling of all unmet UFOs for a given CCB offset */ + ROGUE_FWIF_KCCB_CMD_FORCE_UPDATE = 116U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* There is a geometry and a fragment command in this single kick */ + ROGUE_FWIF_KCCB_CMD_COMBINED_GEOM_FRAG_KICK = 117U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Informs the FW that a Guest OS has come online / offline. 
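
Every enumerator above is the plain command number OR-ed with the shifted
magic word, so the number can be recovered by masking the magic back off. A
sketch (the helper is illustrative):

	/* e.g. ROGUE_FWIF_KCCB_CMD_KICK yields 101 */
	static u32 rogue_kccb_cmd_number(enum rogue_fwif_kccb_cmd_type type)
	{
		return (u32)type & ~ROGUE_CMD_MAGIC_DWORD_MASK;
	}
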
*/ + ROGUE_FWIF_KCCB_CMD_OS_ONLINE_STATE_CONFIGURE = 118U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Commands only permitted to the native or host OS */ + ROGUE_FWIF_KCCB_CMD_REGCONFIG = 200U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Configure HWPerf events (to be generated) and HWPerf buffer address (if required) */ + ROGUE_FWIF_KCCB_CMD_HWPERF_UPDATE_CONFIG = 201U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Enable or disable multiple HWPerf blocks (reusing existing configuration) */ + ROGUE_FWIF_KCCB_CMD_HWPERF_CTRL_BLKS = 203U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Core clock speed change event */ + ROGUE_FWIF_KCCB_CMD_CORECLKSPEEDCHANGE = 204U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* + * Ask the firmware to update its cached ui32LogType value from the (shared) + * tracebuf control structure + */ + ROGUE_FWIF_KCCB_CMD_LOGTYPE_UPDATE = 206U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Set a maximum frequency/OPP point */ + ROGUE_FWIF_KCCB_CMD_PDVFS_LIMIT_MAX_FREQ = 207U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* + * Changes the relative scheduling priority for a particular OSid. It can + * only be serviced for the Host DDK + */ + ROGUE_FWIF_KCCB_CMD_OSID_PRIORITY_CHANGE = 208U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Set or clear firmware state flags */ + ROGUE_FWIF_KCCB_CMD_STATEFLAGS_CTRL = 209U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Set a minimum frequency/OPP point */ + ROGUE_FWIF_KCCB_CMD_PDVFS_LIMIT_MIN_FREQ = 212U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Configure Periodic Hardware Reset behaviour */ + ROGUE_FWIF_KCCB_CMD_PHR_CFG = 213U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Configure Safety Firmware Watchdog */ + ROGUE_FWIF_KCCB_CMD_WDG_CFG = 215U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Controls counter dumping in the FW */ + ROGUE_FWIF_KCCB_CMD_COUNTER_DUMP = 216U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Configure, clear and enable multiple HWPerf blocks */ + ROGUE_FWIF_KCCB_CMD_HWPERF_CONFIG_ENABLE_BLKS = 217U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Configure the custom counters for HWPerf */ + ROGUE_FWIF_KCCB_CMD_HWPERF_SELECT_CUSTOM_CNTRS = 218U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Configure directly addressable counters for HWPerf */ + ROGUE_FWIF_KCCB_CMD_HWPERF_CONFIG_BLKS = 220U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, +}; + +#define ROGUE_FWIF_LAST_ALLOWED_GUEST_KCCB_CMD \ + (ROGUE_FWIF_KCCB_CMD_REGCONFIG - 1) + +/* Kernel CCB command packet */ +struct rogue_fwif_kccb_cmd { + /* Command type */ + enum rogue_fwif_kccb_cmd_type cmd_type; + /* Compatibility and other flags */ + u32 kccb_flags; + + /* + * NOTE: Make sure that uCmdData is the last member of this struct + * This is to calculate actual command size for device mem copy. 
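
ROGUE_FWIF_LAST_ALLOWED_GUEST_KCCB_CMD above implies a simple permission
filter: command numbers from 200 up are reserved for the native or host OS. A
sketch of such a check (illustrative, not the driver's code):

	static bool rogue_kccb_cmd_guest_allowed(enum rogue_fwif_kccb_cmd_type type)
	{
		return type <= ROGUE_FWIF_LAST_ALLOWED_GUEST_KCCB_CMD;
	}
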
+ * (Refer ROGUEGetCmdMemCopySize()) + */ + union { + /* Data for Kick command */ + struct rogue_fwif_kccb_cmd_kick_data cmd_kick_data; + /* Data for combined geom/frag Kick command */ + struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data + combined_geom_frag_cmd_kick_data; + /* Data for MMU cache command */ + struct rogue_fwif_mmucachedata mmu_cache_data; + /* Data for Breakpoint Commands */ + struct rogue_fwif_bpdata bp_data; + /* Data for SLC Flush/Inval commands */ + struct rogue_fwif_slcflushinvaldata slc_flush_inval_data; + /* Data for cleanup commands */ + struct rogue_fwif_cleanup_request cleanup_data; + /* Data for power request commands */ + struct rogue_fwif_power_request pow_data; + /* Data for HWPerf control command */ + struct rogue_fwif_hwperf_ctrl hw_perf_ctrl; + /* + * Data for HWPerf configure, clear and enable performance + * counter block command + */ + struct rogue_fwif_hwperf_config_enable_blks + hw_perf_cfg_enable_blks; + /* + * Data for HWPerf enable or disable performance counter block + * commands + */ + struct rogue_fwif_hwperf_ctrl_blks hw_perf_ctrl_blks; + /* Data for HWPerf configure the custom counters to read */ + struct rogue_fwif_hwperf_select_custom_cntrs + hw_perf_select_cstm_cntrs; + /* Data for HWPerf configure Directly Addressable blocks */ + struct rogue_fwif_hwperf_config_da_blks hw_perf_cfg_da_blks; + /* Data for core clock speed change */ + struct rogue_fwif_coreclkspeedchange_data + core_clk_speed_change_data; + /* Feedback for Z/S Buffer backing/unbacking */ + struct rogue_fwif_zsbuffer_backing_data zs_buffer_backing_data; + /* Feedback for Freelist grow/shrink */ + struct rogue_fwif_freelist_gs_data free_list_gs_data; + /* Feedback for Freelists reconstruction*/ + struct rogue_fwif_freelists_reconstruction_data + free_lists_reconstruction_data; + /* Data for custom register configuration */ + struct rogue_fwif_regconfig_data reg_config_data; + /* Data for informing the FW about the write offset update */ + struct rogue_fwif_write_offset_update_data + write_offset_update_data; + /* Data for setting the max frequency/OPP */ + struct rogue_fwif_pdvfs_max_freq_data pdvfs_max_freq_data; + /* Data for setting the min frequency/OPP */ + struct rogue_fwif_pdvfs_min_freq_data pdvfs_min_freq_data; + /* Data for updating the Guest Online states */ + struct rogue_fwif_os_state_change_data cmd_os_online_state_data; + /* Dev address for TBI buffer allocated on demand */ + u32 tbi_buffer_fw_addr; + /* Data for dumping of register ranges */ + struct rogue_fwif_counter_dump_data counter_dump_config_data; + /* Data for signalling all unmet fences for a given CCB */ + struct rogue_fwif_kccb_cmd_force_update_data force_update_data; + } cmd_data __aligned(8); +} __aligned(8); + +PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_fwif_kccb_cmd); + +/* + ****************************************************************************** + * Firmware CCB command structure for ROGUE + ****************************************************************************** + */ + +struct rogue_fwif_fwccb_cmd_zsbuffer_backing_data { + u32 zs_buffer_id; +}; + +struct rogue_fwif_fwccb_cmd_freelist_gs_data { + u32 freelist_id; +}; + +struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data { + u32 freelist_count; + u32 hwr_counter; + u32 freelist_ids[ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT]; +}; + +/* 1 if a page fault happened */ +#define ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_PF BIT(0) +/* 1 if applicable to all contexts */ +#define ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_ALL_CTXS BIT(1) + 
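
Populating a kernel CCB command from the types above is a matter of picking
the right union member. A sketch for a freelist cleanup request (the function
and the zero placeholder address are illustrative; queueing into the CCB is
driver-specific and omitted):

	static void example_request_freelist_cleanup(void)
	{
		struct rogue_fwif_kccb_cmd cmd = {
			.cmd_type = ROGUE_FWIF_KCCB_CMD_CLEANUP,
			.cmd_data.cleanup_data.cleanup_type =
				ROGUE_FWIF_CLEANUP_FREELIST,
			.cmd_data.cleanup_data.cleanup_data.freelist_fw_addr =
				0, /* placeholder FW address */
		};

		(void)cmd; /* submission path omitted */
	}
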
+struct rogue_fwif_fwccb_cmd_context_reset_data { + /* Context affected by the reset */ + u32 server_common_context_id; + /* Reason for reset */ + enum rogue_context_reset_reason reset_reason; + /* Data Master affected by the reset */ + u32 dm; + /* Job ref running at the time of reset */ + u32 reset_job_ref; + /* ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG bitfield */ + u32 flags; + /* At what page catalog address */ + aligned_u64 pc_address; + /* Page fault address (only when applicable) */ + aligned_u64 fault_address; +}; + +struct rogue_fwif_fwccb_cmd_fw_pagefault_data { + /* Page fault address */ + u64 fw_fault_addr; +}; + +enum rogue_fwif_fwccb_cmd_type { + /* Requests ZSBuffer to be backed with physical pages */ + ROGUE_FWIF_FWCCB_CMD_ZSBUFFER_BACKING = 101U | + ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Requests ZSBuffer to be unbacked */ + ROGUE_FWIF_FWCCB_CMD_ZSBUFFER_UNBACKING = 102U | + ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Requests an on-demand freelist grow/shrink */ + ROGUE_FWIF_FWCCB_CMD_FREELIST_GROW = 103U | + ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Requests freelists reconstruction */ + ROGUE_FWIF_FWCCB_CMD_FREELISTS_RECONSTRUCTION = + 104U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Notifies host of a HWR event on a context */ + ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_NOTIFICATION = + 105U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Requests an on-demand debug dump */ + ROGUE_FWIF_FWCCB_CMD_DEBUG_DUMP = 106U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + /* Requests an on-demand update on process stats */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_STATS = 107U | + ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + ROGUE_FWIF_FWCCB_CMD_CORE_CLK_RATE_CHANGE = + 108U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + ROGUE_FWIF_FWCCB_CMD_REQUEST_GPU_RESTART = + 109U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, + + /* Notifies host of a FW pagefault */ + ROGUE_FWIF_FWCCB_CMD_CONTEXT_FW_PF_NOTIFICATION = + 112U | ROGUE_CMD_MAGIC_DWORD_SHIFTED, +}; + +enum rogue_fwif_fwccb_cmd_update_stats_type { + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32TotalNumPartialRenders stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_PARTIAL_RENDERS = 1, + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32TotalNumOutOfMemory stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_OUT_OF_MEMORY, + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32NumGeomStores stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_GEOM_STORES, + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32NumFragStores stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_FRAG_STORES, + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32NumCDMStores stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_CDM_STORES, + /* + * PVRSRVStatsUpdateRenderContextStats should increase the value of the + * ui32NumTDMStores stat + */ + ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_TDM_STORES +}; + +struct rogue_fwif_fwccb_cmd_update_stats_data { + /* Element to update */ + enum rogue_fwif_fwccb_cmd_update_stats_type element_to_update; + /* The pid of the process whose stats are being updated */ + u32 pid_owner; + /* Adjustment to be made to the statistic */ + s32 adjustment_value; +}; + +struct rogue_fwif_fwccb_cmd_core_clk_rate_change_data { + u32 core_clk_rate; +} __aligned(8); + +struct rogue_fwif_fwccb_cmd { + /* Command type */ + enum rogue_fwif_fwccb_cmd_type cmd_type; + /* Compatibility and other flags */ + u32 fwccb_flags; + + union { + /* Data for Z/S-Buffer on-demand (un)backing*/ + struct 
rogue_fwif_fwccb_cmd_zsbuffer_backing_data + cmd_zs_buffer_backing; + /* Data for on-demand freelist grow/shrink */ + struct rogue_fwif_fwccb_cmd_freelist_gs_data cmd_free_list_gs; + /* Data for freelists reconstruction */ + struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data + cmd_freelists_reconstruction; + /* Data for context reset notification */ + struct rogue_fwif_fwccb_cmd_context_reset_data + cmd_context_reset_notification; + /* Data for updating process stats */ + struct rogue_fwif_fwccb_cmd_update_stats_data + cmd_update_stats_data; + struct rogue_fwif_fwccb_cmd_core_clk_rate_change_data + cmd_core_clk_rate_change; + struct rogue_fwif_fwccb_cmd_fw_pagefault_data cmd_fw_pagefault; + } cmd_data __aligned(8); +} __aligned(8); + +PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_fwif_fwccb_cmd); + +/* + ****************************************************************************** + * Workload estimation Firmware CCB command structure for ROGUE + ****************************************************************************** + */ +struct rogue_fwif_workest_fwccb_cmd { + /* Index for return data array */ + u16 return_data_index; + /* The cycles the workload took on the hardware */ + u32 cycles_taken; +}; + +/* + ****************************************************************************** + * Client CCB commands for ROGUE + ****************************************************************************** + */ + +/* + * Required memory alignment for 64-bit variables accessible by Meta + * (The gcc meta aligns 64-bit variables to 64-bit; therefore, memory shared + * between the host and meta that contains 64-bit variables has to maintain + * this alignment) + */ +#define ROGUE_FWIF_FWALLOC_ALIGN sizeof(u64) + +#define ROGUE_CCB_TYPE_TASK BIT(15) +#define ROGUE_CCB_FWALLOC_ALIGN(size) \ + (((size) + (ROGUE_FWIF_FWALLOC_ALIGN - 1)) & \ + ~(ROGUE_FWIF_FWALLOC_ALIGN - 1)) + +#define ROGUE_FWIF_CCB_CMD_TYPE_GEOM \ + (201U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D \ + (202U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_FRAG \ + (203U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR \ + (204U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_CDM \ + (205U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_TDM \ + (206U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_FBSC_INVALIDATE \ + (207U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_2D \ + (208U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_PRE_TIMESTAMP \ + (209U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_NULL \ + (210U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) +#define ROGUE_FWIF_CCB_CMD_TYPE_ABORT \ + (211U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK) + +/* Leave a gap between CCB specific commands and generic commands */ +#define ROGUE_FWIF_CCB_CMD_TYPE_FENCE (212U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_UPDATE (213U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_RMW_UPDATE \ + (214U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR (215U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_PRIORITY (216U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +/* + * Pre and 
Post timestamp commands are supposed to sandwich the DM cmd. The + * padding code with the CCB wrap upsets the FW if we don't have the task type + * bit cleared for POST_TIMESTAMPs. That's why we have 2 different cmd types. + */ +#define ROGUE_FWIF_CCB_CMD_TYPE_POST_TIMESTAMP \ + (217U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_UNFENCED_UPDATE \ + (218U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) +#define ROGUE_FWIF_CCB_CMD_TYPE_UNFENCED_RMW_UPDATE \ + (219U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) + +#define ROGUE_FWIF_CCB_CMD_TYPE_PADDING (221U | ROGUE_CMD_MAGIC_DWORD_SHIFTED) + +struct rogue_fwif_workest_kick_data { + /* Index for the KM Workload estimation return data array */ + u16 return_data_index __aligned(8); + /* Predicted time taken to do the work in cycles */ + u32 cycles_prediction __aligned(8); + /* Deadline for the workload */ + aligned_u64 deadline; +}; + +struct rogue_fwif_ccb_cmd_header { + u32 cmd_type; + u32 cmd_size; + /* + * external job reference - provided by client and used in debug for + * tracking submitted work + */ + u32 ext_job_ref; + /* + * internal job reference - generated by services and used in debug for + * tracking submitted work + */ + u32 int_job_ref; + /* Workload Estimation - Workload Estimation Data */ + struct rogue_fwif_workest_kick_data work_est_kick_data __aligned(8); +}; + +/* + ****************************************************************************** + * Client CCB commands which are only required by the kernel + ****************************************************************************** + */ +struct rogue_fwif_cmd_priority { + s32 priority; +}; + +/* + ****************************************************************************** + * Signature and Checksums Buffer + ****************************************************************************** + */ +struct rogue_fwif_sigbuf_ctl { + /* Ptr to Signature Buffer memory */ + u32 buffer_fw_addr; + /* Amount of space left for storing regs in the buffer */ + u32 left_size_in_regs; +} __aligned(8); + +struct rogue_fwif_counter_dump_ctl { + /* Ptr to counter dump buffer */ + u32 buffer_fw_addr; + /* Amount of space for storing in the buffer */ + u32 size_in_dwords; +} __aligned(8); + +struct rogue_fwif_firmware_gcov_ctl { + /* Ptr to firmware gcov buffer */ + u32 buffer_fw_addr; + /* Amount of space for storing in the buffer */ + u32 size; +} __aligned(8); + +/* + ***************************************************************************** + * ROGUE Compatibility checks + ***************************************************************************** + */ + +/* + * WARNING: Whenever the layout of ROGUE_FWIF_COMPCHECKS_BVNC changes, the + * following define should be increased by 1 to indicate to the compatibility + * logic that layout has changed. 
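+ * As an editorial example: adding or widening a field in
+ * struct rogue_fwif_compchecks_bvnc would require bumping the define below
+ * from 3 to 4.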
+ */ +#define ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION 3 + +struct rogue_fwif_compchecks_bvnc { + /* WARNING: This field must be defined as first one in this structure */ + u32 layout_version; + aligned_u64 bvnc; +} __aligned(8); + +struct rogue_fwif_init_options { + u8 os_count_support; + u8 padding[7]; +} __aligned(8); + +#define ROGUE_FWIF_COMPCHECKS_BVNC_DECLARE_AND_INIT(name) \ + struct rogue_fwif_compchecks_bvnc(name) = { \ + ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION, \ + 0, \ + } + +static inline void rogue_fwif_compchecks_bvnc_init(struct rogue_fwif_compchecks_bvnc *compchecks) +{ + compchecks->layout_version = ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION; + compchecks->bvnc = 0; +} + +struct rogue_fwif_compchecks { + /* hardware BVNC (from the ROGUE registers) */ + struct rogue_fwif_compchecks_bvnc hw_bvnc; + /* firmware BVNC */ + struct rogue_fwif_compchecks_bvnc fw_bvnc; + /* identifier of the FW processor version */ + u32 fw_processor_version; + /* software DDK version */ + u32 ddk_version; + /* software DDK build no. */ + u32 ddk_build; + /* build options bit-field */ + u32 build_options; + /* initialisation options bit-field */ + struct rogue_fwif_init_options init_options; + /* Information is valid */ + bool updated __aligned(4); + u32 padding; +} __aligned(8); + +/* + ****************************************************************************** + * Updated configuration post FW data init. + ****************************************************************************** + */ +struct rogue_fwif_runtime_cfg { + /* APM latency in ms before signalling IDLE to the host */ + u32 active_pm_latency_ms; + /* Compatibility and other flags */ + u32 runtime_cfg_flags; + /* + * If set, APM latency does not reset to system default each GPU power + * transition + */ + bool active_pm_latency_persistant __aligned(4); + /* Core clock speed, currently only used to calculate timer ticks */ + u32 core_clock_speed; + /* Last number of dusts change requested by the host */ + u32 default_dusts_num_init; + /* Periodic Hardware Reset configuration values */ + u32 phr_mode; + /* New number of milliseconds C/S is allowed to last */ + u32 hcs_deadline_ms; + /* The watchdog period in microseconds */ + u32 wdg_period_us; + /* Array of priorities per OS */ + u32 osid_priority[ROGUE_FW_MAX_NUM_OS]; + /* On-demand allocated HWPerf buffer address, to be passed to the FW */ + u32 hwperf_buf_fw_addr; + + bool padding __aligned(4); +}; + +/* + ***************************************************************************** + * Control data for ROGUE + ***************************************************************************** + */ + +#define ROGUE_FWIF_HWR_DEBUG_DUMP_ALL (99999U) + +enum rogue_fwif_tpu_dm { + ROGUE_FWIF_TPU_DM_PDM = 0, + ROGUE_FWIF_TPU_DM_VDM = 1, + ROGUE_FWIF_TPU_DM_CDM = 2, + ROGUE_FWIF_TPU_DM_TDM = 3, + ROGUE_FWIF_TPU_DM_LAST +}; + +enum rogue_fwif_gpio_val_mode { + /* No GPIO validation */ + ROGUE_FWIF_GPIO_VAL_OFF = 0, + /* + * Simple test case that initiates by sending data via the GPIO and then + * sends back any data received over the GPIO + */ + ROGUE_FWIF_GPIO_VAL_GENERAL = 1, + /* + * More complex test case that writes and reads data across the entire + * GPIO AP address range. + */ + ROGUE_FWIF_GPIO_VAL_AP = 2, + /* Validates the GPIO Testbench. */ + ROGUE_FWIF_GPIO_VAL_TESTBENCH = 5, + /* Send and then receive each byte in the range 0-255. */ + ROGUE_FWIF_GPIO_VAL_LOOPBACK = 6, + /* Send and then receive each power-of-2 byte in the range 0-255. 
*/ + ROGUE_FWIF_GPIO_VAL_LOOPBACK_LITE = 7, + ROGUE_FWIF_GPIO_VAL_LAST +}; + +enum fw_perf_conf { + FW_PERF_CONF_NONE = 0, + FW_PERF_CONF_ICACHE = 1, + FW_PERF_CONF_DCACHE = 2, + FW_PERF_CONF_JTLB_INSTR = 5, + FW_PERF_CONF_INSTRUCTIONS = 6 +}; + +enum fw_boot_stage { + FW_BOOT_STAGE_TLB_INIT_FAILURE = -2, + FW_BOOT_STAGE_NOT_AVAILABLE = -1, + FW_BOOT_NOT_STARTED = 0, + FW_BOOT_BLDR_STARTED = 1, + FW_BOOT_CACHE_DONE, + FW_BOOT_TLB_DONE, + FW_BOOT_MAIN_STARTED, + FW_BOOT_ALIGNCHECKS_DONE, + FW_BOOT_INIT_DONE, +}; + +/* + * Kernel CCB return slot responses. Usage of bit-fields instead of bare + * integers allows FW to possibly pack-in several responses for each single kCCB + * command. + */ +/* Command executed (return status from FW) */ +#define ROGUE_FWIF_KCCB_RTN_SLOT_CMD_EXECUTED BIT(0) +/* A cleanup was requested but resource busy */ +#define ROGUE_FWIF_KCCB_RTN_SLOT_CLEANUP_BUSY BIT(1) +/* Poll failed in FW for a HW operation to complete */ +#define ROGUE_FWIF_KCCB_RTN_SLOT_POLL_FAILURE BIT(2) +/* Reset value of a kCCB return slot (set by host) */ +#define ROGUE_FWIF_KCCB_RTN_SLOT_NO_RESPONSE 0x0U + +struct rogue_fwif_connection_ctl { + /* Fw-Os connection states */ + enum rogue_fwif_connection_fw_state connection_fw_state; + enum rogue_fwif_connection_os_state connection_os_state; + u32 alive_fw_token; + u32 alive_os_token; +} __aligned(8); + +struct rogue_fwif_osinit { + /* Kernel CCB */ + u32 kernel_ccbctl_fw_addr; + u32 kernel_ccb_fw_addr; + u32 kernel_ccb_rtn_slots_fw_addr; + + /* Firmware CCB */ + u32 firmware_ccbctl_fw_addr; + u32 firmware_ccb_fw_addr; + + /* Workload Estimation Firmware CCB */ + u32 work_est_firmware_ccbctl_fw_addr; + u32 work_est_firmware_ccb_fw_addr; + + u32 rogue_fwif_hwr_info_buf_ctl_fw_addr; + + u32 hwr_debug_dump_limit; + + u32 fw_os_data_fw_addr; + + /* Compatibility checks to be populated by the Firmware */ + struct rogue_fwif_compchecks rogue_comp_checks; +} __aligned(8); + +/* BVNC Features */ +struct rogue_hwperf_bvnc_block { + /* Counter block ID, see ROGUE_HWPERF_CNTBLK_ID */ + u16 block_id; + + /* Number of counters in this block type */ + u16 num_counters; + + /* Number of blocks of this type */ + u16 num_blocks; + + u16 reserved; +}; + +#define ROGUE_HWPERF_MAX_BVNC_LEN (24) + +#define ROGUE_HWPERF_MAX_BVNC_BLOCK_LEN (16U) + +/* BVNC Features */ +struct rogue_hwperf_bvnc { + /* BVNC string */ + char bvnc_string[ROGUE_HWPERF_MAX_BVNC_LEN]; + /* See ROGUE_HWPERF_FEATURE_FLAGS */ + u32 bvnc_km_feature_flags; + /* Number of blocks described in aBvncBlocks */ + u16 num_bvnc_blocks; + /* Number of GPU cores present */ + u16 bvnc_gpu_cores; + /* Supported Performance Blocks for BVNC */ + struct rogue_hwperf_bvnc_block + bvnc_blocks[ROGUE_HWPERF_MAX_BVNC_BLOCK_LEN]; +}; + +PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_hwperf_bvnc); + +struct rogue_fwif_sysinit { + /* Fault read address */ + aligned_u64 fault_phys_addr; + + /* PDS execution base */ + aligned_u64 pds_exec_base; + /* UCS execution base */ + aligned_u64 usc_exec_base; + /* FBCDC bindless texture state table base */ + aligned_u64 fbcdc_state_table_base; + aligned_u64 fbcdc_large_state_table_base; + /* Texture state base */ + aligned_u64 texture_heap_base; + + /* Event filter for Firmware events */ + u64 hw_perf_filter; + + aligned_u64 slc3_fence_dev_addr; + + u32 tpu_trilinear_frac_mask[ROGUE_FWIF_TPU_DM_LAST] __aligned(8); + + /* Signature and Checksum Buffers for DMs */ + struct rogue_fwif_sigbuf_ctl sigbuf_ctl[PVR_FWIF_DM_MAX]; + + struct rogue_fwif_pdvfs_opp pdvfs_opp_info; + + struct 
rogue_fwif_dma_addr coremem_data_store; + + struct rogue_fwif_counter_dump_ctl counter_dump_ctl; + + u32 filter_flags; + + u32 runtime_cfg_fw_addr; + + u32 trace_buf_ctl_fw_addr; + u32 fw_sys_data_fw_addr; + + u32 gpu_util_fw_cb_ctl_fw_addr; + u32 reg_cfg_fw_addr; + u32 hwperf_ctl_fw_addr; + + u32 align_checks; + + /* Core clock speed at FW boot time */ + u32 initial_core_clock_speed; + + /* APM latency in ms before signalling IDLE to the host */ + u32 active_pm_latency_ms; + + /* Flag to be set by the Firmware after successful start */ + bool firmware_started __aligned(4); + + /* Host/FW Trace synchronisation Partition Marker */ + u32 marker_val; + + /* Firmware initialization complete time */ + u32 firmware_started_timestamp; + + u32 jones_disable_mask; + + /* Firmware performance counter config */ + enum fw_perf_conf firmware_perf; + + /* + * FW Pointer to memory containing core clock rate in Hz. + * Firmware (PDVFS) updates the memory when running on non primary FW + * thread to communicate to host driver. + */ + u32 core_clock_rate_fw_addr; + + enum rogue_fwif_gpio_val_mode gpio_validation_mode; + + /* Used in HWPerf for decoding BVNC Features */ + struct rogue_hwperf_bvnc bvnc_km_feature_flags; + + /* Value to write into ROGUE_CR_TFBC_COMPRESSION_CONTROL */ + u32 tfbc_compression_control; +} __aligned(8); + +/* + ***************************************************************************** + * Timer correlation shared data and defines + ***************************************************************************** + */ + +struct rogue_fwif_time_corr { + aligned_u64 os_timestamp; + aligned_u64 os_mono_timestamp; + aligned_u64 cr_timestamp; + + /* + * Utility variable used to convert CR timer deltas to OS timer deltas + * (nS), where the deltas are relative to the timestamps above: + * deltaOS = (deltaCR * K) >> decimal_shift, see full explanation below + */ + aligned_u64 cr_delta_to_os_delta_kns; + + u32 core_clock_speed; + u32 reserved; +} __aligned(8); + +/* + * The following macros are used to help converting FW timestamps to the Host + * time domain. On the FW the ROGUE_CR_TIMER counter is used to keep track of + * time; it increments by 1 every 256 GPU clock ticks, so the general + * formula to perform the conversion is: + * + * [ GPU clock speed in Hz, if (scale == 10^9) then deltaOS is in nS, + * otherwise if (scale == 10^6) then deltaOS is in uS ] + * + * deltaCR * 256 256 * scale + * deltaOS = --------------- * scale = deltaCR * K [ K = --------------- ] + * GPUclockspeed GPUclockspeed + * + * The actual K is multiplied by 2^20 (and deltaCR * K is divided by 2^20) + * to get some better accuracy and to avoid returning 0 in the integer + * division 256000000/GPUfreq if GPUfreq is greater than 256MHz. + * This is the same as keeping K as a decimal number. + * + * The maximum deltaOS is slightly more than 5hrs for all GPU frequencies + * (deltaCR * K is more or less a constant), and it's relative to the base + * OS timestamp sampled as a part of the timer correlation data. + * This base is refreshed on GPU power-on, DVFS transition and periodic + * frequency calibration (executed every few seconds if the FW is doing + * some work), so as long as the GPU is doing something and one of these + * events is triggered then deltaCR * K will not overflow and deltaOS will be + * correct. 
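+ *
+ * Worked example (editorial, not part of the original comment): with
+ * GPUclockspeed = 500 MHz and scale = 10^9 (nS),
+ * K = ((256 * 10^9) / (500 * 10^6)) << 20 = 512 << 20. A deltaCR of 1000,
+ * i.e. 256000 GPU clocks, then gives
+ * deltaOS = (1000 * (512 << 20)) >> 20 = 512000 nS, which matches
+ * 256000 clocks / 500 MHz = 512 uS.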
+ */
+
+#define ROGUE_FWIF_CRDELTA_TO_OSDELTA_ACCURACY_SHIFT (20)
+
+#define ROGUE_FWIF_GET_DELTA_OSTIME_NS(delta_cr, k) \
+	(((delta_cr) * (k)) >> ROGUE_FWIF_CRDELTA_TO_OSDELTA_ACCURACY_SHIFT)
+
+/*
+ ******************************************************************************
+ * GPU Utilisation
+ ******************************************************************************
+ */
+
+/* See rogue_common.h for a list of GPU states */
+#define ROGUE_FWIF_GPU_UTIL_TIME_MASK \
+	(0xFFFFFFFFFFFFFFFFull & ~ROGUE_FWIF_GPU_UTIL_STATE_MASK)
+
+#define ROGUE_FWIF_GPU_UTIL_GET_TIME(word) \
+	((word) & (ROGUE_FWIF_GPU_UTIL_TIME_MASK))
+#define ROGUE_FWIF_GPU_UTIL_GET_STATE(word) \
+	((word) & (ROGUE_FWIF_GPU_UTIL_STATE_MASK))
+
+/*
+ * The OS timestamps computed by the FW are approximations of the real time,
+ * which means they could be slightly behind or ahead the real timer on the
+ * Host. In some cases we can perform subtractions between FW approximated
+ * timestamps and real OS timestamps, so we need a form of protection against
+ * negative results if for instance the FW one is a bit ahead of time.
+ */
+#define ROGUE_FWIF_GPU_UTIL_GET_PERIOD(newtime, oldtime) \
+	(((newtime) > (oldtime)) ? ((newtime) - (oldtime)) : 0U)
+
+#define ROGUE_FWIF_GPU_UTIL_MAKE_WORD(time, state) \
+	(ROGUE_FWIF_GPU_UTIL_GET_TIME(time) | \
+	 ROGUE_FWIF_GPU_UTIL_GET_STATE(state))
+
+/*
+ * The timer correlation array must be big enough to ensure old entries won't be
+ * overwritten before all the HWPerf events linked to those entries are
+ * processed by the MISR. The update frequency of this array depends on how fast
+ * the system can change state (basically how small the APM latency is) and
+ * perform DVFS transitions.
+ *
+ * The minimum size is 2 (not 1) to avoid race conditions between the FW reading
+ * an entry while the Host is updating it. With 2 entries in the worst case the
+ * FW will read old data, which is still quite ok if the Host is updating the
+ * timer correlation at that time.
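+ *
+ * (Editorial note: with ROGUE_FWIF_TIME_CORR_ARRAY_SIZE at 256 and the
+ * power-of-two assertion below, ROGUE_FWIF_TIME_CORR_CURR_INDEX() reduces to
+ * a cheap "seqcount & 255" rather than a real division.)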
+ */ +#define ROGUE_FWIF_TIME_CORR_ARRAY_SIZE 256U +#define ROGUE_FWIF_TIME_CORR_CURR_INDEX(seqcount) \ + ((seqcount) % ROGUE_FWIF_TIME_CORR_ARRAY_SIZE) + +/* Make sure the timer correlation array size is a power of 2 */ +static_assert((ROGUE_FWIF_TIME_CORR_ARRAY_SIZE & + (ROGUE_FWIF_TIME_CORR_ARRAY_SIZE - 1U)) == 0U, + "ROGUE_FWIF_TIME_CORR_ARRAY_SIZE must be a power of two"); + +struct rogue_fwif_gpu_util_fwcb { + struct rogue_fwif_time_corr time_corr[ROGUE_FWIF_TIME_CORR_ARRAY_SIZE]; + u32 time_corr_seq_count; + + /* Compatibility and other flags */ + u32 gpu_util_flags; + + /* Last GPU state + OS time of the last state update */ + aligned_u64 last_word; + + /* Counters for the amount of time the GPU was active/idle/blocked */ + aligned_u64 stats_counters[PVR_FWIF_GPU_UTIL_STATE_NUM]; +} __aligned(8); + +struct rogue_fwif_rta_ctl { + /* Render number */ + u32 render_target_index; + /* index in RTA */ + u32 current_render_target; + /* total active RTs */ + u32 active_render_targets; + /* total active RTs from the first TA kick, for OOM */ + u32 cumul_active_render_targets; + /* Array of valid RT indices */ + u32 valid_render_targets_fw_addr; + /* Array of number of occurred partial renders per render target */ + u32 rta_num_partial_renders_fw_addr; + /* Number of render targets in the array */ + u32 max_rts; + /* Compatibility and other flags */ + u32 rta_ctl_flags; +} __aligned(8); + +struct rogue_fwif_freelist { + aligned_u64 freelist_dev_addr; + aligned_u64 current_dev_addr; + u32 current_stack_top; + u32 max_pages; + u32 grow_pages; + /* HW pages */ + u32 current_pages; + u32 allocated_page_count; + u32 allocated_mmu_page_count; + u32 freelist_id; + + bool grow_pending __aligned(4); + /* Pages that should be used only when OOM is reached */ + u32 ready_pages; + /* Compatibility and other flags */ + u32 freelist_flags; + /* PM Global PB on which Freelist is loaded */ + u32 pm_global_pb; + u32 padding; +} __aligned(8); + +/* + ****************************************************************************** + * HWRTData + ****************************************************************************** + */ + +/* HWRTData flags */ +/* Deprecated flags 1:0 */ +#define HWRTDATA_HAS_LAST_GEOM BIT(2) +#define HWRTDATA_PARTIAL_RENDERED BIT(3) +#define HWRTDATA_DISABLE_TILE_REORDERING BIT(4) +#define HWRTDATA_NEED_BRN65101_BLIT BIT(5) +#define HWRTDATA_FIRST_BRN65101_STRIP BIT(6) +#define HWRTDATA_NEED_BRN67182_2ND_RENDER BIT(7) + +enum rogue_fwif_rtdata_state { + ROGUE_FWIF_RTDATA_STATE_NONE = 0, + ROGUE_FWIF_RTDATA_STATE_KICK_GEOM, + ROGUE_FWIF_RTDATA_STATE_KICK_GEOM_FIRST, + ROGUE_FWIF_RTDATA_STATE_GEOM_FINISHED, + ROGUE_FWIF_RTDATA_STATE_KICK_FRAG, + ROGUE_FWIF_RTDATA_STATE_FRAG_FINISHED, + ROGUE_FWIF_RTDATA_STATE_FRAG_CONTEXT_STORED, + ROGUE_FWIF_RTDATA_STATE_GEOM_OUTOFMEM, + ROGUE_FWIF_RTDATA_STATE_PARTIALRENDERFINISHED, + /* + * In case of HWR, we can't set the RTDATA state to NONE, as this will + * cause any TA to become a first TA. 
To ensure all related TA's are + * skipped, we use the HWR state + */ + ROGUE_FWIF_RTDATA_STATE_HWR, + ROGUE_FWIF_RTDATA_STATE_UNKNOWN = 0x7FFFFFFFU +}; + +struct rogue_fwif_hwrtdata_common { + bool geom_caches_need_zeroing __aligned(4); + + u32 screen_pixel_max; + aligned_u64 multi_sample_ctl; + u64 flipped_multi_sample_ctl; + u32 tpc_stride; + u32 tpc_size; + u32 te_screen; + u32 mtile_stride; + u32 teaa; + u32 te_mtile1; + u32 te_mtile2; + u32 isp_merge_lower_x; + u32 isp_merge_lower_y; + u32 isp_merge_upper_x; + u32 isp_merge_upper_y; + u32 isp_merge_scale_x; + u32 isp_merge_scale_y; + u32 rgn_header_size; + u32 isp_mtile_size; + u32 padding; +} __aligned(8); + +struct rogue_fwif_hwrtdata { + /* MList Data Store */ + aligned_u64 pm_mlist_dev_addr; + + aligned_u64 vce_cat_base[4]; + aligned_u64 vce_last_cat_base[4]; + aligned_u64 te_cat_base[4]; + aligned_u64 te_last_cat_base[4]; + aligned_u64 alist_cat_base; + aligned_u64 alist_last_cat_base; + + aligned_u64 pm_alist_stack_pointer; + u32 pm_mlist_stack_pointer; + + u32 hwrt_data_common_fw_addr; + + u32 hwrt_data_flags; + enum rogue_fwif_rtdata_state state; + + u32 freelists_fw_addr[MAX_FREELISTS_SIZE] __aligned(8); + u32 freelist_hwr_snapshot[MAX_FREELISTS_SIZE]; + + aligned_u64 vheap_table_dev_addr; + + struct rogue_fwif_rta_ctl rta_ctl; + + aligned_u64 tail_ptrs_dev_addr; + aligned_u64 macrotile_array_dev_addr; + aligned_u64 rgn_header_dev_addr; + aligned_u64 rtc_dev_addr; + + u32 owner_geom_not_used_by_host __aligned(8); + + bool geom_caches_need_zeroing __aligned(4); + + struct rogue_fwif_cleanup_ctl cleanup_state __aligned(64); +} __aligned(8); + +/* + ****************************************************************************** + * Sync checkpoints + ****************************************************************************** + */ + +#define PVR_SYNC_CHECKPOINT_UNDEF 0x000 +#define PVR_SYNC_CHECKPOINT_ACTIVE 0xac1 /* Checkpoint has not signaled. */ +#define PVR_SYNC_CHECKPOINT_SIGNALED 0x519 /* Checkpoint has signaled. */ +#define PVR_SYNC_CHECKPOINT_ERRORED 0xeff /* Checkpoint has been errored. */ + +#include "pvr_rogue_fwif_check.h" + +#endif /* PVR_ROGUE_FWIF_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h new file mode 100644 index 000000000000..51dc37e78f41 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h @@ -0,0 +1,493 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#ifndef PVR_ROGUE_FWIF_CHECK_H +#define PVR_ROGUE_FWIF_CHECK_H + +#include <linux/build_bug.h> + +#define OFFSET_CHECK(type, member, offset) \ + static_assert(offsetof(type, member) == (offset), \ + "offsetof(" #type ", " #member ") incorrect") + +#define SIZE_CHECK(type, size) \ + static_assert(sizeof(type) == (size), #type " is incorrect size") + +OFFSET_CHECK(struct rogue_fwif_file_info_buf, path, 0); +OFFSET_CHECK(struct rogue_fwif_file_info_buf, info, 200); +OFFSET_CHECK(struct rogue_fwif_file_info_buf, line_num, 400); +SIZE_CHECK(struct rogue_fwif_file_info_buf, 408); + +OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_pointer, 0); +OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_buffer_fw_addr, 4); +OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_buffer, 8); +OFFSET_CHECK(struct rogue_fwif_tracebuf_space, assert_buf, 16); +SIZE_CHECK(struct rogue_fwif_tracebuf_space, 424); + +OFFSET_CHECK(struct rogue_fwif_tracebuf, log_type, 0); +OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf, 8); +OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf_size_in_dwords, 856); +OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf_flags, 860); +SIZE_CHECK(struct rogue_fwif_tracebuf, 864); + +OFFSET_CHECK(struct rogue_fw_fault_info, cr_timer, 0); +OFFSET_CHECK(struct rogue_fw_fault_info, os_timer, 8); +OFFSET_CHECK(struct rogue_fw_fault_info, data, 16); +OFFSET_CHECK(struct rogue_fw_fault_info, reserved, 20); +OFFSET_CHECK(struct rogue_fw_fault_info, fault_buf, 24); +SIZE_CHECK(struct rogue_fw_fault_info, 432); + +OFFSET_CHECK(struct rogue_fwif_sysdata, config_flags, 0); +OFFSET_CHECK(struct rogue_fwif_sysdata, config_flags_ext, 4); +OFFSET_CHECK(struct rogue_fwif_sysdata, pow_state, 8); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_ridx, 12); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_widx, 16); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_wrap_count, 20); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_size, 24); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_drop_count, 28); +OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_ut, 32); +OFFSET_CHECK(struct rogue_fwif_sysdata, first_drop_ordinal, 36); +OFFSET_CHECK(struct rogue_fwif_sysdata, last_drop_ordinal, 40); +OFFSET_CHECK(struct rogue_fwif_sysdata, os_runtime_flags_mirror, 44); +OFFSET_CHECK(struct rogue_fwif_sysdata, fault_info, 80); +OFFSET_CHECK(struct rogue_fwif_sysdata, fw_faults, 3536); +OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_addr, 3540); +OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_mask, 3548); +OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_count, 3556); +OFFSET_CHECK(struct rogue_fwif_sysdata, start_idle_time, 3568); +OFFSET_CHECK(struct rogue_fwif_sysdata, hwr_state_flags, 3576); +OFFSET_CHECK(struct rogue_fwif_sysdata, hwr_recovery_flags, 3580); +OFFSET_CHECK(struct rogue_fwif_sysdata, fw_sys_data_flags, 3616); +OFFSET_CHECK(struct rogue_fwif_sysdata, mc_config, 3620); +SIZE_CHECK(struct rogue_fwif_sysdata, 3624); + +OFFSET_CHECK(struct rogue_fwif_slr_entry, timestamp, 0); +OFFSET_CHECK(struct rogue_fwif_slr_entry, fw_ctx_addr, 8); +OFFSET_CHECK(struct rogue_fwif_slr_entry, num_ufos, 12); +OFFSET_CHECK(struct rogue_fwif_slr_entry, ccb_name, 16); +SIZE_CHECK(struct rogue_fwif_slr_entry, 48); + +OFFSET_CHECK(struct rogue_fwif_osdata, fw_os_config_flags, 0); +OFFSET_CHECK(struct rogue_fwif_osdata, fw_sync_check_mark, 4); +OFFSET_CHECK(struct rogue_fwif_osdata, host_sync_check_mark, 8); +OFFSET_CHECK(struct rogue_fwif_osdata, forced_updates_requested, 12); +OFFSET_CHECK(struct rogue_fwif_osdata, 
slr_log_wp, 16); +OFFSET_CHECK(struct rogue_fwif_osdata, slr_log_first, 24); +OFFSET_CHECK(struct rogue_fwif_osdata, slr_log, 72); +OFFSET_CHECK(struct rogue_fwif_osdata, last_forced_update_time, 552); +OFFSET_CHECK(struct rogue_fwif_osdata, interrupt_count, 560); +OFFSET_CHECK(struct rogue_fwif_osdata, kccb_cmds_executed, 568); +OFFSET_CHECK(struct rogue_fwif_osdata, power_sync_fw_addr, 572); +OFFSET_CHECK(struct rogue_fwif_osdata, fw_os_data_flags, 576); +SIZE_CHECK(struct rogue_fwif_osdata, 584); + +OFFSET_CHECK(struct rogue_bifinfo, bif_req_status, 0); +OFFSET_CHECK(struct rogue_bifinfo, bif_mmu_status, 8); +OFFSET_CHECK(struct rogue_bifinfo, pc_address, 16); +OFFSET_CHECK(struct rogue_bifinfo, reserved, 24); +SIZE_CHECK(struct rogue_bifinfo, 32); + +OFFSET_CHECK(struct rogue_eccinfo, fault_gpu, 0); +SIZE_CHECK(struct rogue_eccinfo, 4); + +OFFSET_CHECK(struct rogue_mmuinfo, mmu_status, 0); +OFFSET_CHECK(struct rogue_mmuinfo, pc_address, 16); +OFFSET_CHECK(struct rogue_mmuinfo, reserved, 24); +SIZE_CHECK(struct rogue_mmuinfo, 32); + +OFFSET_CHECK(struct rogue_pollinfo, thread_num, 0); +OFFSET_CHECK(struct rogue_pollinfo, cr_poll_addr, 4); +OFFSET_CHECK(struct rogue_pollinfo, cr_poll_mask, 8); +OFFSET_CHECK(struct rogue_pollinfo, cr_poll_last_value, 12); +OFFSET_CHECK(struct rogue_pollinfo, reserved, 16); +SIZE_CHECK(struct rogue_pollinfo, 24); + +OFFSET_CHECK(struct rogue_tlbinfo, bad_addr, 0); +OFFSET_CHECK(struct rogue_tlbinfo, entry_lo, 4); +SIZE_CHECK(struct rogue_tlbinfo, 8); + +OFFSET_CHECK(struct rogue_hwrinfo, hwr_data, 0); +OFFSET_CHECK(struct rogue_hwrinfo, cr_timer, 32); +OFFSET_CHECK(struct rogue_hwrinfo, os_timer, 40); +OFFSET_CHECK(struct rogue_hwrinfo, frame_num, 48); +OFFSET_CHECK(struct rogue_hwrinfo, pid, 52); +OFFSET_CHECK(struct rogue_hwrinfo, active_hwrt_data, 56); +OFFSET_CHECK(struct rogue_hwrinfo, hwr_number, 60); +OFFSET_CHECK(struct rogue_hwrinfo, event_status, 64); +OFFSET_CHECK(struct rogue_hwrinfo, hwr_recovery_flags, 68); +OFFSET_CHECK(struct rogue_hwrinfo, hwr_type, 72); +OFFSET_CHECK(struct rogue_hwrinfo, dm, 76); +OFFSET_CHECK(struct rogue_hwrinfo, core_id, 80); +OFFSET_CHECK(struct rogue_hwrinfo, cr_time_of_kick, 88); +OFFSET_CHECK(struct rogue_hwrinfo, cr_time_hw_reset_start, 96); +OFFSET_CHECK(struct rogue_hwrinfo, cr_time_hw_reset_finish, 104); +OFFSET_CHECK(struct rogue_hwrinfo, cr_time_freelist_ready, 112); +OFFSET_CHECK(struct rogue_hwrinfo, reserved, 120); +SIZE_CHECK(struct rogue_hwrinfo, 136); + +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_info, 0); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_counter, 2176); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, write_index, 2180); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, dd_req_count, 2184); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_info_buf_flags, 2188); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_locked_up_count, 2192); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_overran_count, 2228); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_recovered_count, 2264); +OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_false_detect_count, 2300); +SIZE_CHECK(struct rogue_fwif_hwrinfobuf, 2336); + +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, pc_dev_paddr, 0); +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, page_cat_base_reg_set, 8); +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, breakpoint_addr, 12); +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, bp_handler_addr, 16); +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, breakpoint_ctl, 20); +OFFSET_CHECK(struct rogue_fwif_fwmemcontext, 
fw_mem_ctx_flags, 24); +SIZE_CHECK(struct rogue_fwif_fwmemcontext, 32); + +OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vdm_call_stack_pointer, 0); +OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vdm_call_stack_pointer_init, 8); +OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vbs_so_prim, 16); +OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_current_idx, 32); +SIZE_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, 40); + +OFFSET_CHECK(struct rogue_fwif_geom_ctx_state, geom_core, 0); +SIZE_CHECK(struct rogue_fwif_geom_ctx_state, 160); + +OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_pm_deallocated_mask_status, 0); +OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_dm_pds_mtilefree_status, 4); +OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, ctx_state_flags, 8); +OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_isp_store, 12); +SIZE_CHECK(struct rogue_fwif_frag_ctx_state, 16); + +OFFSET_CHECK(struct rogue_fwif_compute_ctx_state, ctx_state_flags, 0); +SIZE_CHECK(struct rogue_fwif_compute_ctx_state, 4); + +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccbctl_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccb_fw_addr, 4); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccb_meta_dma_addr, 8); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, context_state_addr, 24); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, fw_com_ctx_flags, 28); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, priority, 32); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, priority_seq_num, 36); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, rf_cmd_addr, 40); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_pending, 44); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_stores, 48); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_out_of_memory, 52); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_partial_renders, 56); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, dm, 60); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, wait_signal_address, 64); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, wait_signal_node, 72); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, buf_stalled_node, 80); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, cbuf_queue_ctrl_addr, 88); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, robustness_address, 96); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, max_deadline_ms, 104); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, read_offset_needs_reset, 108); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, waiting_node, 112); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, run_node, 120); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, last_failed_ufo, 128); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, fw_mem_context_fw_addr, 136); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, server_common_context_id, 140); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, pid, 144); +OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, geom_oom_disabled, 148); +SIZE_CHECK(struct rogue_fwif_fwcommoncontext, 152); + +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, write_offset, 0); +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, padding, 4); +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, read_offset, 128); +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, wrap_mask, 132); +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, cmd_size, 136); +OFFSET_CHECK(struct rogue_fwif_ccb_ctl, padding2, 140); +SIZE_CHECK(struct rogue_fwif_ccb_ctl, 144); + +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, 
context_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, client_woff_update, 4); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, client_wrap_mask_update, 8); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, num_cleanup_ctl, 12); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, cleanup_ctl_fw_addr, 16); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, work_est_cmd_header_offset, 28); +SIZE_CHECK(struct rogue_fwif_kccb_cmd_kick_data, 32); + +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, geom_cmd_kick_data, 0); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, frag_cmd_kick_data, 32); +SIZE_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, 64); + +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, context_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, ccb_fence_offset, 4); +SIZE_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, 8); + +OFFSET_CHECK(struct rogue_fwif_cleanup_request, cleanup_type, 0); +OFFSET_CHECK(struct rogue_fwif_cleanup_request, cleanup_data, 4); +SIZE_CHECK(struct rogue_fwif_cleanup_request, 8); + +OFFSET_CHECK(struct rogue_fwif_power_request, pow_type, 0); +OFFSET_CHECK(struct rogue_fwif_power_request, power_req_data, 4); +SIZE_CHECK(struct rogue_fwif_power_request, 8); + +OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, context_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, inval, 4); +OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, dm_context, 8); +OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, address, 16); +OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, size, 24); +SIZE_CHECK(struct rogue_fwif_slcflushinvaldata, 32); + +OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl, opcode, 0); +OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl, mask, 8); +SIZE_CHECK(struct rogue_fwif_hwperf_ctrl, 16); + +OFFSET_CHECK(struct rogue_fwif_hwperf_config_enable_blks, num_blocks, 0); +OFFSET_CHECK(struct rogue_fwif_hwperf_config_enable_blks, block_configs_fw_addr, 4); +SIZE_CHECK(struct rogue_fwif_hwperf_config_enable_blks, 8); + +OFFSET_CHECK(struct rogue_fwif_hwperf_config_da_blks, num_blocks, 0); +OFFSET_CHECK(struct rogue_fwif_hwperf_config_da_blks, block_configs_fw_addr, 4); +SIZE_CHECK(struct rogue_fwif_hwperf_config_da_blks, 8); + +OFFSET_CHECK(struct rogue_fwif_coreclkspeedchange_data, new_clock_speed, 0); +SIZE_CHECK(struct rogue_fwif_coreclkspeedchange_data, 4); + +OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, enable, 0); +OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, num_blocks, 4); +OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, block_ids, 8); +SIZE_CHECK(struct rogue_fwif_hwperf_ctrl_blks, 40); + +OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, custom_block, 0); +OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, num_counters, 2); +OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, custom_counter_ids_fw_addr, 4); +SIZE_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, 8); + +OFFSET_CHECK(struct rogue_fwif_zsbuffer_backing_data, zs_buffer_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_zsbuffer_backing_data, done, 4); +SIZE_CHECK(struct rogue_fwif_zsbuffer_backing_data, 8); + +OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, freelist_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, delta_pages, 4); +OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, new_pages, 8); +OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, ready_pages, 12); +SIZE_CHECK(struct rogue_fwif_freelist_gs_data, 16); + 
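/*
 * Editorial sketch, not part of the patch: the pattern these checks follow.
 * For a hypothetical struct pvr_example_abi, pinning every member offset and
 * the total size turns any ABI-visible layout change into a build failure
 * instead of a silent firmware-interface mismatch at runtime. u32 and
 * aligned_u64 are assumed to come from <linux/types.h> as in the headers
 * above.
 */
struct pvr_example_abi {
	u32 a;          /* expected at offset 0 */
	aligned_u64 b;  /* 8-byte alignment inserts 4 bytes of padding */
};
OFFSET_CHECK(struct pvr_example_abi, a, 0);
OFFSET_CHECK(struct pvr_example_abi, b, 8);
SIZE_CHECK(struct pvr_example_abi, 16);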
+OFFSET_CHECK(struct rogue_fwif_freelists_reconstruction_data, freelist_count, 0); +OFFSET_CHECK(struct rogue_fwif_freelists_reconstruction_data, freelist_ids, 4); +SIZE_CHECK(struct rogue_fwif_freelists_reconstruction_data, 76); + +OFFSET_CHECK(struct rogue_fwif_write_offset_update_data, context_fw_addr, 0); +SIZE_CHECK(struct rogue_fwif_write_offset_update_data, 8); + +OFFSET_CHECK(struct rogue_fwif_kccb_cmd, cmd_type, 0); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd, kccb_flags, 4); +OFFSET_CHECK(struct rogue_fwif_kccb_cmd, cmd_data, 8); +SIZE_CHECK(struct rogue_fwif_kccb_cmd, 88); + +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, server_common_context_id, 0); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, reset_reason, 4); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, dm, 8); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, reset_job_ref, 12); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, flags, 16); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, pc_address, 24); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, fault_address, 32); +SIZE_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, 40); + +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_fw_pagefault_data, fw_fault_addr, 0); +SIZE_CHECK(struct rogue_fwif_fwccb_cmd_fw_pagefault_data, 8); + +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, cmd_type, 0); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, fwccb_flags, 4); +OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, cmd_data, 8); +SIZE_CHECK(struct rogue_fwif_fwccb_cmd, 88); + +OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, cmd_type, 0); +OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, cmd_size, 4); +OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, ext_job_ref, 8); +OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, int_job_ref, 12); +OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, work_est_kick_data, 16); +SIZE_CHECK(struct rogue_fwif_ccb_cmd_header, 40); + +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, active_pm_latency_ms, 0); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, runtime_cfg_flags, 4); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, active_pm_latency_persistant, 8); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, core_clock_speed, 12); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, default_dusts_num_init, 16); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, phr_mode, 20); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, hcs_deadline_ms, 24); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, wdg_period_us, 28); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, osid_priority, 32); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, hwperf_buf_fw_addr, 64); +OFFSET_CHECK(struct rogue_fwif_runtime_cfg, padding, 68); +SIZE_CHECK(struct rogue_fwif_runtime_cfg, 72); + +OFFSET_CHECK(struct rogue_fwif_connection_ctl, connection_fw_state, 0); +OFFSET_CHECK(struct rogue_fwif_connection_ctl, connection_os_state, 4); +OFFSET_CHECK(struct rogue_fwif_connection_ctl, alive_fw_token, 8); +OFFSET_CHECK(struct rogue_fwif_connection_ctl, alive_os_token, 12); +SIZE_CHECK(struct rogue_fwif_connection_ctl, 16); + +OFFSET_CHECK(struct rogue_fwif_compchecks_bvnc, layout_version, 0); +OFFSET_CHECK(struct rogue_fwif_compchecks_bvnc, bvnc, 8); +SIZE_CHECK(struct rogue_fwif_compchecks_bvnc, 16); + +OFFSET_CHECK(struct rogue_fwif_init_options, os_count_support, 0); +SIZE_CHECK(struct rogue_fwif_init_options, 8); + +OFFSET_CHECK(struct rogue_fwif_compchecks, hw_bvnc, 0); +OFFSET_CHECK(struct rogue_fwif_compchecks, fw_bvnc, 16); +OFFSET_CHECK(struct rogue_fwif_compchecks, 
fw_processor_version, 32); +OFFSET_CHECK(struct rogue_fwif_compchecks, ddk_version, 36); +OFFSET_CHECK(struct rogue_fwif_compchecks, ddk_build, 40); +OFFSET_CHECK(struct rogue_fwif_compchecks, build_options, 44); +OFFSET_CHECK(struct rogue_fwif_compchecks, init_options, 48); +OFFSET_CHECK(struct rogue_fwif_compchecks, updated, 56); +SIZE_CHECK(struct rogue_fwif_compchecks, 64); + +OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccbctl_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccb_fw_addr, 4); +OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccb_rtn_slots_fw_addr, 8); +OFFSET_CHECK(struct rogue_fwif_osinit, firmware_ccbctl_fw_addr, 12); +OFFSET_CHECK(struct rogue_fwif_osinit, firmware_ccb_fw_addr, 16); +OFFSET_CHECK(struct rogue_fwif_osinit, work_est_firmware_ccbctl_fw_addr, 20); +OFFSET_CHECK(struct rogue_fwif_osinit, work_est_firmware_ccb_fw_addr, 24); +OFFSET_CHECK(struct rogue_fwif_osinit, rogue_fwif_hwr_info_buf_ctl_fw_addr, 28); +OFFSET_CHECK(struct rogue_fwif_osinit, hwr_debug_dump_limit, 32); +OFFSET_CHECK(struct rogue_fwif_osinit, fw_os_data_fw_addr, 36); +OFFSET_CHECK(struct rogue_fwif_osinit, rogue_comp_checks, 40); +SIZE_CHECK(struct rogue_fwif_osinit, 104); + +OFFSET_CHECK(struct rogue_fwif_sigbuf_ctl, buffer_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_sigbuf_ctl, left_size_in_regs, 4); +SIZE_CHECK(struct rogue_fwif_sigbuf_ctl, 8); + +OFFSET_CHECK(struct pdvfs_opp, volt, 0); +OFFSET_CHECK(struct pdvfs_opp, freq, 4); +SIZE_CHECK(struct pdvfs_opp, 8); + +OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, opp_values, 0); +OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, min_opp_point, 128); +OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, max_opp_point, 132); +SIZE_CHECK(struct rogue_fwif_pdvfs_opp, 136); + +OFFSET_CHECK(struct rogue_fwif_counter_dump_ctl, buffer_fw_addr, 0); +OFFSET_CHECK(struct rogue_fwif_counter_dump_ctl, size_in_dwords, 4); +SIZE_CHECK(struct rogue_fwif_counter_dump_ctl, 8); + +OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_string, 0); +OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_km_feature_flags, 24); +OFFSET_CHECK(struct rogue_hwperf_bvnc, num_bvnc_blocks, 28); +OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_gpu_cores, 30); +OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_blocks, 32); +SIZE_CHECK(struct rogue_hwperf_bvnc, 160); + +OFFSET_CHECK(struct rogue_fwif_sysinit, fault_phys_addr, 0); +OFFSET_CHECK(struct rogue_fwif_sysinit, pds_exec_base, 8); +OFFSET_CHECK(struct rogue_fwif_sysinit, usc_exec_base, 16); +OFFSET_CHECK(struct rogue_fwif_sysinit, fbcdc_state_table_base, 24); +OFFSET_CHECK(struct rogue_fwif_sysinit, fbcdc_large_state_table_base, 32); +OFFSET_CHECK(struct rogue_fwif_sysinit, texture_heap_base, 40); +OFFSET_CHECK(struct rogue_fwif_sysinit, hw_perf_filter, 48); +OFFSET_CHECK(struct rogue_fwif_sysinit, slc3_fence_dev_addr, 56); +OFFSET_CHECK(struct rogue_fwif_sysinit, tpu_trilinear_frac_mask, 64); +OFFSET_CHECK(struct rogue_fwif_sysinit, sigbuf_ctl, 80); +OFFSET_CHECK(struct rogue_fwif_sysinit, pdvfs_opp_info, 152); +OFFSET_CHECK(struct rogue_fwif_sysinit, coremem_data_store, 288); +OFFSET_CHECK(struct rogue_fwif_sysinit, counter_dump_ctl, 304); +OFFSET_CHECK(struct rogue_fwif_sysinit, filter_flags, 312); +OFFSET_CHECK(struct rogue_fwif_sysinit, runtime_cfg_fw_addr, 316); +OFFSET_CHECK(struct rogue_fwif_sysinit, trace_buf_ctl_fw_addr, 320); +OFFSET_CHECK(struct rogue_fwif_sysinit, fw_sys_data_fw_addr, 324); +OFFSET_CHECK(struct rogue_fwif_sysinit, gpu_util_fw_cb_ctl_fw_addr, 328); +OFFSET_CHECK(struct rogue_fwif_sysinit, reg_cfg_fw_addr, 332); 
+OFFSET_CHECK(struct rogue_fwif_sysinit, hwperf_ctl_fw_addr, 336); +OFFSET_CHECK(struct rogue_fwif_sysinit, align_checks, 340); +OFFSET_CHECK(struct rogue_fwif_sysinit, initial_core_clock_speed, 344); +OFFSET_CHECK(struct rogue_fwif_sysinit, active_pm_latency_ms, 348); +OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_started, 352); +OFFSET_CHECK(struct rogue_fwif_sysinit, marker_val, 356); +OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_started_timestamp, 360); +OFFSET_CHECK(struct rogue_fwif_sysinit, jones_disable_mask, 364); +OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_perf, 368); +OFFSET_CHECK(struct rogue_fwif_sysinit, core_clock_rate_fw_addr, 372); +OFFSET_CHECK(struct rogue_fwif_sysinit, gpio_validation_mode, 376); +OFFSET_CHECK(struct rogue_fwif_sysinit, bvnc_km_feature_flags, 380); +OFFSET_CHECK(struct rogue_fwif_sysinit, tfbc_compression_control, 540); +SIZE_CHECK(struct rogue_fwif_sysinit, 544); + +OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, time_corr, 0); +OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, time_corr_seq_count, 10240); +OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, gpu_util_flags, 10244); +OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, last_word, 10248); +OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, stats_counters, 10256); +SIZE_CHECK(struct rogue_fwif_gpu_util_fwcb, 10280); + +OFFSET_CHECK(struct rogue_fwif_rta_ctl, render_target_index, 0); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, current_render_target, 4); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, active_render_targets, 8); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, cumul_active_render_targets, 12); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, valid_render_targets_fw_addr, 16); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, rta_num_partial_renders_fw_addr, 20); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, max_rts, 24); +OFFSET_CHECK(struct rogue_fwif_rta_ctl, rta_ctl_flags, 28); +SIZE_CHECK(struct rogue_fwif_rta_ctl, 32); + +OFFSET_CHECK(struct rogue_fwif_freelist, freelist_dev_addr, 0); +OFFSET_CHECK(struct rogue_fwif_freelist, current_dev_addr, 8); +OFFSET_CHECK(struct rogue_fwif_freelist, current_stack_top, 16); +OFFSET_CHECK(struct rogue_fwif_freelist, max_pages, 20); +OFFSET_CHECK(struct rogue_fwif_freelist, grow_pages, 24); +OFFSET_CHECK(struct rogue_fwif_freelist, current_pages, 28); +OFFSET_CHECK(struct rogue_fwif_freelist, allocated_page_count, 32); +OFFSET_CHECK(struct rogue_fwif_freelist, allocated_mmu_page_count, 36); +OFFSET_CHECK(struct rogue_fwif_freelist, freelist_id, 40); +OFFSET_CHECK(struct rogue_fwif_freelist, grow_pending, 44); +OFFSET_CHECK(struct rogue_fwif_freelist, ready_pages, 48); +OFFSET_CHECK(struct rogue_fwif_freelist, freelist_flags, 52); +OFFSET_CHECK(struct rogue_fwif_freelist, pm_global_pb, 56); +SIZE_CHECK(struct rogue_fwif_freelist, 64); + +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, geom_caches_need_zeroing, 0); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, screen_pixel_max, 4); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, multi_sample_ctl, 8); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, flipped_multi_sample_ctl, 16); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, tpc_stride, 24); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, tpc_size, 28); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_screen, 32); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, mtile_stride, 36); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, teaa, 40); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_mtile1, 44); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_mtile2, 48); 
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_lower_x, 52); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_lower_y, 56); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_upper_x, 60); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_upper_y, 64); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_scale_x, 68); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_scale_y, 72); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, rgn_header_size, 76); +OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_mtile_size, 80); +SIZE_CHECK(struct rogue_fwif_hwrtdata_common, 88); + +OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_mlist_dev_addr, 0); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, vce_cat_base, 8); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, vce_last_cat_base, 40); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, te_cat_base, 72); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, te_last_cat_base, 104); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, alist_cat_base, 136); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, alist_last_cat_base, 144); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_alist_stack_pointer, 152); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_mlist_stack_pointer, 160); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, hwrt_data_common_fw_addr, 164); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, hwrt_data_flags, 168); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, state, 172); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, freelists_fw_addr, 176); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, freelist_hwr_snapshot, 188); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, vheap_table_dev_addr, 200); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, rta_ctl, 208); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, tail_ptrs_dev_addr, 240); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, macrotile_array_dev_addr, 248); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, rgn_header_dev_addr, 256); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, rtc_dev_addr, 264); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, owner_geom_not_used_by_host, 272); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, geom_caches_need_zeroing, 276); +OFFSET_CHECK(struct rogue_fwif_hwrtdata, cleanup_state, 320); +SIZE_CHECK(struct rogue_fwif_hwrtdata, 384); + +OFFSET_CHECK(struct rogue_fwif_sync_checkpoint, state, 0); +OFFSET_CHECK(struct rogue_fwif_sync_checkpoint, fw_ref_count, 4); +SIZE_CHECK(struct rogue_fwif_sync_checkpoint, 8); + +#endif /* PVR_ROGUE_FWIF_CHECK_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h new file mode 100644 index 000000000000..6e224400083a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h @@ -0,0 +1,373 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_FWIF_CLIENT_H +#define PVR_ROGUE_FWIF_CLIENT_H + +#include <linux/bits.h> +#include <linux/kernel.h> +#include <linux/sizes.h> +#include <linux/types.h> + +#include "pvr_rogue_fwif_shared.h" + +/* + * Page size used for Parameter Management. + */ +#define ROGUE_PM_PAGE_SIZE SZ_4K + +/* + * Minimum/Maximum PB size. 
+ * + * Base page size is dependent on core: + * S6/S6XT/S7 = 50 pages + * S8XE = 40 pages + * S8XE with BRN66011 fixed = 25 pages + * + * Minimum PB = Base Pages + (NUM_TE_PIPES-1)*16K + (NUM_VCE_PIPES-1)*64K + + * IF_PM_PREALLOC(NUM_TE_PIPES*16K + NUM_VCE_PIPES*16K) + * + * Maximum PB size must ensure that no PM address space can be fully used, + * because if the full address space was used it would wrap and corrupt itself. + * Since there are two freelists (local is always minimum sized) this can be + * described as following three conditions being met: + * + * (Minimum PB + Maximum PB) < ALIST PM address space size (16GB) + * (Minimum PB + Maximum PB) < TE PM address space size (16GB) / NUM_TE_PIPES + * (Minimum PB + Maximum PB) < VCE PM address space size (16GB) / NUM_VCE_PIPES + * + * Since the max of NUM_TE_PIPES and NUM_VCE_PIPES is 4, we have a hard limit + * of 4GB minus the Minimum PB. For convenience we take the smaller power-of-2 + * value of 2GB. This is far more than any current applications use. + */ +#define ROGUE_PM_MAX_FREELIST_SIZE SZ_2G + +/* + * Flags supported by the geometry DM command i.e. &struct rogue_fwif_cmd_geom. + */ + +#define ROGUE_GEOM_FLAGS_FIRSTKICK BIT_MASK(0) +#define ROGUE_GEOM_FLAGS_LASTKICK BIT_MASK(1) +/* Use single core in a multi core setup. */ +#define ROGUE_GEOM_FLAGS_SINGLE_CORE BIT_MASK(3) + +/* + * Flags supported by the fragment DM command i.e. &struct rogue_fwif_cmd_frag. + */ + +/* Use single core in a multi core setup. */ +#define ROGUE_FRAG_FLAGS_SINGLE_CORE BIT_MASK(3) +/* Indicates whether this render produces visibility results. */ +#define ROGUE_FRAG_FLAGS_GET_VIS_RESULTS BIT_MASK(5) +/* Indicates whether a depth buffer is present. */ +#define ROGUE_FRAG_FLAGS_DEPTHBUFFER BIT_MASK(7) +/* Indicates whether a stencil buffer is present. */ +#define ROGUE_FRAG_FLAGS_STENCILBUFFER BIT_MASK(8) +/* Disable pixel merging for this render. */ +#define ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE BIT_MASK(15) +/* Indicates whether a scratch buffer is present. */ +#define ROGUE_FRAG_FLAGS_SCRATCHBUFFER BIT_MASK(19) +/* Disallow compute overlapped with this render. */ +#define ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP BIT_MASK(26) + +/* + * Flags supported by the compute DM command i.e. &struct rogue_fwif_cmd_compute. + */ + +#define ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP BIT_MASK(2) +/*!< Use single core in a multi core setup. */ +#define ROGUE_COMPUTE_FLAG_SINGLE_CORE BIT_MASK(5) + +/* + * Flags supported by the transfer DM command i.e. &struct rogue_fwif_cmd_transfer. + */ + +/*!< Use single core in a multi core setup. */ +#define ROGUE_TRANSFER_FLAGS_SINGLE_CORE BIT_MASK(1) + +/* + ************************************************ + * Parameter/HWRTData control structures. + ************************************************ + */ + +/* + * Configuration registers which need to be loaded by the firmware before a geometry + * job can be started. + */ +struct rogue_fwif_geom_regs { + u64 vdm_ctrl_stream_base; + u64 tpu_border_colour_table; + + /* Only used when feature VDM_DRAWINDIRECT present. */ + u64 vdm_draw_indirect0; + /* Only used when feature VDM_DRAWINDIRECT present. */ + u32 vdm_draw_indirect1; + + u32 ppp_ctrl; + u32 te_psg; + /* Only used when BRN 49927 present. */ + u32 tpu; + + u32 vdm_context_resume_task0_size; + /* Only used when feature VDM_OBJECT_LEVEL_LLS present. */ + u32 vdm_context_resume_task3_size; + + /* Only used when BRN 56279 or BRN 67381 present. 
*/
+ u32 pds_ctrl;
+
+ u32 view_idx;
+
+ /* Only used when feature TESSELLATION present */
+ u32 pds_coeff_free_prog;
+
+ u32 padding;
+};
+
+/* Only used when BRN 44455 or BRN 63027 present. */
+struct rogue_fwif_dummy_rgnhdr_init_geom_regs {
+ u64 te_psgregion_addr;
+};
+
+/*
+ * Represents a geometry command that can be used to tile a whole scene's objects as
+ * per TA behavior.
+ */
+struct rogue_fwif_cmd_geom {
+ /*
+ * The rogue_fwif_cmd_geom_frag_shared field must always be at the beginning of the
+ * struct.
+ *
+ * The command struct (rogue_fwif_cmd_geom) is shared between Client and
+ * Firmware. Kernel is unable to perform read/write operations on the
+ * command struct; the SHARED region is the only exception to this rule.
+ * This region must be the first member so that Kernel can easily access it.
+ * For more info, see the rogue_fwif_cmd_geom_frag_shared definition.
+ */
+ struct rogue_fwif_cmd_geom_frag_shared cmd_shared;
+
+ struct rogue_fwif_geom_regs regs __aligned(8);
+ u32 flags __aligned(8);
+
+ /*
+ * Holds the geometry/fragment fence value to allow the fragment partial render command
+ * to go through.
+ */
+ struct rogue_fwif_ufo partial_render_geom_frag_fence;
+
+ /* Only used when BRN 44455 or BRN 63027 present. */
+ struct rogue_fwif_dummy_rgnhdr_init_geom_regs dummy_rgnhdr_init_geom_regs __aligned(8);
+
+ /* Only used when BRN 61484 or BRN 66333 present. */
+ u32 brn61484_66333_live_rt;
+
+ u32 padding;
+};
+
+/*
+ * Configuration registers which need to be loaded by the firmware before ISP
+ * can be started.
+ */
+struct rogue_fwif_frag_regs {
+ u32 usc_pixel_output_ctrl;
+
+#define ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL 8U
+ u32 usc_clear_register[ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL];
+
+ u32 isp_bgobjdepth;
+ u32 isp_bgobjvals;
+ u32 isp_aa;
+ /* Only used when feature S7_TOP_INFRASTRUCTURE present. */
+ u32 isp_xtp_pipe_enable;
+
+ u32 isp_ctl;
+
+ /* Only used when BRN 49927 present. */
+ u32 tpu;
+
+ u32 event_pixel_pds_info;
+
+ /* Only used when feature CLUSTER_GROUPING present. */
+ u32 pixel_phantom;
+
+ u32 view_idx;
+
+ u32 event_pixel_pds_data;
+
+ /* Only used when BRN 65101 present. */
+ u32 brn65101_event_pixel_pds_data;
+
+ /* Only used when feature GPU_MULTICORE_SUPPORT or BRN 47217 present. */
+ u32 isp_oclqry_stride;
+
+ /* Only used when feature ZLS_SUBTILE present. */
+ u32 isp_zls_pixels;
+
+ /* Only used when feature ISP_ZLS_D24_S8_PACKING_OGL_MODE present. */
+ u32 rgx_cr_blackpearl_fix;
+
+ /* All values below the aligned_u64 must be 64 bit. */
+ aligned_u64 isp_scissor_base;
+ u64 isp_dbias_base;
+ u64 isp_oclqry_base;
+ u64 isp_zlsctl;
+ u64 isp_zload_store_base;
+ u64 isp_stencil_load_store_base;
+
+ /*
+ * Only used when feature FBCDC_ALGORITHM present and value < 3, or feature
+ * FB_CDC_V4 present. Additionally, BRNs 48754, 60227, 72310 and 72311 must
+ * not be present.
+ */
+ u64 fb_cdc_zls;
+
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS 3U
+ u64 pbe_word[8U][ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS];
+ u64 tpu_border_colour_table;
+ u64 pds_bgnd[3U];
+
+ /* Only used when BRN 65101 present. */
+ u64 pds_bgnd_brn65101[3U];
+
+ u64 pds_pr_bgnd[3U];
+
+ /* Only used when BRN 62850 or 62865 present. */
+ u64 isp_dummy_stencil_store_base;
+
+ /* Only used when BRN 66193 present. */
+ u64 isp_dummy_depth_store_base;
+
+ /* Only used when BRN 67182 present. */
+ u32 rgnhdr_single_rt_size;
+ /* Only used when BRN 67182 present.
*/ + u32 rgnhdr_scratch_offset; +}; + +struct rogue_fwif_cmd_frag { + struct rogue_fwif_cmd_geom_frag_shared cmd_shared __aligned(8); + + struct rogue_fwif_frag_regs regs __aligned(8); + /* command control flags. */ + u32 flags; + /* Stride IN BYTES for Z-Buffer in case of RTAs. */ + u32 zls_stride; + /* Stride IN BYTES for S-Buffer in case of RTAs. */ + u32 sls_stride; + + /* Only used if feature GPU_MULTICORE_SUPPORT present. */ + u32 execute_count; +}; + +/* + * Configuration registers which need to be loaded by the firmware before CDM + * can be started. + */ +struct rogue_fwif_compute_regs { + u64 tpu_border_colour_table; + + /* Only used when feature CDM_USER_MODE_QUEUE present. */ + u64 cdm_cb_queue; + + /* Only used when feature CDM_USER_MODE_QUEUE present. */ + u64 cdm_cb_base; + /* Only used when feature CDM_USER_MODE_QUEUE present. */ + u64 cdm_cb; + + /* Only used when feature CDM_USER_MODE_QUEUE is not present. */ + u64 cdm_ctrl_stream_base; + + u64 cdm_context_state_base_addr; + + /* Only used when BRN 49927 is present. */ + u32 tpu; + u32 cdm_resume_pds1; + + /* Only used when feature COMPUTE_MORTON_CAPABLE present. */ + u32 cdm_item; + + /* Only used when feature CLUSTER_GROUPING present. */ + u32 compute_cluster; + + /* Only used when feature TPU_DM_GLOBAL_REGISTERS present. */ + u32 tpu_tag_cdm_ctrl; + + u32 padding; +}; + +struct rogue_fwif_cmd_compute { + /* Common command attributes */ + struct rogue_fwif_cmd_common common __aligned(8); + + /* CDM registers */ + struct rogue_fwif_compute_regs regs; + + /* Control flags */ + u32 flags __aligned(8); + + /* Only used when feature UNIFIED_STORE_VIRTUAL_PARTITIONING present. */ + u32 num_temp_regions; + + /* Only used when feature CDM_USER_MODE_QUEUE present. */ + u32 stream_start_offset; + + /* Only used when feature GPU_MULTICORE_SUPPORT present. */ + u32 execute_count; +}; + +struct rogue_fwif_transfer_regs { + /* + * All 32 bit values should be added in the top section. This then requires only a + * single RGXFW_ALIGN to align all the 64 bit values in the second section. + */ + u32 isp_bgobjvals; + + u32 usc_pixel_output_ctrl; + u32 usc_clear_register0; + u32 usc_clear_register1; + u32 usc_clear_register2; + u32 usc_clear_register3; + + u32 isp_mtile_size; + u32 isp_render_origin; + u32 isp_ctl; + + /* Only used when feature S7_TOP_INFRASTRUCTURE present. */ + u32 isp_xtp_pipe_enable; + u32 isp_aa; + + u32 event_pixel_pds_info; + + u32 event_pixel_pds_code; + u32 event_pixel_pds_data; + + u32 isp_render; + u32 isp_rgn; + + /* Only used when feature GPU_MULTICORE_SUPPORT present. */ + u32 frag_screen; + + /* All values below the aligned_u64 must be 64 bit. 
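+ * (The aligned_u64 type carries an explicit 8-byte alignment attribute, so
+ * this 64-bit block keeps the same offsets on 32-bit and 64-bit hosts and
+ * matches the firmware's view of the struct; the OFFSET_CHECKs in
+ * pvr_rogue_fwif_client_check.h verify those offsets at build time.)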
*/ + aligned_u64 pds_bgnd0_base; + u64 pds_bgnd1_base; + u64 pds_bgnd3_sizeinfo; + + u64 isp_mtile_base; +#define ROGUE_PBE_WORDS_REQUIRED_FOR_TQS 3 + /* TQ_MAX_RENDER_TARGETS * PBE_STATE_SIZE */ + u64 pbe_wordx_mrty[3U * ROGUE_PBE_WORDS_REQUIRED_FOR_TQS]; +}; + +struct rogue_fwif_cmd_transfer { + /* Common command attributes */ + struct rogue_fwif_cmd_common common __aligned(8); + + struct rogue_fwif_transfer_regs regs __aligned(8); + + u32 flags; + + u32 padding; +}; + +#include "pvr_rogue_fwif_client_check.h" + +#endif /* PVR_ROGUE_FWIF_CLIENT_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h new file mode 100644 index 000000000000..54aa4474163e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_FWIF_CLIENT_CHECK_H +#define PVR_ROGUE_FWIF_CLIENT_CHECK_H + +#include <linux/build_bug.h> + +#define OFFSET_CHECK(type, member, offset) \ + static_assert(offsetof(type, member) == (offset), \ + "offsetof(" #type ", " #member ") incorrect") + +#define SIZE_CHECK(type, size) \ + static_assert(sizeof(type) == (size), #type " is incorrect size") + +OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_ctrl_stream_base, 0); +OFFSET_CHECK(struct rogue_fwif_geom_regs, tpu_border_colour_table, 8); +OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_draw_indirect0, 16); +OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_draw_indirect1, 24); +OFFSET_CHECK(struct rogue_fwif_geom_regs, ppp_ctrl, 28); +OFFSET_CHECK(struct rogue_fwif_geom_regs, te_psg, 32); +OFFSET_CHECK(struct rogue_fwif_geom_regs, tpu, 36); +OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_context_resume_task0_size, 40); +OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_context_resume_task3_size, 44); +OFFSET_CHECK(struct rogue_fwif_geom_regs, pds_ctrl, 48); +OFFSET_CHECK(struct rogue_fwif_geom_regs, view_idx, 52); +OFFSET_CHECK(struct rogue_fwif_geom_regs, pds_coeff_free_prog, 56); +SIZE_CHECK(struct rogue_fwif_geom_regs, 64); + +OFFSET_CHECK(struct rogue_fwif_dummy_rgnhdr_init_geom_regs, te_psgregion_addr, 0); +SIZE_CHECK(struct rogue_fwif_dummy_rgnhdr_init_geom_regs, 8); + +OFFSET_CHECK(struct rogue_fwif_cmd_geom, cmd_shared, 0); +OFFSET_CHECK(struct rogue_fwif_cmd_geom, regs, 16); +OFFSET_CHECK(struct rogue_fwif_cmd_geom, flags, 80); +OFFSET_CHECK(struct rogue_fwif_cmd_geom, partial_render_geom_frag_fence, 84); +OFFSET_CHECK(struct rogue_fwif_cmd_geom, dummy_rgnhdr_init_geom_regs, 96); +OFFSET_CHECK(struct rogue_fwif_cmd_geom, brn61484_66333_live_rt, 104); +SIZE_CHECK(struct rogue_fwif_cmd_geom, 112); + +OFFSET_CHECK(struct rogue_fwif_frag_regs, usc_pixel_output_ctrl, 0); +OFFSET_CHECK(struct rogue_fwif_frag_regs, usc_clear_register, 4); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_bgobjdepth, 36); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_bgobjvals, 40); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_aa, 44); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_xtp_pipe_enable, 48); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_ctl, 52); +OFFSET_CHECK(struct rogue_fwif_frag_regs, tpu, 56); +OFFSET_CHECK(struct rogue_fwif_frag_regs, event_pixel_pds_info, 60); +OFFSET_CHECK(struct rogue_fwif_frag_regs, pixel_phantom, 64); +OFFSET_CHECK(struct rogue_fwif_frag_regs, view_idx, 68); +OFFSET_CHECK(struct rogue_fwif_frag_regs, event_pixel_pds_data, 72); +OFFSET_CHECK(struct rogue_fwif_frag_regs, 
brn65101_event_pixel_pds_data, 76); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_oclqry_stride, 80); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zls_pixels, 84); +OFFSET_CHECK(struct rogue_fwif_frag_regs, rgx_cr_blackpearl_fix, 88); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_scissor_base, 96); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dbias_base, 104); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_oclqry_base, 112); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zlsctl, 120); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zload_store_base, 128); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_stencil_load_store_base, 136); +OFFSET_CHECK(struct rogue_fwif_frag_regs, fb_cdc_zls, 144); +OFFSET_CHECK(struct rogue_fwif_frag_regs, pbe_word, 152); +OFFSET_CHECK(struct rogue_fwif_frag_regs, tpu_border_colour_table, 344); +OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_bgnd, 352); +OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_bgnd_brn65101, 376); +OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_pr_bgnd, 400); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dummy_stencil_store_base, 424); +OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dummy_depth_store_base, 432); +OFFSET_CHECK(struct rogue_fwif_frag_regs, rgnhdr_single_rt_size, 440); +OFFSET_CHECK(struct rogue_fwif_frag_regs, rgnhdr_scratch_offset, 444); +SIZE_CHECK(struct rogue_fwif_frag_regs, 448); + +OFFSET_CHECK(struct rogue_fwif_cmd_frag, cmd_shared, 0); +OFFSET_CHECK(struct rogue_fwif_cmd_frag, regs, 16); +OFFSET_CHECK(struct rogue_fwif_cmd_frag, flags, 464); +OFFSET_CHECK(struct rogue_fwif_cmd_frag, zls_stride, 468); +OFFSET_CHECK(struct rogue_fwif_cmd_frag, sls_stride, 472); +OFFSET_CHECK(struct rogue_fwif_cmd_frag, execute_count, 476); +SIZE_CHECK(struct rogue_fwif_cmd_frag, 480); + +OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu_border_colour_table, 0); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb_queue, 8); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb_base, 16); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb, 24); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_ctrl_stream_base, 32); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_context_state_base_addr, 40); +OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu, 48); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_resume_pds1, 52); +OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_item, 56); +OFFSET_CHECK(struct rogue_fwif_compute_regs, compute_cluster, 60); +OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu_tag_cdm_ctrl, 64); +SIZE_CHECK(struct rogue_fwif_compute_regs, 72); + +OFFSET_CHECK(struct rogue_fwif_cmd_compute, common, 0); +OFFSET_CHECK(struct rogue_fwif_cmd_compute, regs, 8); +OFFSET_CHECK(struct rogue_fwif_cmd_compute, flags, 80); +OFFSET_CHECK(struct rogue_fwif_cmd_compute, num_temp_regions, 84); +OFFSET_CHECK(struct rogue_fwif_cmd_compute, stream_start_offset, 88); +OFFSET_CHECK(struct rogue_fwif_cmd_compute, execute_count, 92); +SIZE_CHECK(struct rogue_fwif_cmd_compute, 96); + +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_bgobjvals, 0); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_pixel_output_ctrl, 4); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register0, 8); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register1, 12); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register2, 16); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register3, 20); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_mtile_size, 24); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_render_origin, 28); 
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_ctl, 32); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_xtp_pipe_enable, 36); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_aa, 40); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_info, 44); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_code, 48); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_data, 52); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_render, 56); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_rgn, 60); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, frag_screen, 64); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd0_base, 72); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd1_base, 80); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd3_sizeinfo, 88); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_mtile_base, 96); +OFFSET_CHECK(struct rogue_fwif_transfer_regs, pbe_wordx_mrty, 104); +SIZE_CHECK(struct rogue_fwif_transfer_regs, 176); + +OFFSET_CHECK(struct rogue_fwif_cmd_transfer, common, 0); +OFFSET_CHECK(struct rogue_fwif_cmd_transfer, regs, 8); +OFFSET_CHECK(struct rogue_fwif_cmd_transfer, flags, 184); +SIZE_CHECK(struct rogue_fwif_cmd_transfer, 192); + +#endif /* PVR_ROGUE_FWIF_CLIENT_CHECK_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h new file mode 100644 index 000000000000..6ebb95ba98a6 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_FWIF_COMMON_H +#define PVR_ROGUE_FWIF_COMMON_H + +#include <linux/build_bug.h> + +/* + * This macro represents a mask of LSBs that must be zero on data structure + * sizes and offsets to ensure they are 8-byte granular on types shared between + * the FW and host driver. + */ +#define PVR_FW_ALIGNMENT_LSB 7U + +/* Macro to test structure size alignment. */ +#define PVR_FW_STRUCT_SIZE_ASSERT(_a) \ + static_assert((sizeof(_a) & PVR_FW_ALIGNMENT_LSB) == 0U, \ + "Size of " #_a " is not properly aligned") + +/* The master definition for data masters known to the firmware. */ + +#define PVR_FWIF_DM_GP (0) +/* Either TDM or 2D DM is present. */ +/* When the 'tla' feature is present in the hw (as per @pvr_device_features). */ +#define PVR_FWIF_DM_2D (1) +/* + * When the 'fastrender_dm' feature is present in the hw (as per + * @pvr_device_features). + */ +#define PVR_FWIF_DM_TDM (1) + +#define PVR_FWIF_DM_GEOM (2) +#define PVR_FWIF_DM_FRAG (3) +#define PVR_FWIF_DM_CDM (4) +#define PVR_FWIF_DM_RAY (5) +#define PVR_FWIF_DM_GEOM2 (6) +#define PVR_FWIF_DM_GEOM3 (7) +#define PVR_FWIF_DM_GEOM4 (8) + +#define PVR_FWIF_DM_LAST PVR_FWIF_DM_GEOM4 + +/* Maximum number of DM in use: GP, 2D/TDM, GEOM, 3D, CDM, RAY, GEOM2, GEOM3, GEOM4 */ +#define PVR_FWIF_DM_MAX (PVR_FWIF_DM_LAST + 1U) + +/* GPU Utilisation states */ +#define PVR_FWIF_GPU_UTIL_STATE_IDLE 0U +#define PVR_FWIF_GPU_UTIL_STATE_ACTIVE 1U +#define PVR_FWIF_GPU_UTIL_STATE_BLOCKED 2U +#define PVR_FWIF_GPU_UTIL_STATE_NUM 3U +#define PVR_FWIF_GPU_UTIL_STATE_MASK 0x3ULL + +/* + * Maximum amount of register writes that can be done by the register + * programmer (FW or META DMA). This is not a HW limitation, it is only + * a protection against malformed inputs to the register programmer. 
+ */ +#define PVR_MAX_NUM_REGISTER_PROGRAMMER_WRITES 128U + +#endif /* PVR_ROGUE_FWIF_COMMON_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h new file mode 100644 index 000000000000..168277bce948 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef __PVR_ROGUE_FWIF_DEV_INFO_H__ +#define __PVR_ROGUE_FWIF_DEV_INFO_H__ + +enum { + PVR_FW_HAS_BRN_44079 = 0, + PVR_FW_HAS_BRN_47217, + PVR_FW_HAS_BRN_48492, + PVR_FW_HAS_BRN_48545, + PVR_FW_HAS_BRN_49927, + PVR_FW_HAS_BRN_50767, + PVR_FW_HAS_BRN_51764, + PVR_FW_HAS_BRN_62269, + PVR_FW_HAS_BRN_63142, + PVR_FW_HAS_BRN_63553, + PVR_FW_HAS_BRN_66011, + PVR_FW_HAS_BRN_71242, + + PVR_FW_HAS_BRN_MAX +}; + +enum { + PVR_FW_HAS_ERN_35421 = 0, + PVR_FW_HAS_ERN_38020, + PVR_FW_HAS_ERN_38748, + PVR_FW_HAS_ERN_42064, + PVR_FW_HAS_ERN_42290, + PVR_FW_HAS_ERN_42606, + PVR_FW_HAS_ERN_47025, + PVR_FW_HAS_ERN_57596, + + PVR_FW_HAS_ERN_MAX +}; + +enum { + PVR_FW_HAS_FEATURE_AXI_ACELITE = 0, + PVR_FW_HAS_FEATURE_CDM_CONTROL_STREAM_FORMAT, + PVR_FW_HAS_FEATURE_CLUSTER_GROUPING, + PVR_FW_HAS_FEATURE_COMMON_STORE_SIZE_IN_DWORDS, + PVR_FW_HAS_FEATURE_COMPUTE, + PVR_FW_HAS_FEATURE_COMPUTE_MORTON_CAPABLE, + PVR_FW_HAS_FEATURE_COMPUTE_OVERLAP, + PVR_FW_HAS_FEATURE_COREID_PER_OS, + PVR_FW_HAS_FEATURE_DYNAMIC_DUST_POWER, + PVR_FW_HAS_FEATURE_ECC_RAMS, + PVR_FW_HAS_FEATURE_FBCDC, + PVR_FW_HAS_FEATURE_FBCDC_ALGORITHM, + PVR_FW_HAS_FEATURE_FBCDC_ARCHITECTURE, + PVR_FW_HAS_FEATURE_FBC_MAX_DEFAULT_DESCRIPTORS, + PVR_FW_HAS_FEATURE_FBC_MAX_LARGE_DESCRIPTORS, + PVR_FW_HAS_FEATURE_FB_CDC_V4, + PVR_FW_HAS_FEATURE_GPU_MULTICORE_SUPPORT, + PVR_FW_HAS_FEATURE_GPU_VIRTUALISATION, + PVR_FW_HAS_FEATURE_GS_RTA_SUPPORT, + PVR_FW_HAS_FEATURE_IRQ_PER_OS, + PVR_FW_HAS_FEATURE_ISP_MAX_TILES_IN_FLIGHT, + PVR_FW_HAS_FEATURE_ISP_SAMPLES_PER_PIXEL, + PVR_FW_HAS_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE, + PVR_FW_HAS_FEATURE_LAYOUT_MARS, + PVR_FW_HAS_FEATURE_MAX_PARTITIONS, + PVR_FW_HAS_FEATURE_META, + PVR_FW_HAS_FEATURE_META_COREMEM_SIZE, + PVR_FW_HAS_FEATURE_MIPS, + PVR_FW_HAS_FEATURE_NUM_CLUSTERS, + PVR_FW_HAS_FEATURE_NUM_ISP_IPP_PIPES, + PVR_FW_HAS_FEATURE_NUM_OSIDS, + PVR_FW_HAS_FEATURE_NUM_RASTER_PIPES, + PVR_FW_HAS_FEATURE_PBE2_IN_XE, + PVR_FW_HAS_FEATURE_PBVNC_COREID_REG, + PVR_FW_HAS_FEATURE_PERFBUS, + PVR_FW_HAS_FEATURE_PERF_COUNTER_BATCH, + PVR_FW_HAS_FEATURE_PHYS_BUS_WIDTH, + PVR_FW_HAS_FEATURE_RISCV_FW_PROCESSOR, + PVR_FW_HAS_FEATURE_ROGUEXE, + PVR_FW_HAS_FEATURE_S7_TOP_INFRASTRUCTURE, + PVR_FW_HAS_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT, + PVR_FW_HAS_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT_V2, + PVR_FW_HAS_FEATURE_SIMPLE_PARAMETER_FORMAT_VERSION, + PVR_FW_HAS_FEATURE_SLC_BANKS, + PVR_FW_HAS_FEATURE_SLC_CACHE_LINE_SIZE_BITS, + PVR_FW_HAS_FEATURE_SLC_SIZE_CONFIGURABLE, + PVR_FW_HAS_FEATURE_SLC_SIZE_IN_KILOBYTES, + PVR_FW_HAS_FEATURE_SOC_TIMER, + PVR_FW_HAS_FEATURE_SYS_BUS_SECURE_RESET, + PVR_FW_HAS_FEATURE_TESSELLATION, + PVR_FW_HAS_FEATURE_TILE_REGION_PROTECTION, + PVR_FW_HAS_FEATURE_TILE_SIZE_X, + PVR_FW_HAS_FEATURE_TILE_SIZE_Y, + PVR_FW_HAS_FEATURE_TLA, + PVR_FW_HAS_FEATURE_TPU_CEM_DATAMASTER_GLOBAL_REGISTERS, + PVR_FW_HAS_FEATURE_TPU_DM_GLOBAL_REGISTERS, + PVR_FW_HAS_FEATURE_TPU_FILTERING_MODE_CONTROL, + PVR_FW_HAS_FEATURE_USC_MIN_OUTPUT_REGISTERS_PER_PIX, + PVR_FW_HAS_FEATURE_VDM_DRAWINDIRECT, + PVR_FW_HAS_FEATURE_VDM_OBJECT_LEVEL_LLS, + 
PVR_FW_HAS_FEATURE_VIRTUAL_ADDRESS_SPACE_BITS,
+ PVR_FW_HAS_FEATURE_WATCHDOG_TIMER,
+ PVR_FW_HAS_FEATURE_WORKGROUP_PROTECTION,
+ PVR_FW_HAS_FEATURE_XE_ARCHITECTURE,
+ PVR_FW_HAS_FEATURE_XE_MEMORY_HIERARCHY,
+ PVR_FW_HAS_FEATURE_XE_TPU2,
+ PVR_FW_HAS_FEATURE_XPU_MAX_REGBANKS_ADDR_WIDTH,
+ PVR_FW_HAS_FEATURE_XPU_MAX_SLAVES,
+ PVR_FW_HAS_FEATURE_XPU_REGISTER_BROADCAST,
+ PVR_FW_HAS_FEATURE_XT_TOP_INFRASTRUCTURE,
+ PVR_FW_HAS_FEATURE_ZLS_SUBTILE,
+
+ PVR_FW_HAS_FEATURE_MAX
+};
+
+#endif /* __PVR_ROGUE_FWIF_DEV_INFO_H__ */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h
new file mode 100644
index 000000000000..1db1f4c532bc
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_RESETFRAMEWORK_H
+#define PVR_ROGUE_FWIF_RESETFRAMEWORK_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+struct rogue_fwif_rf_registers {
+ union {
+ u64 cdmreg_cdm_cb_base;
+ u64 cdmreg_cdm_ctrl_stream_base;
+ };
+ u64 cdmreg_cdm_cb_queue;
+ u64 cdmreg_cdm_cb;
+};
+
+struct rogue_fwif_rf_cmd {
+ /* THIS MUST BE THE LAST MEMBER OF THE CONTAINING STRUCTURE */
+ struct rogue_fwif_rf_registers fw_registers __aligned(8);
+};
+
+#define ROGUE_FWIF_RF_CMD_SIZE sizeof(struct rogue_fwif_rf_cmd)
+
+#endif /* PVR_ROGUE_FWIF_RESETFRAMEWORK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
new file mode 100644
index 000000000000..56e11009e123
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
@@ -0,0 +1,1648 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_SF_H
+#define PVR_ROGUE_FWIF_SF_H
+
+/*
+ ******************************************************************************
+ * Do *NOT* rearrange or delete lines in rogue_fw_log_sfgroups or stid_fmts;
+ * doing so WILL BREAK fw tracing message compatibility with previous
+ * fw versions. Only add new entries, if so required.
+ ******************************************************************************
+ */
+
+/* Available log groups. */
+enum rogue_fw_log_sfgroups {
+ ROGUE_FW_GROUP_NULL,
+ ROGUE_FW_GROUP_MAIN,
+ ROGUE_FW_GROUP_CLEANUP,
+ ROGUE_FW_GROUP_CSW,
+ ROGUE_FW_GROUP_PM,
+ ROGUE_FW_GROUP_RTD,
+ ROGUE_FW_GROUP_SPM,
+ ROGUE_FW_GROUP_MTS,
+ ROGUE_FW_GROUP_BIF,
+ ROGUE_FW_GROUP_MISC,
+ ROGUE_FW_GROUP_POW,
+ ROGUE_FW_GROUP_HWR,
+ ROGUE_FW_GROUP_HWP,
+ ROGUE_FW_GROUP_RPM,
+ ROGUE_FW_GROUP_DMA,
+ ROGUE_FW_GROUP_DBG,
+};
+
+#define PVR_SF_STRING_MAX_SIZE 256U
+
+/* Pair of string format id and string format. */
+struct rogue_fw_stid_fmt {
+ u32 id;
+ char name[PVR_SF_STRING_MAX_SIZE];
+};
+
+/*
+ * The symbolic names found in the stid_fmts table below are assigned a u32
+ * value of the following format:
+ *
+ *   bits 0-11:  id number
+ *   bits 12-15: group id number
+ *   bits 16-19: number of parameters
+ *   bits 20-27: unused
+ *   bits 28-30: active: identify SF packet, otherwise regular int32
+ *   bit 31:     reserved for signed/unsigned compatibility
+ *
+ * The following macro assigns those values to the enum generated SF ids list.
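+ *
+ * For example, ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MAIN, 1) (the
+ * "ASSERT Failed" entry below) evaluates to 0x70011018: id 24 (0x018) in
+ * bits 0-11, group MAIN (1) in bits 12-15, one parameter in bits 16-19 and
+ * the 0x7 marker in bits 28-30. ROGUE_FW_SF_GID() and ROGUE_FW_SF_PARAMNUM()
+ * recover the group and parameter count again by shifting and masking.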
+ */ +#define ROGUE_FW_LOG_IDMARKER (0x70000000U) +#define ROGUE_FW_LOG_CREATESFID(a, b, e) ((u32)(a) | ((u32)(b) << 12) | ((u32)(e) << 16) | \ + ROGUE_FW_LOG_IDMARKER) + +#define ROGUE_FW_LOG_IDMASK (0xFFF00000) +#define ROGUE_FW_LOG_VALIDID(I) (((I) & ROGUE_FW_LOG_IDMASK) == ROGUE_FW_LOG_IDMARKER) + +/* Return the group id that the given (enum generated) id belongs to */ +#define ROGUE_FW_SF_GID(x) (((u32)(x) >> 12) & 0xfU) +/* Returns how many arguments the SF(string format) for the given (enum generated) id requires */ +#define ROGUE_FW_SF_PARAMNUM(x) (((u32)(x) >> 16) & 0xfU) + +/* pair of string format id and string formats */ +struct rogue_km_stid_fmt { + u32 id; + const char *name; +}; + +static const struct rogue_km_stid_fmt stid_fmts[] = { + { ROGUE_FW_LOG_CREATESFID(0, ROGUE_FW_GROUP_NULL, 0), + "You should not use this string" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MAIN, 6), + "Kick 3D: FWCtx 0x%08.8x @ %d, RTD 0x%08x. Partial render:%d, CSW resume:%d, prio:%d" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MAIN, 2), + "3D finished, HWRTData0State=%x, HWRTData1State=%x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MAIN, 4), + "Kick 3D TQ: FWCtx 0x%08.8x @ %d, CSW resume:%d, prio: %d" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_MAIN, 0), + "3D Transfer finished" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MAIN, 3), + "Kick Compute: FWCtx 0x%08.8x @ %d, prio: %d" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MAIN, 0), + "Compute finished" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MAIN, 7), + "Kick TA: FWCtx 0x%08.8x @ %d, RTD 0x%08x. First kick:%d, Last kick:%d, CSW resume:%d, prio:%d" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MAIN, 0), + "TA finished" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MAIN, 0), + "Restart TA after partial render" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MAIN, 0), + "Resume TA without partial render" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MAIN, 2), + "Out of memory! Context 0x%08x, HWRTData 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MAIN, 3), + "Kick TLA: FWCtx 0x%08.8x @ %d, prio:%d" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MAIN, 0), + "TLA finished" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MAIN, 3), + "cCCB Woff update = %d, DM = %d, FWCtx = 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MAIN, 2), + "UFO Checks for FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MAIN, 3), + "UFO Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MAIN, 0), + "UFO Checks succeeded" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MAIN, 3), + "UFO PR-Check: [0x%08.8x] is 0x%08.8x requires >= 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MAIN, 1), + "UFO SPM PR-Checks for FWCtx 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MAIN, 4), + "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires >= ????????, [0x%08.8x] is ???????? 
requires 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_MAIN, 2), + "UFO Updates for FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_MAIN, 2), + "UFO Update: [0x%08.8x] = 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MAIN, 1), + "ASSERT Failed: line %d of:" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_MAIN, 2), + "HWR: Lockup detected on DM%d, FWCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_MAIN, 3), + "HWR: Reset fw state for DM%d, FWCtx: 0x%08.8x, MemCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_MAIN, 0), + "HWR: Reset HW" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_MAIN, 0), + "HWR: Lockup recovered." }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_MAIN, 1), + "HWR: False lockup detected for DM%u" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_MAIN, 3), + "Alignment check %d failed: host = 0x%x, fw = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_MAIN, 0), + "GP USC triggered" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_MAIN, 2), + "Overallocating %u temporary registers and %u shared registers for breakpoint handler" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_MAIN, 1), + "Setting breakpoint: Addr 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_MAIN, 0), + "Store breakpoint state" }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_MAIN, 0), + "Unsetting BP Registers" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_MAIN, 1), + "Active RTs expected to be zero, actually %u" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_MAIN, 1), + "RTC present, %u active render targets" }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_MAIN, 1), + "Estimated Power 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_MAIN, 1), + "RTA render target %u" }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_MAIN, 2), + "Kick RTA render %u of %u" }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_MAIN, 3), + "HWR sizes check %d failed: addresses = %d, sizes = %d" }, + { ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_MAIN, 1), + "Pow: DUSTS_ENABLE = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_MAIN, 2), + "Pow: On(1)/Off(0): %d, Units: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_MAIN, 2), + "Pow: Changing number of dusts from %d to %d" }, + { ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_MAIN, 0), + "Pow: Sidekick ready to be powered down" }, + { ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_MAIN, 2), + "Pow: Request to change num of dusts to %d (bPowRascalDust=%d)" }, + { ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_MAIN, 0), + "No ZS Buffer used for partial render (store)" }, + { ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_MAIN, 0), + "No Depth/Stencil Buffer used for partial render (load)" }, + { ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_MAIN, 2), + "HWR: Lock-up DM%d FWCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_MAIN, 7), + "MLIST%d checker: CatBase TE=0x%08x (%d Pages), VCE=0x%08x (%d Pages), ALIST=0x%08x, IsTA=%d" }, + { ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_MAIN, 3), + "MLIST%d checker: MList[%d] = 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_MAIN, 1), + "MLIST%d OK" }, + { ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_MAIN, 1), + "MLIST%d is empty" }, + { ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_MAIN, 8), + "MLIST%d checker: CatBase TE=0x%08x%08x, VCE=0x%08x%08x, ALIST=0x%08x%08x, IsTA=%d" }, + { ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_MAIN, 0), + "3D OQ flush kick" }, + { ROGUE_FW_LOG_CREATESFID(56, 
ROGUE_FW_GROUP_MAIN, 1), + "HWPerf block ID (0x%x) unsupported by device" }, + { ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_MAIN, 2), + "Setting breakpoint: Addr 0x%08.8x DM%u" }, + { ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_MAIN, 3), + "Kick RTU: FWCtx 0x%08.8x @ %d, prio: %d" }, + { ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_MAIN, 1), + "RDM finished on context %u" }, + { ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_MAIN, 3), + "Kick SHG: FWCtx 0x%08.8x @ %d, prio: %d" }, + { ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_MAIN, 0), + "SHG finished" }, + { ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_MAIN, 1), + "FBA finished on context %u" }, + { ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_MAIN, 0), + "UFO Checks failed" }, + { ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_MAIN, 1), + "Kill DM%d start" }, + { ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_MAIN, 1), + "Kill DM%d complete" }, + { ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_MAIN, 2), + "FC%u cCCB Woff update = %u" }, + { ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_MAIN, 4), + "Kick RTU: FWCtx 0x%08.8x @ %d, prio: %d, Frame Context: %d" }, + { ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_MAIN, 0), + "GPU init" }, + { ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_MAIN, 1), + "GPU Units init (# mask: 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_MAIN, 3), + "Register access cycles: read: %d cycles, write: %d cycles, iterations: %d" }, + { ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_MAIN, 3), + "Register configuration added. Address: 0x%x Value: 0x%x%x" }, + { ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_MAIN, 1), + "Register configuration applied to type %d. (0:pow on, 1:Rascal/dust init, 2-5: TA,3D,CDM,TLA, 6:All)" }, + { ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_MAIN, 0), + "Perform TPC flush." }, + { ROGUE_FW_LOG_CREATESFID(74, ROGUE_FW_GROUP_MAIN, 0), + "GPU has locked up (see HWR logs for more info)" }, + { ROGUE_FW_LOG_CREATESFID(75, ROGUE_FW_GROUP_MAIN, 0), + "HWR has been triggered - GPU has overrun its deadline (see HWR logs)" }, + { ROGUE_FW_LOG_CREATESFID(76, ROGUE_FW_GROUP_MAIN, 0), + "HWR has been triggered - GPU has failed a poll (see HWR logs)" }, + { ROGUE_FW_LOG_CREATESFID(77, ROGUE_FW_GROUP_MAIN, 1), + "Doppler out of memory event for FC %u" }, + { ROGUE_FW_LOG_CREATESFID(78, ROGUE_FW_GROUP_MAIN, 3), + "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires >= 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(79, ROGUE_FW_GROUP_MAIN, 3), + "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(80, ROGUE_FW_GROUP_MAIN, 1), + "TIMESTAMP -> [0x%08.8x]" }, + { ROGUE_FW_LOG_CREATESFID(81, ROGUE_FW_GROUP_MAIN, 2), + "UFO RMW Updates for FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(82, ROGUE_FW_GROUP_MAIN, 2), + "UFO Update: [0x%08.8x] = 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(83, ROGUE_FW_GROUP_MAIN, 2), + "Kick Null cmd: FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(84, ROGUE_FW_GROUP_MAIN, 2), + "RPM Out of memory! 
Context 0x%08x, SH requestor %d" }, + { ROGUE_FW_LOG_CREATESFID(85, ROGUE_FW_GROUP_MAIN, 4), + "Discard RTU due to RPM abort: FWCtx 0x%08.8x @ %d, prio: %d, Frame Context: %d" }, + { ROGUE_FW_LOG_CREATESFID(86, ROGUE_FW_GROUP_MAIN, 4), + "Deferring DM%u from running context 0x%08x @ %d (deferred DMs = 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(87, ROGUE_FW_GROUP_MAIN, 4), + "Deferring DM%u from running context 0x%08x @ %d to let other deferred DMs run (deferred DMs = 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(88, ROGUE_FW_GROUP_MAIN, 4), + "No longer deferring DM%u from running context = 0x%08x @ %d (deferred DMs = 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(89, ROGUE_FW_GROUP_MAIN, 3), + "FWCCB for DM%u is full, we will have to wait for space! (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(90, ROGUE_FW_GROUP_MAIN, 3), + "FWCCB for OSid %u is full, we will have to wait for space! (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(91, ROGUE_FW_GROUP_MAIN, 1), + "Host Sync Partition marker: %d" }, + { ROGUE_FW_LOG_CREATESFID(92, ROGUE_FW_GROUP_MAIN, 1), + "Host Sync Partition repeat: %d" }, + { ROGUE_FW_LOG_CREATESFID(93, ROGUE_FW_GROUP_MAIN, 1), + "Core clock set to %d Hz" }, + { ROGUE_FW_LOG_CREATESFID(94, ROGUE_FW_GROUP_MAIN, 7), + "Compute Queue: FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(95, ROGUE_FW_GROUP_MAIN, 3), + "Signal check failed, Required Data: 0x%x, Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(96, ROGUE_FW_GROUP_MAIN, 5), + "Signal update, Snoop Filter: %u, MMU Ctx: %u, Signal Id: %u, Signals Base: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(97, ROGUE_FW_GROUP_MAIN, 4), + "Signalled the previously waiting FWCtx: 0x%08.8x, OSId: %u, Signal Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(98, ROGUE_FW_GROUP_MAIN, 0), + "Compute stalled" }, + { ROGUE_FW_LOG_CREATESFID(99, ROGUE_FW_GROUP_MAIN, 3), + "Compute stalled (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(100, ROGUE_FW_GROUP_MAIN, 3), + "Compute resumed (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(101, ROGUE_FW_GROUP_MAIN, 4), + "Signal update notification from the host, PC Physical Address: 0x%08x%08x, Signal Virtual Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(102, ROGUE_FW_GROUP_MAIN, 4), + "Signal update from DM: %u, OSId: %u, PC Physical Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(103, ROGUE_FW_GROUP_MAIN, 1), + "DM: %u signal check failed" }, + { ROGUE_FW_LOG_CREATESFID(104, ROGUE_FW_GROUP_MAIN, 3), + "Kick TDM: FWCtx 0x%08.8x @ %d, prio:%d" }, + { ROGUE_FW_LOG_CREATESFID(105, ROGUE_FW_GROUP_MAIN, 0), + "TDM finished" }, + { ROGUE_FW_LOG_CREATESFID(106, ROGUE_FW_GROUP_MAIN, 4), + "MMU_PM_CAT_BASE_TE[%d]_PIPE[%d]: 0x%08x 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(107, ROGUE_FW_GROUP_MAIN, 0), + "BRN 54141 HIT" }, + { ROGUE_FW_LOG_CREATESFID(108, ROGUE_FW_GROUP_MAIN, 0), + "BRN 54141 Dummy TA kicked" }, + { ROGUE_FW_LOG_CREATESFID(109, ROGUE_FW_GROUP_MAIN, 0), + "BRN 54141 resume TA" }, + { ROGUE_FW_LOG_CREATESFID(110, ROGUE_FW_GROUP_MAIN, 0), + "BRN 54141 double hit after applying WA" }, + { ROGUE_FW_LOG_CREATESFID(111, ROGUE_FW_GROUP_MAIN, 2), + "BRN 54141 Dummy TA VDM base address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(112, ROGUE_FW_GROUP_MAIN, 4), + "Signal check failed, Required Data: 0x%x, Current Data: 0x%x, Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(113, ROGUE_FW_GROUP_MAIN, 2), + "TDM stalled (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(114, 
ROGUE_FW_GROUP_MAIN, 1), + "Write Offset update notification for stalled FWCtx 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(115, ROGUE_FW_GROUP_MAIN, 3), + "Changing OSid %d's priority from %u to %u" }, + { ROGUE_FW_LOG_CREATESFID(116, ROGUE_FW_GROUP_MAIN, 0), + "Compute resumed" }, + { ROGUE_FW_LOG_CREATESFID(117, ROGUE_FW_GROUP_MAIN, 7), + "Kick TLA: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(118, ROGUE_FW_GROUP_MAIN, 7), + "Kick TDM: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(119, ROGUE_FW_GROUP_MAIN, 11), + "Kick TA: FWCtx 0x%08.8x @ %d, RTD 0x%08x, First kick:%d, Last kick:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(120, ROGUE_FW_GROUP_MAIN, 10), + "Kick 3D: FWCtx 0x%08.8x @ %d, RTD 0x%08x, Partial render:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(121, ROGUE_FW_GROUP_MAIN, 8), + "Kick 3D TQ: FWCtx 0x%08.8x @ %d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(122, ROGUE_FW_GROUP_MAIN, 6), + "Kick Compute: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(123, ROGUE_FW_GROUP_MAIN, 8), + "Kick RTU: FWCtx 0x%08.8x @ %d, Frame Context:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(124, ROGUE_FW_GROUP_MAIN, 7), + "Kick SHG: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(125, ROGUE_FW_GROUP_MAIN, 1), + "Reconfigure CSRM: special coeff support enable %d." }, + { ROGUE_FW_LOG_CREATESFID(127, ROGUE_FW_GROUP_MAIN, 1), + "TA requires max coeff mode, deferring: %d." }, + { ROGUE_FW_LOG_CREATESFID(128, ROGUE_FW_GROUP_MAIN, 1), + "3D requires max coeff mode, deferring: %d." }, + { ROGUE_FW_LOG_CREATESFID(129, ROGUE_FW_GROUP_MAIN, 1), + "Kill DM%d failed" }, + { ROGUE_FW_LOG_CREATESFID(130, ROGUE_FW_GROUP_MAIN, 2), + "Thread Queue is full, we will have to wait for space! (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(131, ROGUE_FW_GROUP_MAIN, 3), + "Thread Queue is fencing, we are waiting for Roff = %d (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(132, ROGUE_FW_GROUP_MAIN, 1), + "DM %d failed to Context Switch on time. Triggered HCS (see HWR logs)." 
}, + { ROGUE_FW_LOG_CREATESFID(133, ROGUE_FW_GROUP_MAIN, 1), + "HCS changed to %d ms" }, + { ROGUE_FW_LOG_CREATESFID(134, ROGUE_FW_GROUP_MAIN, 4), + "Updating Tiles In Flight (Dusts=%d, PartitionMask=0x%08x, ISPCtl=0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(135, ROGUE_FW_GROUP_MAIN, 2), + " Phantom %d: USCTiles=%d" }, + { ROGUE_FW_LOG_CREATESFID(136, ROGUE_FW_GROUP_MAIN, 0), + "Isolation grouping is disabled" }, + { ROGUE_FW_LOG_CREATESFID(137, ROGUE_FW_GROUP_MAIN, 1), + "Isolation group configured with a priority threshold of %d" }, + { ROGUE_FW_LOG_CREATESFID(138, ROGUE_FW_GROUP_MAIN, 1), + "OS %d has come online" }, + { ROGUE_FW_LOG_CREATESFID(139, ROGUE_FW_GROUP_MAIN, 1), + "OS %d has gone offline" }, + { ROGUE_FW_LOG_CREATESFID(140, ROGUE_FW_GROUP_MAIN, 4), + "Signalled the previously stalled FWCtx: 0x%08.8x, OSId: %u, Signal Address: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(141, ROGUE_FW_GROUP_MAIN, 7), + "TDM Queue: FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(142, ROGUE_FW_GROUP_MAIN, 6), + "Reset TDM Queue Read Offset: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u becomes 0, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(143, ROGUE_FW_GROUP_MAIN, 5), + "User Mode Queue mismatched stream start: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u, StreamStartOffset = %u)" }, + { ROGUE_FW_LOG_CREATESFID(144, ROGUE_FW_GROUP_MAIN, 0), + "GPU deinit" }, + { ROGUE_FW_LOG_CREATESFID(145, ROGUE_FW_GROUP_MAIN, 0), + "GPU units deinit" }, + { ROGUE_FW_LOG_CREATESFID(146, ROGUE_FW_GROUP_MAIN, 2), + "Initialised OS %d with config flags 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(147, ROGUE_FW_GROUP_MAIN, 2), + "UFO limit exceeded %d/%d" }, + { ROGUE_FW_LOG_CREATESFID(148, ROGUE_FW_GROUP_MAIN, 0), + "3D Dummy stencil store" }, + { ROGUE_FW_LOG_CREATESFID(149, ROGUE_FW_GROUP_MAIN, 3), + "Initialised OS %d with config flags 0x%08x and extended config flags 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(150, ROGUE_FW_GROUP_MAIN, 1), + "Unknown Command (eCmdType=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(151, ROGUE_FW_GROUP_MAIN, 4), + "UFO forced update: FWCtx 0x%08.8x @ %d [0x%08.8x] = 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(152, ROGUE_FW_GROUP_MAIN, 5), + "UFO forced update NOP: FWCtx 0x%08.8x @ %d [0x%08.8x] = 0x%08.8x, reason %d" }, + { ROGUE_FW_LOG_CREATESFID(153, ROGUE_FW_GROUP_MAIN, 3), + "TDM context switch check: Roff %u points to 0x%08x, Match=%u" }, + { ROGUE_FW_LOG_CREATESFID(154, ROGUE_FW_GROUP_MAIN, 6), + "OSid %d CCB init status: %d (1-ok 0-fail): kCCBCtl@0x%x kCCB@0x%x fwCCBCtl@0x%x fwCCB@0x%x" }, + { ROGUE_FW_LOG_CREATESFID(155, ROGUE_FW_GROUP_MAIN, 2), + "FW IRQ # %u @ %u" }, + { ROGUE_FW_LOG_CREATESFID(156, ROGUE_FW_GROUP_MAIN, 3), + "Setting breakpoint: Addr 0x%08.8x DM%u usc_breakpoint_ctrl_dm = %u" }, + { ROGUE_FW_LOG_CREATESFID(157, ROGUE_FW_GROUP_MAIN, 3), + "Invalid KCCB setup for OSid %u: KCCB 0x%08x, KCCB Ctrl 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(158, ROGUE_FW_GROUP_MAIN, 3), + "Invalid KCCB cmd (%u) for OSid %u @ KCCB 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(159, ROGUE_FW_GROUP_MAIN, 4), + "FW FAULT: At line %d in file 0x%08x%08x, additional data=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(160, ROGUE_FW_GROUP_MAIN, 4), + "Invalid breakpoint: MemCtx 0x%08x Addr 0x%08.8x DM%u usc_breakpoint_ctrl_dm = %u" }, + { ROGUE_FW_LOG_CREATESFID(161, ROGUE_FW_GROUP_MAIN, 3), + "Discarding invalid SLC flushinval command for OSid %u: DM %u, FWCtx 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(162, ROGUE_FW_GROUP_MAIN, 4), + "Invalid Write Offset 
update notification from OSid %u to DM %u: FWCtx 0x%08x, MemCtx 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(163, ROGUE_FW_GROUP_MAIN, 4), + "Null FWCtx in KCCB kick cmd for OSid %u: KCCB 0x%08x, ROff %u, WOff %u" }, + { ROGUE_FW_LOG_CREATESFID(164, ROGUE_FW_GROUP_MAIN, 3), + "Checkpoint CCB for OSid %u is full, signalling host for full check state (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(165, ROGUE_FW_GROUP_MAIN, 8), + "OSid %d CCB init status: %d (1-ok 0-fail): kCCBCtl@0x%x kCCB@0x%x fwCCBCtl@0x%x fwCCB@0x%x chptCCBCtl@0x%x chptCCB@0x%x" }, + { ROGUE_FW_LOG_CREATESFID(166, ROGUE_FW_GROUP_MAIN, 4), + "OSid %d fw state transition request: from %d to %d (0-offline 1-ready 2-active 3-offloading). Status %d (1-ok 0-fail)" }, + { ROGUE_FW_LOG_CREATESFID(167, ROGUE_FW_GROUP_MAIN, 2), + "OSid %u has %u stale commands in its KCCB" }, + { ROGUE_FW_LOG_CREATESFID(168, ROGUE_FW_GROUP_MAIN, 0), + "Applying VCE pause" }, + { ROGUE_FW_LOG_CREATESFID(169, ROGUE_FW_GROUP_MAIN, 3), + "OSid %u KCCB slot %u value updated to %u" }, + { ROGUE_FW_LOG_CREATESFID(170, ROGUE_FW_GROUP_MAIN, 7), + "Unknown KCCB Command: KCCBCtl=0x%08x, KCCB=0x%08x, Roff=%u, Woff=%u, Wrap=%u, Cmd=0x%08x, CmdType=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(171, ROGUE_FW_GROUP_MAIN, 10), + "Unknown Client CCB Command processing fences: FWCtx=0x%08x, CCBCtl=0x%08x, CCB=0x%08x, Roff=%u, Doff=%u, Woff=%u, Wrap=%u, CmdHdr=0x%08x, CmdType=0x%08x, CmdSize=%u" }, + { ROGUE_FW_LOG_CREATESFID(172, ROGUE_FW_GROUP_MAIN, 10), + "Unknown Client CCB Command executing kick: FWCtx=0x%08x, CCBCtl=0x%08x, CCB=0x%08x, Roff=%u, Doff=%u, Woff=%u, Wrap=%u, CmdHdr=0x%08x, CmdType=0x%08x, CmdSize=%u" }, + { ROGUE_FW_LOG_CREATESFID(173, ROGUE_FW_GROUP_MAIN, 2), + "Null FWCtx in KCCB kick cmd for OSid %u with WOff %u" }, + { ROGUE_FW_LOG_CREATESFID(174, ROGUE_FW_GROUP_MAIN, 2), + "Discarding invalid SLC flushinval command for OSid %u, FWCtx 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(175, ROGUE_FW_GROUP_MAIN, 3), + "Invalid Write Offset update notification from OSid %u: FWCtx 0x%08x, MemCtx 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(176, ROGUE_FW_GROUP_MAIN, 2), + "Initialised Firmware with config flags 0x%08x and extended config flags 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(177, ROGUE_FW_GROUP_MAIN, 1), + "Set Periodic Hardware Reset Mode: %d" }, + { ROGUE_FW_LOG_CREATESFID(179, ROGUE_FW_GROUP_MAIN, 3), + "PHR mode %d, FW state: 0x%08x, HWR flags: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(180, ROGUE_FW_GROUP_MAIN, 1), + "PHR mode %d triggered a reset" }, + { ROGUE_FW_LOG_CREATESFID(181, ROGUE_FW_GROUP_MAIN, 2), + "Signal update, Snoop Filter: %u, Signal Id: %u" }, + { ROGUE_FW_LOG_CREATESFID(182, ROGUE_FW_GROUP_MAIN, 1), + "WARNING: Skipping FW KCCB Cmd type %d which is not yet supported on Series8." }, + { ROGUE_FW_LOG_CREATESFID(183, ROGUE_FW_GROUP_MAIN, 4), + "MMU context cache data NULL, but cache flags=0x%x (sync counter=%u, update value=%u) OSId=%u" }, + { ROGUE_FW_LOG_CREATESFID(184, ROGUE_FW_GROUP_MAIN, 5), + "SLC range based flush: Context=%u VAddr=0x%02x%08x, Size=0x%08x, Invalidate=%d" }, + { ROGUE_FW_LOG_CREATESFID(185, ROGUE_FW_GROUP_MAIN, 3), + "FBSC invalidate for Context Set [0x%08x]: Entry mask 0x%08x%08x." 
}, + { ROGUE_FW_LOG_CREATESFID(186, ROGUE_FW_GROUP_MAIN, 3), + "TDM context switch check: Roff %u was not valid for kick starting at %u, moving back to %u" }, + { ROGUE_FW_LOG_CREATESFID(187, ROGUE_FW_GROUP_MAIN, 2), + "Signal updates: FIFO: %u, Signals: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(188, ROGUE_FW_GROUP_MAIN, 2), + "Invalid FBSC cmd: FWCtx 0x%08x, MemCtx 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(189, ROGUE_FW_GROUP_MAIN, 0), + "Insert BRN68497 WA blit after TDM Context store." }, + { ROGUE_FW_LOG_CREATESFID(190, ROGUE_FW_GROUP_MAIN, 1), + "UFO Updates for previously finished FWCtx 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(191, ROGUE_FW_GROUP_MAIN, 1), + "RTC with RTA present, %u active render targets" }, + { ROGUE_FW_LOG_CREATESFID(192, ROGUE_FW_GROUP_MAIN, 0), + "Invalid RTA Set-up. The ValidRenderTargets array in RTACtl is Null!" }, + { ROGUE_FW_LOG_CREATESFID(193, ROGUE_FW_GROUP_MAIN, 2), + "Block 0x%x / Counter 0x%x INVALID and ignored" }, + { ROGUE_FW_LOG_CREATESFID(194, ROGUE_FW_GROUP_MAIN, 2), + "ECC fault GPU=0x%08x FW=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(195, ROGUE_FW_GROUP_MAIN, 1), + "Processing XPU event on DM = %d" }, + { ROGUE_FW_LOG_CREATESFID(196, ROGUE_FW_GROUP_MAIN, 2), + "OSid %u failed to respond to the virtualisation watchdog in time. Timestamp of its last input = %u" }, + { ROGUE_FW_LOG_CREATESFID(197, ROGUE_FW_GROUP_MAIN, 1), + "GPU-%u has locked up (see HWR logs for more info)" }, + { ROGUE_FW_LOG_CREATESFID(198, ROGUE_FW_GROUP_MAIN, 3), + "Updating Tiles In Flight (Dusts=%d, PartitionMask=0x%08x, ISPCtl=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(199, ROGUE_FW_GROUP_MAIN, 0), + "GPU has locked up (see HWR logs for more info)" }, + { ROGUE_FW_LOG_CREATESFID(200, ROGUE_FW_GROUP_MAIN, 1), + "Reprocessing outstanding XPU events from cores 0x%02x" }, + { ROGUE_FW_LOG_CREATESFID(201, ROGUE_FW_GROUP_MAIN, 3), + "Secondary XPU event on DM=%d, CoreMask=0x%02x, Raised=0x%02x" }, + { ROGUE_FW_LOG_CREATESFID(202, ROGUE_FW_GROUP_MAIN, 8), + "TDM Queue: Core %u, FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(203, ROGUE_FW_GROUP_MAIN, 3), + "TDM stalled Core %u (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(204, ROGUE_FW_GROUP_MAIN, 8), + "Compute Queue: Core %u, FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(205, ROGUE_FW_GROUP_MAIN, 4), + "Compute stalled core %u (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(206, ROGUE_FW_GROUP_MAIN, 6), + "User Mode Queue mismatched stream start: Core %u, FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u, StreamStartOffset = %u)" }, + { ROGUE_FW_LOG_CREATESFID(207, ROGUE_FW_GROUP_MAIN, 3), + "TDM resumed core %u (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(208, ROGUE_FW_GROUP_MAIN, 4), + "Compute resumed core %u (Roff = %u, Woff = %u, Size = %u)" }, + { ROGUE_FW_LOG_CREATESFID(209, ROGUE_FW_GROUP_MAIN, 2), + " Updated permission for OSid %u to perform MTS kicks: %u (1 = allowed, 0 = not allowed)" }, + { ROGUE_FW_LOG_CREATESFID(210, ROGUE_FW_GROUP_MAIN, 2), + "Mask = 0x%X, mask2 = 0x%X" }, + { ROGUE_FW_LOG_CREATESFID(211, ROGUE_FW_GROUP_MAIN, 3), + " core %u, reg = %u, mask = 0x%X)" }, + { ROGUE_FW_LOG_CREATESFID(212, ROGUE_FW_GROUP_MAIN, 1), + "ECC fault received from safety bus: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(213, ROGUE_FW_GROUP_MAIN, 1), + "Safety Watchdog threshold period set to 0x%x clock cycles" }, + { ROGUE_FW_LOG_CREATESFID(214, ROGUE_FW_GROUP_MAIN, 0), + "MTS Safety 
Event triggered by the safety watchdog." }, + { ROGUE_FW_LOG_CREATESFID(215, ROGUE_FW_GROUP_MAIN, 3), + "DM%d USC tasks range limit 0 - %d, stride %d" }, + { ROGUE_FW_LOG_CREATESFID(216, ROGUE_FW_GROUP_MAIN, 1), + "ECC fault GPU=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(217, ROGUE_FW_GROUP_MAIN, 0), + "GPU Hardware units reset to prevent transient faults." }, + { ROGUE_FW_LOG_CREATESFID(218, ROGUE_FW_GROUP_MAIN, 2), + "Kick Abort cmd: FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(219, ROGUE_FW_GROUP_MAIN, 7), + "Kick Ray: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(220, ROGUE_FW_GROUP_MAIN, 0), + "Ray finished" }, + { ROGUE_FW_LOG_CREATESFID(221, ROGUE_FW_GROUP_MAIN, 2), + "State of firmware's private data at boot time: %d (0 = uninitialised, 1 = initialised); Fw State Flags = 0x%08X" }, + { ROGUE_FW_LOG_CREATESFID(222, ROGUE_FW_GROUP_MAIN, 2), + "CFI Timeout detected (%d increasing to %d)" }, + { ROGUE_FW_LOG_CREATESFID(223, ROGUE_FW_GROUP_MAIN, 2), + "CFI Timeout detected for FBM (%d increasing to %d)" }, + { ROGUE_FW_LOG_CREATESFID(224, ROGUE_FW_GROUP_MAIN, 0), + "Geom OOM event not allowed" }, + { ROGUE_FW_LOG_CREATESFID(225, ROGUE_FW_GROUP_MAIN, 4), + "Changing OSid %d's priority from %u to %u; Isolation = %u (0 = off; 1 = on)" }, + { ROGUE_FW_LOG_CREATESFID(226, ROGUE_FW_GROUP_MAIN, 2), + "Skipping already executed TA FWCtx 0x%08.8x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(227, ROGUE_FW_GROUP_MAIN, 2), + "Attempt to execute TA FWCtx 0x%08.8x @ %d ahead of time on other GEOM" }, + { ROGUE_FW_LOG_CREATESFID(228, ROGUE_FW_GROUP_MAIN, 8), + "Kick TDM: Kick ID %u FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(229, ROGUE_FW_GROUP_MAIN, 12), + "Kick TA: Kick ID %u FWCtx 0x%08.8x @ %d, RTD 0x%08x, First kick:%d, Last kick:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(230, ROGUE_FW_GROUP_MAIN, 11), + "Kick 3D: Kick ID %u FWCtx 0x%08.8x @ %d, RTD 0x%08x, Partial render:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(231, ROGUE_FW_GROUP_MAIN, 7), + "Kick Compute: Kick ID %u FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(232, ROGUE_FW_GROUP_MAIN, 1), + "TDM finished: Kick ID %u " }, + { ROGUE_FW_LOG_CREATESFID(233, ROGUE_FW_GROUP_MAIN, 1), + "TA finished: Kick ID %u " }, + { ROGUE_FW_LOG_CREATESFID(234, ROGUE_FW_GROUP_MAIN, 3), + "3D finished: Kick ID %u , HWRTData0State=%x, HWRTData1State=%x" }, + { ROGUE_FW_LOG_CREATESFID(235, ROGUE_FW_GROUP_MAIN, 1), + "Compute finished: Kick ID %u " }, + { ROGUE_FW_LOG_CREATESFID(236, ROGUE_FW_GROUP_MAIN, 10), + "Kick TDM: Kick ID %u FWCtx 0x%08.8x @ %d, Base 0x%08x%08x. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(237, ROGUE_FW_GROUP_MAIN, 8), + "Kick Ray: Kick ID %u FWCtx 0x%08.8x @ %d. 
(PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(238, ROGUE_FW_GROUP_MAIN, 1), + "Ray finished: Kick ID %u " }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MTS, 2), + "Bg Task DM = %u, counted = %d" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MTS, 1), + "Bg Task complete DM = %u" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MTS, 3), + "Irq Task DM = %u, Breq = %d, SBIrq = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_MTS, 1), + "Irq Task complete DM = %u" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MTS, 0), + "Kick MTS Bg task DM=All" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MTS, 1), + "Kick MTS Irq task DM=%d" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MTS, 2), + "Ready queue debug DM = %u, celltype = %d" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MTS, 2), + "Ready-to-run debug DM = %u, item = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MTS, 3), + "Client command header DM = %u, client CCB = 0x%x, cmd = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MTS, 3), + "Ready-to-run debug OSid = %u, DM = %u, item = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MTS, 3), + "Ready queue debug DM = %u, celltype = %d, OSid = %u" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MTS, 3), + "Bg Task DM = %u, counted = %d, OSid = %u" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MTS, 1), + "Bg Task complete DM Bitfield: %u" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MTS, 0), + "Irq Task complete." }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_MTS, 7), + "Discarded Command Type: %d OS ID = %d PID = %d context = 0x%08x cccb ROff = 0x%x, due to USC breakpoint hit by OS ID = %d PID = %d." }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MTS, 4), + "KCCB Slot %u: DM=%u, Cmd=0x%08x, OSid=%u" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MTS, 2), + "KCCB Slot %u: Return value %u" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MTS, 1), + "Bg Task OSid = %u" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MTS, 3), + "KCCB Slot %u: Cmd=0x%08x, OSid=%u" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MTS, 1), + "Irq Task (EVENT_STATUS=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MTS, 2), + "VZ sideband test, kicked with OSid=%u from MTS, OSid for test=%u" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_CLEANUP, 1), + "FwCommonContext [0x%08x] cleaned" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_CLEANUP, 3), + "FwCommonContext [0x%08x] is busy: ReadOffset = %d, WriteOffset = %d" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_CLEANUP, 2), + "HWRTData [0x%08x] for DM=%d, received cleanup request" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_CLEANUP, 3), + "HWRTData [0x%08x] HW Context cleaned for DM%u, executed commands = %d" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_CLEANUP, 2), + "HWRTData [0x%08x] HW Context for DM%u is busy" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_CLEANUP, 2), + "HWRTData [0x%08x] HW Context %u cleaned" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_CLEANUP, 1), + "Freelist [0x%08x] cleaned" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_CLEANUP, 1), + "ZSBuffer [0x%08x] cleaned" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_CLEANUP, 3), + "ZSBuffer [0x%08x] is busy: submitted = %d, executed = %d" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_CLEANUP, 4), + "HWRTData [0x%08x] HW Context for DM%u is busy: submitted = %d, executed = %d" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_CLEANUP, 2), + "HW Ray 
Frame data [0x%08x] for DM=%d, received cleanup request" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_CLEANUP, 3), + "HW Ray Frame Data [0x%08x] cleaned for DM%u, executed commands = %d" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_CLEANUP, 4), + "HW Ray Frame Data [0x%08x] for DM%u is busy: submitted = %d, executed = %d" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_CLEANUP, 2), + "HW Ray Frame Data [0x%08x] HW Context %u cleaned" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_CLEANUP, 1), + "Discarding invalid cleanup request of type 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_CLEANUP, 1), + "Received cleanup request for HWRTData [0x%08x]" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_CLEANUP, 3), + "HWRTData [0x%08x] HW Context is busy: submitted = %d, executed = %d" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_CLEANUP, 3), + "HWRTData [0x%08x] HW Context %u cleaned, executed commands = %d" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_CSW, 1), + "CDM FWCtx 0x%08.8x needs resume" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_CSW, 3), + "*** CDM FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_CSW, 1), + "CDM FWCtx shared alloc size load 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_CSW, 0), + "*** CDM FWCtx store complete" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_CSW, 0), + "*** CDM FWCtx store start" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_CSW, 0), + "CDM Soft Reset" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_CSW, 1), + "3D FWCtx 0x%08.8x needs resume" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_CSW, 1), + "*** 3D FWCtx 0x%08.8x resume" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_CSW, 0), + "*** 3D context store complete" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_CSW, 3), + "3D context store pipe state: 0x%08.8x 0x%08.8x 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_CSW, 0), + "*** 3D context store start" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_CSW, 1), + "*** 3D TQ FWCtx 0x%08.8x resume" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_CSW, 1), + "TA FWCtx 0x%08.8x needs resume" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_CSW, 3), + "*** TA FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_CSW, 2), + "TA context shared alloc size store 0x%x, load 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_CSW, 0), + "*** TA context store complete" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_CSW, 0), + "*** TA context store start" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_CSW, 3), + "Higher priority context scheduled for DM %u, old prio:%d, new prio:%d" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_CSW, 2), + "Set FWCtx 0x%x priority to %u" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_CSW, 2), + "3D context store pipe%d state: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_CSW, 2), + "3D context resume pipe%d state: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_CSW, 1), + "SHG FWCtx 0x%08.8x needs resume" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_CSW, 3), + "*** SHG FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_CSW, 2), + "SHG context shared alloc size store 0x%x, load 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_CSW, 0), + "*** SHG context store complete" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_CSW, 0), + "*** SHG 
context store start" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_CSW, 1), + "Performing TA indirection, last used pipe %d" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_CSW, 0), + "CDM context store hit ctrl stream terminate. Skip resume." }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_CSW, 4), + "*** CDM FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x, shader state %u" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_CSW, 2), + "TA PDS/USC state buffer flip (%d->%d)" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_CSW, 0), + "TA context store hit BRN 52563: vertex store tasks outstanding" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_CSW, 1), + "TA USC poll failed (USC vertex task count: %d)" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_CSW, 0), + "TA context store deferred due to BRN 54141." }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_CSW, 7), + "Higher priority context scheduled for DM %u. Prios (OSid, OSid Prio, Context Prio): Current: %u, %u, %u New: %u, %u, %u" }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_CSW, 0), + "*** TDM context store start" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_CSW, 0), + "*** TDM context store complete" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_CSW, 2), + "TDM context needs resume, header [0x%08.8x, 0x%08.8x]" }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_CSW, 8), + "Higher priority context scheduled for DM %u. Prios (OSid, OSid Prio, Context Prio): Current: %u, %u, %u New: %u, %u, %u. Hard Context Switching: %u" }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_CSW, 3), + "3D context store pipe %2d (%2d) state: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_CSW, 3), + "3D context resume pipe %2d (%2d) state: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_CSW, 1), + "*** 3D context store start version %d (1=IPP_TILE, 2=ISP_TILE)" }, + { ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_CSW, 3), + "3D context store pipe%d state: 0x%08.8x%08x" }, + { ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_CSW, 3), + "3D context resume pipe%d state: 0x%08.8x%08x" }, + { ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_CSW, 2), + "3D context resume IPP state: 0x%08.8x%08x" }, + { ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_CSW, 1), + "All 3D pipes empty after ISP tile mode store! 
IPP_status: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_CSW, 3), + "TDM context resume pipe%d state: 0x%08.8x%08x" }, + { ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_CSW, 0), + "*** 3D context store start version 4" }, + { ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_CSW, 2), + "Multicore context resume on DM%d active core mask 0x%04.4x" }, + { ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_CSW, 2), + "Multicore context store on DM%d active core mask 0x%04.4x" }, + { ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_CSW, 5), + "TDM context resume Core %d, pipe%d state: 0x%08.8x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_CSW, 0), + "*** RDM FWCtx store complete" }, + { ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_CSW, 0), + "*** RDM FWCtx store start" }, + { ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_CSW, 1), + "RDM FWCtx 0x%08.8x needs resume" }, + { ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_CSW, 1), + "RDM FWCtx 0x%08.8x resume" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_BIF, 3), + "Activate MemCtx=0x%08x BIFreq=%d secure=%d" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_BIF, 1), + "Deactivate MemCtx=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_BIF, 1), + "Alloc PC reg %d" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_BIF, 2), + "Grab reg set %d refcount now %d" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_BIF, 2), + "Ungrab reg set %d refcount now %d" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_BIF, 6), + "Setup reg=%d BIFreq=%d, expect=0x%08x%08x, actual=0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_BIF, 2), + "Trust enabled:%d, for BIFreq=%d" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_BIF, 9), + "BIF Tiling Cfg %d base 0x%08x%08x len 0x%08x%08x enable %d stride %d --> 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_BIF, 4), + "Wrote the Value %d to OSID0, Cat Base %d, Register's contents are now 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_BIF, 3), + "Wrote the Value %d to OSID1, Context %d, Register's contents are now 0x%04x" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_BIF, 7), + "ui32OSid = %u, Catbase = %u, Reg Address = 0x%x, Reg index = %u, Bitshift index = %u, Val = 0x%08x%08x" }, \ + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_BIF, 5), + "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u, BIFREQ %u" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_BIF, 1), + "Unmap GPU memory (event status 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_BIF, 3), + "Activate MemCtx=0x%08x DM=%d secure=%d" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_BIF, 6), + "Setup reg=%d DM=%d, expect=0x%08x%08x, actual=0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_BIF, 4), + "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_BIF, 2), + "Trust enabled:%d, for DM=%d" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_BIF, 5), + "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u, DM %u" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_BIF, 6), + "Setup register set=%d DM=%d, PC address=0x%08x%08x, OSid=%u, NewPCRegRequired=%d" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_BIF, 3), + "Alloc PC set %d as register range [%u - %u]" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MISC, 1), + "GPIO write 0x%02x" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MISC, 1), + "GPIO read 0x%02x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MISC, 0), + "GPIO enabled" }, + { ROGUE_FW_LOG_CREATESFID(4, 
ROGUE_FW_GROUP_MISC, 0), + "GPIO disabled" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MISC, 1), + "GPIO status=%d (0=OK, 1=Disabled)" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MISC, 2), + "GPIO_AP: Read address=0x%02x (%d byte(s))" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MISC, 2), + "GPIO_AP: Write address=0x%02x (%d byte(s))" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MISC, 0), + "GPIO_AP timeout!" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MISC, 1), + "GPIO_AP error. GPIO status=%d (0=OK, 1=Disabled)" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MISC, 1), + "GPIO already read 0x%02x" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MISC, 2), + "SR: Check buffer %d available returned %d" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MISC, 1), + "SR: Waiting for buffer %d" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MISC, 2), + "SR: Timeout waiting for buffer %d (after %d ticks)" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MISC, 2), + "SR: Skip frame check for strip %d returned %d (0=No skip, 1=Skip frame)" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_MISC, 1), + "SR: Skip remaining strip %d in frame" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MISC, 1), + "SR: Inform HW that strip %d is a new frame" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MISC, 1), + "SR: Timeout waiting for INTERRUPT_FRAME_SKIP (after %d ticks)" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MISC, 1), + "SR: Strip mode is %d" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MISC, 1), + "SR: Strip Render start (strip %d)" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MISC, 1), + "SR: Strip Render complete (buffer %d)" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MISC, 1), + "SR: Strip Render fault (buffer %d)" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_MISC, 1), + "TRP state: %d" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_MISC, 1), + "TRP failure: %d" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MISC, 1), + "SW TRP State: %d" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_MISC, 1), + "SW TRP failure: %d" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_MISC, 1), + "HW kick event (%u)" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_MISC, 4), + "GPU core (%u/%u): checksum 0x%08x vs. 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_MISC, 6), + "GPU core (%u/%u), unit (%u,%u): checksum 0x%08x vs. 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_MISC, 6), + "HWR: Core%u, Register=0x%08x, OldValue=0x%08x%08x, CurrValue=0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_MISC, 4), + "HWR: USC Core%u, ui32TotalSlotsUsedByDM=0x%08x, psDMHWCtl->ui32USCSlotsUsedByDM=0x%08x, bHWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_MISC, 6), + "HWR: USC Core%u, Register=0x%08x, OldValue=0x%08x%08x, CurrValue=0x%08x%08x" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_PM, 10), + "ALIST%d SP = %u, MLIST%d SP = %u (VCE 0x%08x%08x, TE 0x%08x%08x, ALIST 0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_PM, 8), + "Is TA: %d, finished: %d on HW %u (HWRTData = 0x%08x, MemCtx = 0x%08x). 
FL different between TA/3D: global:%d, local:%d, mmu:%d" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_PM, 14), + "UFL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), MFL-3D-Base: 0x%08x%08x (SP = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_PM, 14), + "UFL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), MFL-TA-Base: 0x%08x%08x (SP = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_PM, 5), + "Freelist grow completed [0x%08x]: added pages 0x%08x, total pages 0x%08x, new DevVirtAddr 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_PM, 1), + "Grow for freelist ID=0x%08x denied by host" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_PM, 5), + "Freelist update completed [0x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_PM, 1), + "Reconstruction of freelist ID=0x%08x failed" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_PM, 2), + "Ignored attempt to pause or unpause the DM while there is no relevant operation in progress (0-TA,1-3D): %d, operation(0-unpause, 1-pause): %d" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_PM, 2), + "Force free 3D Context memory, FWCtx: 0x%08x, status(1:success, 0:fail): %d" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_PM, 1), + "PM pause TA ALLOC: PM_PAGE_MANAGEOP set to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_PM, 1), + "PM unpause TA ALLOC: PM_PAGE_MANAGEOP set to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_PM, 1), + "PM pause 3D DALLOC: PM_PAGE_MANAGEOP set to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_PM, 1), + "PM unpause 3D DALLOC: PM_PAGE_MANAGEOP set to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_PM, 1), + "PM ALLOC/DALLOC change was not actioned: PM_PAGE_MANAGEOP_STATUS=0x%x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_PM, 7), + "Is TA: %d, finished: %d on HW %u (HWRTData = 0x%08x, MemCtx = 0x%08x). 
FL different between TA/3D: global:%d, local:%d" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_PM, 10), + "UFL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_PM, 10), + "UFL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_PM, 7), + "Freelist update completed [0x%08x / FL State 0x%08x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_PM, 7), + "Freelist update failed [0x%08x / FL State 0x%08x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_PM, 10), + "UFL-3D-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_PM, 10), + "UFL-TA-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_PM, 5), + "Freelist 0x%08x base address from HW: 0x%02x%08x (expected value: 0x%02x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_PM, 5), + "Analysis of FL grow: Pause=(%u,%u) Paused+Valid(%u,%u) PMStateBuffer=0x%x" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_PM, 5), + "Attempt FL grow for FL: 0x%08x, new dev address: 0x%02x%08x, new page count: %u, new ready count: %u" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_PM, 5), + "Deferring FL grow for non-loaded FL: 0x%08x, new dev address: 0x%02x%08x, new page count: %u, new ready count: %u" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_PM, 4), + "Is GEOM: %d, finished: %d (HWRTData = 0x%08x, MemCtx = 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_PM, 1), + "3D Timeout Now for FWCtx 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_PM, 1), + "GEOM PM Recycle for FWCtx 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_PM, 1), + "PM running primary config (Core %d)" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_PM, 1), + "PM running secondary config (Core %d)" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_PM, 1), + "PM running tertiary config (Core %d)" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_PM, 1), + "PM running quaternary config (Core %d)" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_RPM, 3), + "Global link list dynamic page count: vertex 0x%x, varying 0x%x, node 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_RPM, 3), + "Global link list static page count: vertex 0x%x, varying 0x%x, node 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_RPM, 0), + "RPM request failed. Waiting for freelist grow." }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_RPM, 0), + "RPM request failed. Aborting the current frame." 
}, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_RPM, 1), + "RPM waiting for pending grow on freelist 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_RPM, 3), + "Request freelist grow [0x%08x] current pages %d, grow size %d" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_RPM, 2), + "Freelist load: SHF = 0x%08x, SHG = 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_RPM, 2), + "SHF FPL register: 0x%08x.0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_RPM, 2), + "SHG FPL register: 0x%08x.0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_RPM, 5), + "Kernel requested RPM grow on freelist (type %d) at 0x%08x from current size %d to new size %d, RPM restart: %d (1=Yes)" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_RPM, 0), + "Restarting SHG" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_RPM, 0), + "Grow failed, aborting the current frame." }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_RPM, 1), + "RPM abort complete on HWFrameData [0x%08x]." }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_RPM, 1), + "RPM freelist cleanup [0x%08x] requires abort to proceed." }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_RPM, 2), + "RPM page table base register: 0x%08x.0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_RPM, 0), + "Issuing RPM abort." }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_RPM, 0), + "RPM OOM received but toggle bits indicate free pages available" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_RPM, 0), + "RPM hardware timeout. Unable to process OOM event." }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_RPM, 5), + "SHF FL (0x%08x) load, FPL: 0x%08x.0x%08x, roff: 0x%08x, woff: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_RPM, 5), + "SHG FL (0x%08x) load, FPL: 0x%08x.0x%08x, roff: 0x%08x, woff: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_RPM, 3), + "SHF FL (0x%08x) store, roff: 0x%08x, woff: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_RPM, 3), + "SHG FL (0x%08x) store, roff: 0x%08x, woff: 0x%08x" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_RTD, 2), + "3D RTData 0x%08x finished on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_RTD, 2), + "3D RTData 0x%08x ready on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_RTD, 4), + "CONTEXT_PB_BASE set to 0x%x, FL different between TA/3D: local: %d, global: %d, mmu: %d" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_RTD, 2), + "Loading VFP table 0x%08x%08x for 3D" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_RTD, 2), + "Loading VFP table 0x%08x%08x for TA" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_RTD, 10), + "Load Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: TotalPMPages = %d, FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_RTD, 0), + "Perform VHEAP table store" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_RTD, 2), + "RTData 0x%08x: found match in Context=%d: Load=No, Store=No" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_RTD, 2), + "RTData 0x%08x: found NULL in Context=%d: Load=Yes, Store=No" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_RTD, 3), + "RTData 0x%08x: found state 3D finished (0x%08x) in Context=%d: Load=Yes, Store=Yes" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_RTD, 3), + "RTData 0x%08x: found state TA finished (0x%08x) in Context=%d: Load=Yes, Store=Yes" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_RTD, 5), + "Loading stack-pointers for %d 
(0:MidTA,1:3D) on context %d, MLIST = 0x%08x, ALIST = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_RTD, 10), + "Store Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: TotalPMPages = %d, FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_RTD, 2), + "TA RTData 0x%08x finished on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_RTD, 2), + "TA RTData 0x%08x loaded on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_RTD, 12), + "Store Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_RTD, 12), + "Load Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_RTD, 1), + "Freelist 0x%x RESET!!!!!!!!" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_RTD, 5), + "Freelist 0x%x stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_RTD, 3), + "Request reconstruction of Freelist 0x%x type: %d (0:local,1:global,2:mmu) on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_RTD, 1), + "Freelist reconstruction ACK from host (HWR state :%u)" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_RTD, 0), + "Freelist reconstruction completed" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_RTD, 3), + "TA RTData 0x%08x loaded on HW context %u HWRTDataNeedsLoading=%d" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_RTD, 3), + "TE Region headers base 0x%08x%08x (RGNHDR Init: %d)" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_RTD, 8), + "TA Buffers: FWCtx 0x%08x, RT 0x%08x, RTData 0x%08x, VHeap 0x%08x%08x, TPC 0x%08x%08x (MemCtx 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_RTD, 2), + "3D RTData 0x%08x loaded on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_RTD, 4), + "3D Buffers: FWCtx 0x%08x, RT 0x%08x, RTData 0x%08x (MemCtx 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_RTD, 2), + "Restarting TA after partial render, HWRTData0State=0x%x, HWRTData1State=0x%x" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_RTD, 3), + "CONTEXT_PB_BASE set to 0x%x, FL different between TA/3D: local: %d, global: %d" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_RTD, 12), + "Store Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_RTD, 12), + "Load Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_RTD, 5), + "3D Buffers: FWCtx 0x%08x, parent RT 0x%08x, RTData 0x%08x on ctx %d, (MemCtx 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_RTD, 7), + "TA Buffers: FWCtx 0x%08x, RTData 0x%08x, VHeap 0x%08x%08x, TPC 0x%08x%08x (MemCtx 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_RTD, 4), + "3D Buffers: FWCtx 0x%08x, RTData 0x%08x on ctx %d, (MemCtx 0x%08x)" }, + { 
ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_RTD, 6), + "Load Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u)" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_RTD, 1), + "TA RTData 0x%08x marked as killed." }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_RTD, 1), + "3D RTData 0x%08x marked as killed." }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_RTD, 1), + "RTData 0x%08x will be killed after TA restart." }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_RTD, 3), + "RTData 0x%08x Render State Buffer 0x%02x%08x will be reset." }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_RTD, 3), + "GEOM RTData 0x%08x using Render State Buffer 0x%02x%08x." }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_RTD, 3), + "FRAG RTData 0x%08x using Render State Buffer 0x%02x%08x." }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_SPM, 0), + "Force Z-Load for partial render" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_SPM, 0), + "Force Z-Store for partial render" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_SPM, 1), + "3D MemFree: Local FL 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_SPM, 1), + "3D MemFree: MMU FL 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_SPM, 1), + "3D MemFree: Global FL 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_SPM, 6), + "OOM TA/3D PR Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x, HardwareSync Fence [0x%08.8x] is 0x%08.8x requires 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_SPM, 3), + "OOM TA_cmd=0x%08x, U-FL 0x%08x, N-FL 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_SPM, 5), + "OOM TA_cmd=0x%08x, OOM MMU:%d, U-FL 0x%08x, N-FL 0x%08x, MMU-FL 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_SPM, 0), + "Partial render avoided" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_SPM, 0), + "Partial render discarded" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_SPM, 0), + "Partial Render finished" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_SPM, 0), + "SPM Owner = 3D-BG" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_SPM, 0), + "SPM Owner = 3D-IRQ" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_SPM, 0), + "SPM Owner = NONE" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_SPM, 0), + "SPM Owner = TA-BG" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_SPM, 0), + "SPM Owner = TA-IRQ" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_SPM, 2), + "ZStore address 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_SPM, 2), + "SStore address 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_SPM, 2), + "ZLoad address 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_SPM, 2), + "SLoad address 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_SPM, 0), + "No deferred ZS Buffer provided" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_SPM, 1), + "ZS Buffer successfully populated (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_SPM, 1), + "No need to populate ZS Buffer (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_SPM, 1), + "ZS Buffer successfully unpopulated (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_SPM, 1), + "No need to unpopulate ZS Buffer (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_SPM, 1), + "Send ZS-Buffer backing request to host (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_SPM, 1), + "Send ZS-Buffer unbacking request to host (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_SPM, 1), + "Don't send 
ZS-Buffer backing request. Previous request still pending (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_SPM, 1), + "Don't send ZS-Buffer unbacking request. Previous request still pending (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_SPM, 1), + "Partial Render waiting for ZBuffer to be backed (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_SPM, 1), + "Partial Render waiting for SBuffer to be backed (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_SPM, 0), + "SPM State = none" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_SPM, 0), + "SPM State = PR blocked" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_SPM, 0), + "SPM State = wait for grow" }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_SPM, 0), + "SPM State = wait for HW" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_SPM, 0), + "SPM State = PR running" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_SPM, 0), + "SPM State = PR avoided" }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_SPM, 0), + "SPM State = PR executed" }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_SPM, 2), + "3DMemFree matches freelist 0x%08x (FL type = %u)" }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_SPM, 0), + "Raise the 3DMemFreeDetected flag" }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_SPM, 1), + "Wait for pending grow on Freelist 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_SPM, 1), + "ZS Buffer failed to be populated (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_SPM, 5), + "Grow update inconsistency: FL addr: 0x%02x%08x, curr pages: %u, ready: %u, new: %u" }, + { ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_SPM, 4), + "OOM: Resumed TA with ready pages, FL addr: 0x%02x%08x, current pages: %u, SP : %u" }, + { ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_SPM, 5), + "Received grow update, FL addr: 0x%02x%08x, current pages: %u, ready pages: %u, threshold: %u" }, + { ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_SPM, 1), + "No deferred partial render FW (Type=%d) Buffer provided" }, + { ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_SPM, 1), + "No need to populate PR Buffer (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_SPM, 1), + "No need to unpopulate PR Buffer (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_SPM, 1), + "Send PR Buffer backing request to host (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_SPM, 1), + "Send PR Buffer unbacking request to host (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_SPM, 1), + "Don't send PR Buffer backing request. Previous request still pending (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_SPM, 1), + "Don't send PR Buffer unbacking request. 
Previous request still pending (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_SPM, 2), + "Partial Render waiting for Buffer %d type to be backed (ID=0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_SPM, 4), + "Received grow update, FL addr: 0x%02x%08x, new pages: %u, ready pages: %u" }, + { ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_SPM, 3), + "OOM TA/3D PR Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_SPM, 3), + "OOM: Resumed TA with ready pages, FL addr: 0x%02x%08x, current pages: %u" }, + { ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_SPM, 3), + "OOM TA/3D PR deadlock unblocked reordering DM%d runlist head from Context 0x%08x to 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_SPM, 0), + "SPM State = PR force free" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_POW, 4), + "Check Pow state DM%d int: 0x%x, ext: 0x%x, pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_POW, 3), + "GPU idle (might be powered down). Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_POW, 3), + "OS requested pow off (forced = %d), DM%d, pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_POW, 4), + "Initiate powoff query. Inactive DMs: %d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_POW, 2), + "Any RD-DM pending? %d, Any RD-DM Active? %d" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_POW, 3), + "GPU ready to be powered down. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_POW, 2), + "HW Request On(1)/Off(0): %d, Units: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_POW, 2), + "Request to change num of dusts to %d (Power flags=%d)" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_POW, 2), + "Changing number of dusts from %d to %d" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_POW, 0), + "Sidekick init" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_POW, 1), + "Rascal+Dusts init (# dusts mask: 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_POW, 0), + "Initiate powoff query for RD-DMs." }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_POW, 0), + "Initiate powoff query for TLA-DM." }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_POW, 2), + "Any RD-DM pending? %d, Any RD-DM Active? %d" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_POW, 2), + "TLA-DM pending? %d, TLA-DM Active? %d" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_POW, 1), + "Request power up due to BRN37270. Pow stat int: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_POW, 3), + "Cancel power off request int: 0x%x, ext: 0x%x, pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_POW, 1), + "OS requested forced IDLE, pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_POW, 1), + "OS cancelled forced IDLE, pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_POW, 3), + "Idle timer start. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_POW, 3), + "Cancel idle timer. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_POW, 2), + "Active PM latency set to %dms. Core clock: %d Hz" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_POW, 2), + "Compute cluster mask change to 0x%x, %d dusts powered." }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_POW, 0), + "Null command executed, repeating initiate powoff query for RD-DMs." 
}, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_POW, 1), + "Power monitor: Estimate of dynamic energy %u" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_POW, 3), + "Check Pow state: Int: 0x%x, Ext: 0x%x, Pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: New deadline, time = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: New workload, cycles = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Proactive frequency calculated = %u" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Reactive utilisation = %u percent" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: Reactive frequency calculated = %u.%u" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: OPP Point Sent = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: Deadline removed = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: Workload removed = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Throttle to a maximum = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_POW, 0), + "Proactive DVFS: Failed to pass OPP point via GPIO." }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_POW, 0), + "Proactive DVFS: Invalid node passed to function." }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Guest OS attempted to do a privileged action. OSid = %u" }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Unprofiled work started. Total unprofiled work present: %u" }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Unprofiled work finished. Total unprofiled work present: %u" }, + { ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_POW, 0), + "Proactive DVFS: Disabled: Not enabled by host." }, + { ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_POW, 2), + "HW Request Completed(1)/Aborted(0): %d, Ticks: %d" }, + { ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_POW, 1), + "Allowed number of dusts is %d due to BRN59042." }, + { ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_POW, 3), + "Host timed out while waiting for a forced idle state. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_POW, 5), + "Check Pow state: Int: 0x%x, Ext: 0x%x, Pow flags: 0x%x, Fence Counters: Check: %u - Update: %u" }, + { ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: OPP Point Sent = 0x%x, Success = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_POW, 0), + "Proactive DVFS: GPU transitioned to idle" }, + { ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_POW, 0), + "Proactive DVFS: GPU transitioned to active" }, + { ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_POW, 1), + "Power counter dumping: Data truncated writing register %u. Buffer too small." }, + { ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_POW, 0), + "Power controller returned ABORT for last request so retrying." 
}, + { ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_POW, 2), + "Discarding invalid power request: type 0x%x, DM %u" }, + { ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_POW, 2), + "Detected attempt to cancel forced idle while not forced idle (pow state 0x%x, pow flags 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_POW, 2), + "Detected attempt to force power off while not forced idle (pow state 0x%x, pow flags 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_POW, 1), + "Detected attempt to change dust count while not forced idle (pow state 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(56, ROGUE_FW_GROUP_POW, 3), + "Power monitor: Type = %d (0 = power, 1 = energy), Estimate result = 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_POW, 2), + "Conflicting clock frequency range: OPP min = %u, max = %u" }, + { ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_POW, 1), + "Proactive DVFS: Set floor to a minimum = 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_POW, 2), + "OS requested pow off (forced = %d), pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_POW, 1), + "Discarding invalid power request: type 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_POW, 3), + "Request to change SPU power state mask from 0x%x to 0x%x. Pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_POW, 2), + "Changing SPU power state mask from 0x%x to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_POW, 1), + "Detected attempt to change SPU power state mask while not forced idle (pow state 0x%x)" }, + { ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_POW, 1), + "Invalid SPU power mask 0x%x! Changing to 1" }, + { ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_POW, 2), + "Proactive DVFS: Send OPP %u with clock divider value %u" }, + { ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_POW, 0), + "PPA block started in perf validation mode." }, + { ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_POW, 1), + "Reset PPA block state %u (1=reset, 0=recalculate)." }, + { ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_POW, 1), + "Power controller returned ABORT for Core-%d last request so retrying." }, + { ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_POW, 3), + "HW Request On(1)/Off(0): %d, Units: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_POW, 5), + "Request to change SPU power state mask from 0x%x to 0x%x and RAC from 0x%x to 0x%x. Pow flags: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_POW, 4), + "Changing SPU power state mask from 0x%x to 0x%x and RAC from 0x%x to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_POW, 2), + "RAC pending? %d, RAC Active? %d" }, + { ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_POW, 0), + "Initiate powoff query for RAC." }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_HWR, 2), + "Lockup detected on DM%d, FWCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_HWR, 3), + "Reset fw state for DM%d, FWCtx: 0x%08.8x, MemCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_HWR, 0), + "Reset HW" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_HWR, 0), + "Lockup recovered." 
}, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_HWR, 2), + "Lock-up DM%d FWCtx: 0x%08.8x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_HWR, 4), + "Lockup detected: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_HWR, 3), + "Early fault detection: GLB(%d->%d), PER-DM(0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_HWR, 3), + "Hold scheduling due lockup: GLB(%d), PER-DM(0x%08x->0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_HWR, 4), + "False lockup detected: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_HWR, 4), + "BRN37729: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_HWR, 3), + "Freelists reconstructed: GLB(%d->%d), PER-DM(0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_HWR, 4), + "Reconstructing freelists: %u (0-No, 1-Yes): GLB(%d->%d), PER-DM(0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_HWR, 3), + "HW poll %u (0-Unset 1-Set) failed (reg:0x%08x val:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_HWR, 2), + "Discarded cmd on DM%u FWCtx=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_HWR, 6), + "Discarded cmd on DM%u (reason=%u) HWRTData=0x%08x (st: %d), FWCtx 0x%08x @ %d" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_HWR, 2), + "PM fence WA could not be applied, Valid TA Setup: %d, RD powered off: %d" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_HWR, 5), + "FL snapshot RTD 0x%08.8x - local (0x%08.8x): %d, global (0x%08.8x): %d" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_HWR, 8), + "FL check RTD 0x%08.8x, discard: %d - local (0x%08.8x): s%d?=c%d, global (0x%08.8x): s%d?=c%d" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_HWR, 2), + "FL reconstruction 0x%08.8x c%d" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_HWR, 3), + "3D check: missing TA FWCtx 0x%08.8x @ %d, RTD 0x%08x." }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_HWR, 2), + "Reset HW (mmu:%d, extmem: %d)" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_HWR, 4), + "Zero TA caches for FWCtx: 0x%08.8x (TPC addr: 0x%08x%08x, size: %d bytes)" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_HWR, 2), + "Recovery DM%u: Freelists reconstructed. New R-Flags=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_HWR, 5), + "Recovery DM%u: FWCtx 0x%08x skipped to command @ %u. PR=%u. New R-Flags=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_HWR, 1), + "Recovery DM%u: DM fully recovered" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_HWR, 2), + "DM%u: Hold scheduling due to R-Flag = 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_HWR, 0), + "Analysis: Need freelist reconstruction" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_HWR, 2), + "Analysis DM%u: Lockup FWCtx: 0x%08.8x. Need to skip to next command" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_HWR, 2), + "Analysis DM%u: Lockup while TA is OOM FWCtx: 0x%08.8x. Need to skip to next command" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_HWR, 2), + "Analysis DM%u: Lockup while partial render FWCtx: 0x%08.8x. Need PR cleanup" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_HWR, 0), + "GPU has locked up" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_HWR, 1), + "DM%u ready for HWR" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_HWR, 2), + "Recovery DM%u: Updated Recovery counter. 
New R-Flags=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_HWR, 1), + "Analysis: BRN37729 detected, reset TA and re-kicked 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_HWR, 1), + "DM%u timed out" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_HWR, 1), + "RGX_CR_EVENT_STATUS=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_HWR, 2), + "DM%u lockup falsely detected, R-Flags=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_HWR, 0), + "GPU has overrun its deadline" }, + { ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_HWR, 0), + "GPU has failed a poll" }, + { ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_HWR, 2), + "RGX DM%u phase count=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_HWR, 2), + "Reset HW (loop:%d, poll failures: 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_HWR, 1), + "MMU fault event: 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_HWR, 1), + "BIF1 page fault detected (Bank1 MMU Status: 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_HWR, 1), + "Fast CRC Failed. Proceeding to full register checking (DM: %u)." }, + { ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_HWR, 2), + "Meta MMU page fault detected (Meta MMU Status: 0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_HWR, 2), + "Fast CRC Check result for DM%u is HWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_HWR, 2), + "Full Signature Check result for DM%u is HWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_HWR, 3), + "Final result for DM%u is HWRNeeded=%u with HWRChecksToGo=%u" }, + { ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_HWR, 3), + "USC Slots result for DM%u is HWRNeeded=%u USCSlotsUsedByDM=%d" }, + { ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_HWR, 2), + "Deadline counter for DM%u is HWRDeadline=%u" }, + { ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_HWR, 1), + "Holding Scheduling on OSid %u due to pending freelist reconstruction" }, + { ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_HWR, 2), + "Requesting reconstruction for freelist 0x%x (ID=%d)" }, + { ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_HWR, 1), + "Reconstruction of freelist ID=%d complete" }, + { ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_HWR, 4), + "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global,2:mmu) on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_HWR, 1), + "Reconstruction of freelist ID=%d failed" }, + { ROGUE_FW_LOG_CREATESFID(56, ROGUE_FW_GROUP_HWR, 4), + "Restricting PDS Tasks to help other stalling DMs (RunningMask=0x%02x, StallingMask=0x%02x, PDS_CTRL=0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_HWR, 4), + "Unrestricting PDS Tasks again (RunningMask=0x%02x, StallingMask=0x%02x, PDS_CTRL=0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_HWR, 2), + "USC slots: %u used by DM%u" }, + { ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_HWR, 1), + "USC slots: %u empty" }, + { ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_HWR, 5), + "HCS DM%d's Context Switch failed to meet deadline. 
Current time: 0x%08x%08x, deadline: 0x%08x%08x" }, + { ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_HWR, 1), + "Begin hardware reset (HWR Counter=%d)" }, + { ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_HWR, 1), + "Finished hardware reset (HWR Counter=%d)" }, + { ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_HWR, 2), + "Holding Scheduling on DM %u for OSid %u due to pending freelist reconstruction" }, + { ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_HWR, 5), + "User Mode Queue ROff reset: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u becomes StreamStartOffset = %u)" }, + { ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_HWR, 4), + "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global) on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_HWR, 3), + "Mips page fault detected (BadVAddr: 0x%08x, EntryLo0: 0x%08x, EntryLo1: 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_HWR, 1), + "At least one other DM is running okay so DM%u will get another chance" }, + { ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_HWR, 2), + "Reconstructing in FW, FL: 0x%x (ID=%d)" }, + { ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_HWR, 4), + "Zero RTC for FWCtx: 0x%08.8x (RTC addr: 0x%08x%08x, size: %d bytes)" }, + { ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_HWR, 5), + "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global) phase: %d (0:TA, 1:3D) on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_HWR, 3), + "Start long HW poll %u (0-Unset 1-Set) for (reg:0x%08x val:0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_HWR, 1), + "End long HW poll (result=%d)" }, + { ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_HWR, 3), + "DM%u has taken %d ticks and deadline is %d ticks" }, + { ROGUE_FW_LOG_CREATESFID(74, ROGUE_FW_GROUP_HWR, 5), + "USC Watchdog result for DM%u is HWRNeeded=%u Status=%u USCs={0x%x} with HWRChecksToGo=%u" }, + { ROGUE_FW_LOG_CREATESFID(75, ROGUE_FW_GROUP_HWR, 6), + "Reconstruction needed for freelist 0x%x (ID=%d) OSid: %d type: %d (0:local,1:global) phase: %d (0:TA, 1:3D) on HW context %u" }, + { ROGUE_FW_LOG_CREATESFID(76, ROGUE_FW_GROUP_HWR, 1), + "GPU-%u has locked up" }, + { ROGUE_FW_LOG_CREATESFID(77, ROGUE_FW_GROUP_HWR, 1), + "DM%u has locked up" }, + { ROGUE_FW_LOG_CREATESFID(78, ROGUE_FW_GROUP_HWR, 2), + "Core %d RGX_CR_EVENT_STATUS=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(79, ROGUE_FW_GROUP_HWR, 2), + "RGX_CR_MULTICORE_EVENT_STATUS%u=0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(80, ROGUE_FW_GROUP_HWR, 5), + "BIF0 page fault detected (Core %d MMU Status: 0x%08x%08x Req Status: 0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(81, ROGUE_FW_GROUP_HWR, 3), + "MMU page fault detected (Core %d MMU Status: 0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(82, ROGUE_FW_GROUP_HWR, 4), + "MMU page fault detected (Core %d MMU Status: 0x%08x%08x 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(83, ROGUE_FW_GROUP_HWR, 4), + "Reset HW (core:%d of %d, loop:%d, poll failures: 0x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(84, ROGUE_FW_GROUP_HWR, 3), + "Fast CRC Check result for Core%u, DM%u is HWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(85, ROGUE_FW_GROUP_HWR, 3), + "Full Signature Check result for Core%u, DM%u is HWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(86, ROGUE_FW_GROUP_HWR, 4), + "USC Slots result for Core%u, DM%u is HWRNeeded=%u USCSlotsUsedByDM=%d" }, + { ROGUE_FW_LOG_CREATESFID(87, ROGUE_FW_GROUP_HWR, 6), + "USC Watchdog result for Core%u DM%u is HWRNeeded=%u Status=%u USCs={0x%x} with HWRChecksToGo=%u" }, + { ROGUE_FW_LOG_CREATESFID(88, ROGUE_FW_GROUP_HWR, 3), + 
"RISC-V MMU page fault detected (FWCORE MMU Status 0x%08x Req Status 0x%08x%08x)" }, + { ROGUE_FW_LOG_CREATESFID(89, ROGUE_FW_GROUP_HWR, 2), + "TEXAS1_PFS poll failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(90, ROGUE_FW_GROUP_HWR, 2), + "BIF_PFS poll failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(91, ROGUE_FW_GROUP_HWR, 2), + "MMU_ABORT_PM_STATUS set poll failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(92, ROGUE_FW_GROUP_HWR, 2), + "MMU_ABORT_PM_STATUS unset poll failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(93, ROGUE_FW_GROUP_HWR, 2), + "MMU_CTRL_INVAL poll (all but fw) failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(94, ROGUE_FW_GROUP_HWR, 2), + "MMU_CTRL_INVAL poll (all) failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(95, ROGUE_FW_GROUP_HWR, 3), + "TEXAS%d_PFS poll failed on core %d with value 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(96, ROGUE_FW_GROUP_HWR, 3), + "Extra Registers Check result for Core%u, DM%u is HWRNeeded=%u" }, + { ROGUE_FW_LOG_CREATESFID(97, ROGUE_FW_GROUP_HWR, 1), + "FW attempted to write to read-only GPU address 0x%08x" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_HWP, 2), + "Block 0x%x mapped to Config Idx %u" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_HWP, 1), + "Block 0x%x omitted from event - not enabled in HW" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_HWP, 1), + "Block 0x%x included in event - enabled in HW" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_HWP, 2), + "Select register state hi_0x%x lo_0x%x" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_HWP, 1), + "Counter stream block header word 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_HWP, 1), + "Counter register offset 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_HWP, 1), + "Block 0x%x config unset, skipping" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_HWP, 1), + "Accessing Indirect block 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_HWP, 1), + "Accessing Direct block 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_HWP, 1), + "Programmed counter select register at offset 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_HWP, 2), + "Block register offset 0x%x and value 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_HWP, 1), + "Reading config block from driver 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_HWP, 2), + "Reading block range 0x%x to 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_HWP, 1), + "Recording block 0x%x config from driver" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_HWP, 0), + "Finished reading config block from driver" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_HWP, 2), + "Custom Counter offset: 0x%x value: 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_HWP, 2), + "Select counter n:%u ID:0x%x" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_HWP, 3), + "The counter ID 0x%x is not allowed. The package [b:%u, n:%u] will be discarded" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_HWP, 1), + "Custom Counters filter status %d" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_HWP, 2), + "The Custom block %d is not allowed. Use only blocks lower than %d. The package will be discarded" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_HWP, 2), + "The package will be discarded because it contains %d counters IDs while the upper limit is %d" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_HWP, 2), + "Check Filter 0x%x is 0x%x ?" 
}, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_HWP, 1), + "The custom block %u is reset" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_HWP, 1), + "Encountered an invalid command (%d)" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_HWP, 2), + "HWPerf Queue is full, we will have to wait for space! (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_HWP, 3), + "HWPerf Queue is fencing, we are waiting for Roff = %d (Roff = %u, Woff = %u)" }, + { ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_HWP, 1), + "Custom Counter block: %d" }, + { ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_HWP, 1), + "Block 0x%x ENABLED" }, + { ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_HWP, 1), + "Block 0x%x DISABLED" }, + { ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_HWP, 2), + "Accessing Indirect block 0x%x, instance %u" }, + { ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_HWP, 2), + "Counter register 0x%x, Value 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_HWP, 1), + "Counters filter status %d" }, + { ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_HWP, 2), + "Block 0x%x mapped to Ctl Idx %u" }, + { ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_HWP, 0), + "Block(s) in use for workload estimation." }, + { ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_HWP, 3), + "GPU %u Cycle counter 0x%x, Value 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_HWP, 3), + "GPU Mask 0x%x Cycle counter 0x%x, Value 0x%x" }, + { ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_HWP, 1), + "Blocks IGNORED for GPU %u" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_DMA, 5), + "Transfer 0x%02x request: 0x%02x%08x -> 0x%08x, size %u" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_DMA, 4), + "Transfer of type 0x%02x expected on channel %u, 0x%02x found, status %u" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_DMA, 1), + "DMA Interrupt register 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_DMA, 1), + "Waiting for transfer of type 0x%02x completion..." }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_DMA, 3), + "Loading of cCCB data from FW common context 0x%08x (offset: %u, size: %u) failed" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_DMA, 3), + "Invalid load of cCCB data from FW common context 0x%08x (offset: %u, size: %u)" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_DMA, 1), + "Transfer 0x%02x request poll failure" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_DMA, 2), + "Boot transfer(s) failed (code? %u, data? %u), used slower memcpy instead" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_DMA, 7), + "Transfer 0x%02x request on ch. 
%u: system 0x%02x%08x, coremem 0x%08x, flags 0x%x, size %u" }, + + { ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_DBG, 2), + "0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_DBG, 1), + "0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_DBG, 2), + "0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_DBG, 3), + "0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_DBG, 4), + "0x%08x 0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_DBG, 5), + "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_DBG, 6), + "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_DBG, 7), + "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_DBG, 8), + "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" }, + { ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_DBG, 1), + "%d" }, + { ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_DBG, 2), + "%d %d" }, + { ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_DBG, 3), + "%d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_DBG, 4), + "%d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_DBG, 5), + "%d %d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_DBG, 6), + "%d %d %d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_DBG, 7), + "%d %d %d %d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_DBG, 8), + "%d %d %d %d %d %d %d %d" }, + { ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_DBG, 1), + "%u" }, + { ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_DBG, 2), + "%u %u" }, + { ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_DBG, 3), + "%u %u %u" }, + { ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_DBG, 4), + "%u %u %u %u" }, + { ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_DBG, 5), + "%u %u %u %u %u" }, + { ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_DBG, 6), + "%u %u %u %u %u %u" }, + { ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_DBG, 7), + "%u %u %u %u %u %u %u" }, + { ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_DBG, 8), + "%u %u %u %u %u %u %u %u" }, + + { ROGUE_FW_LOG_CREATESFID(65535, ROGUE_FW_GROUP_NULL, 15), + "You should not use this string" }, +}; + +#define ROGUE_FW_SF_FIRST ROGUE_FW_LOG_CREATESFID(0, ROGUE_FW_GROUP_NULL, 0) +#define ROGUE_FW_SF_MAIN_ASSERT_FAILED ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MAIN, 1) +#define ROGUE_FW_SF_LAST ROGUE_FW_LOG_CREATESFID(65535, ROGUE_FW_GROUP_NULL, 15) + +#endif /* PVR_ROGUE_FWIF_SF_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h new file mode 100644 index 000000000000..6c09c15bf9bd --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_FWIF_SHARED_H +#define PVR_ROGUE_FWIF_SHARED_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#define ROGUE_FWIF_NUM_RTDATAS 2U +#define ROGUE_FWIF_NUM_GEOMDATAS 1U +#define ROGUE_FWIF_NUM_RTDATA_FREELISTS 2U +#define ROGUE_NUM_GEOM_CORES 1U + +#define ROGUE_NUM_GEOM_CORES_SIZE 2U + +/* + * Maximum number of UFOs in a CCB command. + * The number is based on having 32 sync prims (as originally), plus 32 sync + * checkpoints. 
+ * Once the use of sync prims is no longer supported, we will retain + * the same total (64): the number of sync checkpoints which may be + * supporting a fence is not visible to the client driver, so it has to + * allow for the number of different timelines involved in fence merges. + */ +#define ROGUE_FWIF_CCB_CMD_MAX_UFOS (32U + 32U) + +/* + * This is a generic limit imposed on any DM (GEOMETRY, FRAGMENT, CDM, TDM, 2D, TRANSFER) + * command passed through the bridge. + * Just across the bridge in the server, any incoming kick command size is + * checked against this maximum limit. + * If the incoming command size is larger than the specified limit, + * the bridge call is rejected with an error. + */ +#define ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE (1024U) + +#define ROGUE_FWIF_PRBUFFER_START (0) +#define ROGUE_FWIF_PRBUFFER_ZSBUFFER (0) +#define ROGUE_FWIF_PRBUFFER_MSAABUFFER (1) +#define ROGUE_FWIF_PRBUFFER_MAXSUPPORTED (2) + +struct rogue_fwif_dma_addr { + aligned_u64 dev_addr; + u32 fw_addr; + u32 padding; +} __aligned(8); + +struct rogue_fwif_ufo { + u32 addr; + u32 value; +}; + +#define ROGUE_FWIF_UFO_ADDR_IS_SYNC_CHECKPOINT (1) + +struct rogue_fwif_sync_checkpoint { + u32 state; + u32 fw_ref_count; +}; + +struct rogue_fwif_cleanup_ctl { + /* Number of commands received by the FW */ + u32 submitted_commands; + /* Number of commands executed by the FW */ + u32 executed_commands; +} __aligned(8); + +/* + * Used to share frame numbers across UM-KM-FW. The frame number is set in + * UM and is required in both KM (for HTB) and FW (for the FW trace). + * + * May be used to house Kick flags in the future. + */ +struct rogue_fwif_cmd_common { + /* associated frame number */ + u32 frame_num; +}; + +/* + * Geometry and fragment commands require a set of firmware addresses that are stored in the Kernel. + * Client has handle(s) to Kernel containers storing these addresses, instead of raw addresses. We + * have to patch/write these addresses in KM to prevent UM from controlling FW addresses directly. + * Typedefs for geometry and fragment commands are shared between Client and Firmware (both + * single-BVNC). Kernel is implemented in a multi-BVNC manner, so it can't use geometry|fragment + * CMD type definitions directly. Therefore we have a SHARED block that is shared between UM-KM-FW + * across all BVNC configurations. + */ +struct rogue_fwif_cmd_geom_frag_shared { + /* Common command attributes */ + struct rogue_fwif_cmd_common cmn; + + /* + * RTData associated with this command; used for context selection and + * for storing out the HW context when TA is switched out, so it can be + * resumed later. + */ + u32 hwrt_data_fw_addr; + + /* Supported PR Buffers like Z/S/MSAA Scratch */ + u32 pr_buffer_fw_addr[ROGUE_FWIF_PRBUFFER_MAXSUPPORTED]; +}; + +/* + * Client Circular Command Buffer (CCCB) control structure. + * This is shared between the Server and the Firmware and holds byte offsets + * into the CCCB as well as the wrapping mask to aid wrap around. A given + * snapshot of this queue with Cmd 1 running on the GPU might be: + * + * Roff Doff Woff + * [..........|-1----------|=2===|=3===|=4===|~5~~~~|~6~~~~|~7~~~~|..........] + * < runnable commands >< !ready to run > + * + * Cmd 1 : Currently executing on the GPU data master. + * Cmd 2,3,4: Fence dependencies met, commands runnable. + * Cmd 5... : Fence dependency not met yet. + * + * A sketch of the wrap-around space computation this layout implies is + * given after the layout checks below. + */ +struct rogue_fwif_cccb_ctl { + /* Host write offset into CCB. This must be aligned to 16 bytes. */ + u32 write_offset; + /* + * Firmware read offset into CCB.
+	 * Points to the command that is runnable on the GPU, if R != W.
+	 */
+	u32 read_offset;
+	/*
+	 * Firmware fence dependency offset. Points to commands not ready, i.e.
+	 * fence dependencies are not met.
+	 */
+	u32 dep_offset;
+	/* Offset wrapping mask (total capacity of the CCB in bytes, minus 1). */
+	u32 wrap_mask;
+
+	/* Only used if SUPPORT_AGP is present. */
+	u32 read_offset2;
+
+	/* Only used if SUPPORT_AGP4 is present. */
+	u32 read_offset3;
+	/* Only used if SUPPORT_AGP4 is present. */
+	u32 read_offset4;
+
+	u32 padding;
+} __aligned(8);
+
+#define ROGUE_FW_LOCAL_FREELIST (0)
+#define ROGUE_FW_GLOBAL_FREELIST (1)
+#define ROGUE_FW_FREELIST_TYPE_LAST ROGUE_FW_GLOBAL_FREELIST
+#define ROGUE_FW_MAX_FREELISTS (ROGUE_FW_FREELIST_TYPE_LAST + 1U)
+
+struct rogue_fwif_geom_registers_caswitch {
+	u64 geom_reg_vdm_context_state_base_addr;
+	u64 geom_reg_vdm_context_state_resume_addr;
+	u64 geom_reg_ta_context_state_base_addr;
+
+	struct {
+		u64 geom_reg_vdm_context_store_task0;
+		u64 geom_reg_vdm_context_store_task1;
+		u64 geom_reg_vdm_context_store_task2;
+
+		/* VDM resume state update controls */
+		u64 geom_reg_vdm_context_resume_task0;
+		u64 geom_reg_vdm_context_resume_task1;
+		u64 geom_reg_vdm_context_resume_task2;
+
+		u64 geom_reg_vdm_context_store_task3;
+		u64 geom_reg_vdm_context_store_task4;
+
+		u64 geom_reg_vdm_context_resume_task3;
+		u64 geom_reg_vdm_context_resume_task4;
+	} geom_state[2];
+};
+
+#define ROGUE_FWIF_GEOM_REGISTERS_CSWITCH_SIZE \
+	sizeof(struct rogue_fwif_geom_registers_caswitch)
+
+struct rogue_fwif_cdm_registers_cswitch {
+	u64 cdmreg_cdm_context_pds0;
+	u64 cdmreg_cdm_context_pds1;
+	u64 cdmreg_cdm_terminate_pds;
+	u64 cdmreg_cdm_terminate_pds1;
+
+	/* CDM resume controls */
+	u64 cdmreg_cdm_resume_pds0;
+	u64 cdmreg_cdm_context_pds0_b;
+	u64 cdmreg_cdm_resume_pds0_b;
+};
+
+struct rogue_fwif_static_rendercontext_state {
+	/* Geom registers for ctx switch */
+	struct rogue_fwif_geom_registers_caswitch ctxswitch_regs[ROGUE_NUM_GEOM_CORES_SIZE]
+		__aligned(8);
+};
+
+#define ROGUE_FWIF_STATIC_RENDERCONTEXT_SIZE \
+	sizeof(struct rogue_fwif_static_rendercontext_state)
+
+struct rogue_fwif_static_computecontext_state {
+	/* CDM registers for ctx switch */
+	struct rogue_fwif_cdm_registers_cswitch ctxswitch_regs __aligned(8);
+};
+
+#define ROGUE_FWIF_STATIC_COMPUTECONTEXT_SIZE \
+	sizeof(struct rogue_fwif_static_computecontext_state)
+
+enum rogue_fwif_prbuffer_state {
+	ROGUE_FWIF_PRBUFFER_UNBACKED = 0,
+	ROGUE_FWIF_PRBUFFER_BACKED,
+	ROGUE_FWIF_PRBUFFER_BACKING_PENDING,
+	ROGUE_FWIF_PRBUFFER_UNBACKING_PENDING,
+};
+
+struct rogue_fwif_prbuffer {
+	/* Buffer ID */
+	u32 buffer_id;
+	/* Needs on-demand Z/S/MSAA buffer allocation */
+	bool on_demand __aligned(4);
+	/* Z/S/MSAA buffer state */
+	enum rogue_fwif_prbuffer_state state;
+	/* Cleanup state */
+	struct rogue_fwif_cleanup_ctl cleanup_state;
+	/* Compatibility and other flags */
+	u32 prbuffer_flags;
+} __aligned(8);
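+
+/*
+ * A minimal sketch (function name illustrative, not part of the FW
+ * interface) of the ring arithmetic implied by struct rogue_fwif_cccb_ctl
+ * above: with a power-of-two capacity and wrap_mask == capacity - 1, the
+ * space a producer can fill before catching up with the firmware read
+ * offset is the classic circular-buffer expression below, which keeps one
+ * byte free so that read_offset == write_offset always means "empty".
+ */
+static inline u32 rogue_fwif_cccb_space_example(const struct rogue_fwif_cccb_ctl *ctl)
+{
+	return (ctl->read_offset - ctl->write_offset - 1U) & ctl->wrap_mask;
+}
+
+/* Last reset reason for a context.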
*/ +enum rogue_context_reset_reason { + /* No reset reason recorded */ + ROGUE_CONTEXT_RESET_REASON_NONE = 0, + /* Caused a reset due to locking up */ + ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP = 1, + /* Affected by another context locking up */ + ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP = 2, + /* Overran the global deadline */ + ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING = 3, + /* Affected by another context overrunning */ + ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4, + /* Forced reset to ensure scheduling requirements */ + ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5, + /* FW Safety watchdog triggered */ + ROGUE_CONTEXT_RESET_REASON_FW_WATCHDOG = 12, + /* FW page fault (no HWR) */ + ROGUE_CONTEXT_RESET_REASON_FW_PAGEFAULT = 13, + /* FW execution error (GPU reset requested) */ + ROGUE_CONTEXT_RESET_REASON_FW_EXEC_ERR = 14, + /* Host watchdog detected FW error */ + ROGUE_CONTEXT_RESET_REASON_HOST_WDG_FW_ERR = 15, + /* Geometry DM OOM event is not allowed */ + ROGUE_CONTEXT_GEOM_OOM_DISABLED = 16, +}; + +struct rogue_context_reset_reason_data { + enum rogue_context_reset_reason reset_reason; + u32 reset_ext_job_ref; +}; + +#include "pvr_rogue_fwif_shared_check.h" + +#endif /* PVR_ROGUE_FWIF_SHARED_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h new file mode 100644 index 000000000000..597ed54bbd3a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_FWIF_SHARED_CHECK_H +#define PVR_ROGUE_FWIF_SHARED_CHECK_H + +#include <linux/build_bug.h> + +#define OFFSET_CHECK(type, member, offset) \ + static_assert(offsetof(type, member) == (offset), \ + "offsetof(" #type ", " #member ") incorrect") + +#define SIZE_CHECK(type, size) \ + static_assert(sizeof(type) == (size), #type " is incorrect size") + +OFFSET_CHECK(struct rogue_fwif_dma_addr, dev_addr, 0); +OFFSET_CHECK(struct rogue_fwif_dma_addr, fw_addr, 8); +SIZE_CHECK(struct rogue_fwif_dma_addr, 16); + +OFFSET_CHECK(struct rogue_fwif_ufo, addr, 0); +OFFSET_CHECK(struct rogue_fwif_ufo, value, 4); +SIZE_CHECK(struct rogue_fwif_ufo, 8); + +OFFSET_CHECK(struct rogue_fwif_cleanup_ctl, submitted_commands, 0); +OFFSET_CHECK(struct rogue_fwif_cleanup_ctl, executed_commands, 4); +SIZE_CHECK(struct rogue_fwif_cleanup_ctl, 8); + +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, write_offset, 0); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset, 4); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, dep_offset, 8); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, wrap_mask, 12); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset2, 16); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset3, 20); +OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset4, 24); +SIZE_CHECK(struct rogue_fwif_cccb_ctl, 32); + +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_reg_vdm_context_state_base_addr, 0); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_reg_vdm_context_state_resume_addr, 8); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_reg_ta_context_state_base_addr, 16); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task0, 24); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task1, 32); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + 
geom_state[0].geom_reg_vdm_context_store_task2, 40); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task0, 48); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task1, 56); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task2, 64); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task3, 72); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task4, 80); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task3, 88); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task4, 96); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task0, 104); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task1, 112); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task2, 120); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task0, 128); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task1, 136); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task2, 144); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task3, 152); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task4, 160); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task3, 168); +OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task4, 176); +SIZE_CHECK(struct rogue_fwif_geom_registers_caswitch, 184); + +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0, 0); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds1, 8); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds, 16); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds1, 24); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0, 32); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0_b, 40); +OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0_b, 48); +SIZE_CHECK(struct rogue_fwif_cdm_registers_cswitch, 56); + +OFFSET_CHECK(struct rogue_fwif_static_rendercontext_state, ctxswitch_regs, 0); +SIZE_CHECK(struct rogue_fwif_static_rendercontext_state, 368); + +OFFSET_CHECK(struct rogue_fwif_static_computecontext_state, ctxswitch_regs, 0); +SIZE_CHECK(struct rogue_fwif_static_computecontext_state, 56); + +OFFSET_CHECK(struct rogue_fwif_cmd_common, frame_num, 0); +SIZE_CHECK(struct rogue_fwif_cmd_common, 4); + +OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, cmn, 0); +OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, hwrt_data_fw_addr, 4); +OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, pr_buffer_fw_addr, 8); +SIZE_CHECK(struct rogue_fwif_cmd_geom_frag_shared, 16); + +#endif /* PVR_ROGUE_FWIF_SHARED_CHECK_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h new file mode 
100644
index 000000000000..1c2c4ebedc25
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_STREAM_H
+#define PVR_ROGUE_FWIF_STREAM_H
+
+/**
+ * DOC: Streams
+ *
+ * Commands are submitted to the kernel driver in the form of streams.
+ *
+ * A command stream has the following layout:
+ *  - A 64-bit header containing:
+ *    * A u32 containing the length of the main stream inclusive of the length of the header.
+ *    * A u32 for padding.
+ *  - The main stream data.
+ *  - The extension stream (optional), which is composed of:
+ *    * One or more headers.
+ *    * The extension stream data, corresponding to the extension headers.
+ *
+ * The main stream provides the base command data. This has a fixed layout based on the features
+ * supported by a given GPU.
+ *
+ * The extension stream provides the command parameters that are required for BRNs & ERNs for the
+ * current GPU. This stream consists of one or more headers, followed by data for each given
+ * BRN/ERN.
+ *
+ * Each header is a u32 containing a bitmask of quirks & enhancements in the extension stream, a
+ * "type" field determining the set of quirks & enhancements the bitmask represents, and a
+ * continuation bit determining whether any more headers are present. The headers are then followed
+ * by command data; this is specific to each quirk/enhancement. All unused / reserved bits in the
+ * header must be set to 0.
+ *
+ * All parameters and headers in the main and extension streams must be naturally aligned.
+ *
+ * If a parameter appears in both the main and extension streams, then the extension parameter is
+ * used.
+ */
+
+/*
+ * Stream extension header definition
+ */
+#define PVR_STREAM_EXTHDR_TYPE_SHIFT 29U
+#define PVR_STREAM_EXTHDR_TYPE_MASK (7U << PVR_STREAM_EXTHDR_TYPE_SHIFT)
+#define PVR_STREAM_EXTHDR_TYPE_MAX 8U
+#define PVR_STREAM_EXTHDR_CONTINUATION BIT(28U)
+
+#define PVR_STREAM_EXTHDR_DATA_MASK ~(PVR_STREAM_EXTHDR_TYPE_MASK | PVR_STREAM_EXTHDR_CONTINUATION)
+
+/*
+ * Stream extension header - Geometry 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_GEOM0 0U
+
+#define PVR_STREAM_EXTHDR_GEOM0_BRN49927 BIT(0U)
+
+#define PVR_STREAM_EXTHDR_GEOM0_VALID PVR_STREAM_EXTHDR_GEOM0_BRN49927
+
+/*
+ * Stream extension header - Fragment 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_FRAG0 0U
+
+#define PVR_STREAM_EXTHDR_FRAG0_BRN47217 BIT(0U)
+#define PVR_STREAM_EXTHDR_FRAG0_BRN49927 BIT(1U)
+
+#define PVR_STREAM_EXTHDR_FRAG0_VALID \
+	(PVR_STREAM_EXTHDR_FRAG0_BRN47217 | PVR_STREAM_EXTHDR_FRAG0_BRN49927)
+
+/*
+ * Stream extension header - Compute 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_COMPUTE0 0U
+
+#define PVR_STREAM_EXTHDR_COMPUTE0_BRN49927 BIT(0U)
+
+#define PVR_STREAM_EXTHDR_COMPUTE0_VALID PVR_STREAM_EXTHDR_COMPUTE0_BRN49927
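+
+/*
+ * For illustration only: the three fields of an extension header word can be
+ * picked apart as below (these helper macros are hypothetical and exist only
+ * to make the encoding above concrete). For example, a geometry header of
+ * 0x10000001 decodes as type 0 (GEOM0), continuation bit set, and BRN49927
+ * data present.
+ */
+#define PVR_STREAM_EXTHDR_EXAMPLE_TYPE(hdr) \
+	(((hdr) & PVR_STREAM_EXTHDR_TYPE_MASK) >> PVR_STREAM_EXTHDR_TYPE_SHIFT)
+#define PVR_STREAM_EXTHDR_EXAMPLE_DATA(hdr) \
+	((hdr) & PVR_STREAM_EXTHDR_DATA_MASK)
+#define PVR_STREAM_EXTHDR_EXAMPLE_HAS_NEXT(hdr) \
+	(!!((hdr) & PVR_STREAM_EXTHDR_CONTINUATION))
+
+#endif /* PVR_ROGUE_FWIF_STREAM_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h b/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h
new file mode 100644
index 000000000000..684766006703
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_HEAP_CONFIG_H
+#define PVR_ROGUE_HEAP_CONFIG_H
+
+#include <linux/sizes.h>
+
+/*
+ * ROGUE Device Virtual Address Space Definitions
+ *
+ * This file defines the ROGUE virtual address heaps that are used in
+ * application memory contexts.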
+ * It also shows where the Firmware memory heap fits into this, but the
+ * firmware heap is only ever created in the kernel driver and never
+ * exposed to userspace.
+ *
+ * ROGUE_PDSCODEDATA_HEAP_BASE and ROGUE_USCCODE_HEAP_BASE will be programmed,
+ * on a global basis, into ROGUE_CR_PDS_EXEC_BASE and ROGUE_CR_USC_CODE_BASE_*
+ * respectively. Therefore if client drivers use multiple configs they must
+ * still be consistent with their definitions for these heaps.
+ *
+ * Base addresses have to be a multiple of 4 MiB.
+ * Heaps must not start at 0x0000000000, as this is reserved for internal
+ * use within the driver.
+ * Range comments (those starting in column 0 below) act as section headings
+ * and sit above the heaps in that range. Often this is the reserved size of
+ * the heap within the range.
+ */
+
+/* 0x00_0000_0000 ************************************************************/
+
+/* 0x00_0000_0000 - 0x00_0040_0000 */
+/* 0 MiB to 4 MiB, size of 4 MiB : RESERVED */
+
+/* 0x00_0040_0000 - 0x7F_FFC0_0000 **/
+/* 4 MiB to 512 GiB, size of 512 GiB less 4 MiB : RESERVED **/
+
+/* 0x80_0000_0000 ************************************************************/
+
+/* 0x80_0000_0000 - 0x9F_FFFF_FFFF **/
+/* 512 GiB to 640 GiB, size of 128 GiB : GENERAL_HEAP **/
+#define ROGUE_GENERAL_HEAP_BASE 0x8000000000ull
+#define ROGUE_GENERAL_HEAP_SIZE SZ_128G
+
+/* 0xA0_0000_0000 - 0xAF_FFFF_FFFF */
+/* 640 GiB to 704 GiB, size of 64 GiB : FREE */
+
+/* 0xB0_0000_0000 - 0xB7_FFFF_FFFF */
+/* 704 GiB to 736 GiB, size of 32 GiB : FREE */
+
+/* 0xB8_0000_0000 - 0xBF_FFFF_FFFF */
+/* 736 GiB to 768 GiB, size of 32 GiB : RESERVED */
+
+/* 0xC0_0000_0000 ************************************************************/
+
+/* 0xC0_0000_0000 - 0xD9_FFFF_FFFF */
+/* 768 GiB to 872 GiB, size of 104 GiB : FREE */
+
+/* 0xDA_0000_0000 - 0xDA_FFFF_FFFF */
+/* 872 GiB to 876 GiB, size of 4 GiB : PDSCODEDATA_HEAP */
+#define ROGUE_PDSCODEDATA_HEAP_BASE 0xDA00000000ull
+#define ROGUE_PDSCODEDATA_HEAP_SIZE SZ_4G
+
+/* 0xDB_0000_0000 - 0xDB_FFFF_FFFF */
+/* 876 GiB to 880 GiB, size of 256 MiB (reserved 4 GiB) : BRN **/
+/*
+ * The BRN63142 quirk workaround requires Region Header memory to be at the top
+ * of a 16 GiB aligned range. This is so when masked with 0x03FFFFFFFF the
+ * address will avoid aliasing PB addresses. Start at 879.75 GiB. Size of 256 MiB.
+ */
+#define ROGUE_RGNHDR_HEAP_BASE 0xDBF0000000ull
+#define ROGUE_RGNHDR_HEAP_SIZE SZ_256M
+
+/* 0xDC_0000_0000 - 0xDF_FFFF_FFFF */
+/* 880 GiB to 896 GiB, size of 16 GiB : FREE */
+
+/* 0xE0_0000_0000 - 0xE0_FFFF_FFFF */
+/* 896 GiB to 900 GiB, size of 4 GiB : USCCODE_HEAP */
+#define ROGUE_USCCODE_HEAP_BASE 0xE000000000ull
+#define ROGUE_USCCODE_HEAP_SIZE SZ_4G
+
+/* 0xE1_0000_0000 - 0xE1_BFFF_FFFF */
+/* 900 GiB to 903 GiB, size of 3 GiB : RESERVED */
+
+/* 0xE1_C000_0000 - 0xE1_FFFF_FFFF */
+/* 903 GiB to 904 GiB, reserved 1 GiB : FIRMWARE_HEAP */
+#define ROGUE_FW_HEAP_BASE 0xE1C0000000ull
+
+/* 0xE2_0000_0000 - 0xE3_FFFF_FFFF */
+/* 904 GiB to 912 GiB, size of 8 GiB : FREE */
+
+/* 0xE4_0000_0000 - 0xE7_FFFF_FFFF */
+/* 912 GiB to 928 GiB, size of 16 GiB : TRANSFER_FRAG */
+#define ROGUE_TRANSFER_FRAG_HEAP_BASE 0xE400000000ull
+#define ROGUE_TRANSFER_FRAG_HEAP_SIZE SZ_16G
+
+/* 0xE8_0000_0000 - 0xF1_FFFF_FFFF */
+/* 928 GiB to 968 GiB, size of 40 GiB : RESERVED */
+
+/* 0xF2_0000_0000 - 0xF2_001F_FFFF **/
+/* 968 GiB to 968 GiB + 2 MiB, size of 2 MiB : VISTEST_HEAP */
+#define ROGUE_VISTEST_HEAP_BASE 0xF200000000ull
+#define ROGUE_VISTEST_HEAP_SIZE SZ_2M
+
+/* 0xF2_4000_0000 - 0xF2_FFFF_FFFF */
+/* 969 GiB to 972 GiB, size of 3 GiB : FREE */
+
+/* 0xF3_0000_0000 - 0xFF_FFFF_FFFF */
+/* 972 GiB to 1024 GiB, size of 52 GiB : FREE */
+
+/* 0xFF_FFFF_FFFF ************************************************************/
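+
+/*
+ * A compile-time spot check of the 4 MiB alignment rule stated at the top of
+ * this file. Illustrative only and not exhaustive; shown because the
+ * preprocessor can evaluate these base addresses directly.
+ */
+#if (ROGUE_GENERAL_HEAP_BASE & (SZ_4M - 1)) || \
+	(ROGUE_PDSCODEDATA_HEAP_BASE & (SZ_4M - 1)) || \
+	(ROGUE_USCCODE_HEAP_BASE & (SZ_4M - 1)) || \
+	(ROGUE_FW_HEAP_BASE & (SZ_4M - 1))
+#error "ROGUE heap base addresses must be multiples of 4 MiB"
+#endif
+
+#endif /* PVR_ROGUE_HEAP_CONFIG_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_meta.h b/drivers/gpu/drm/imagination/pvr_rogue_meta.h
new file mode 100644
index 000000000000..3020e6582daa
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_meta.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */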
+
+#ifndef PVR_ROGUE_META_H
+#define PVR_ROGUE_META_H
+
+/***** The META HW register definitions in this file are updated manually *****/
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+/*
+ ******************************************************************************
+ * META registers and MACROS
+ *****************************************************************************
+ */
+#define META_CR_CTRLREG_BASE(t) (0x04800000U + (0x1000U * (t)))
+
+#define META_CR_TXPRIVEXT (0x048000E8)
+#define META_CR_TXPRIVEXT_MINIM_EN BIT(7)
+
+#define META_CR_SYSC_JTAG_THREAD (0x04830030)
+#define META_CR_SYSC_JTAG_THREAD_PRIV_EN (0x00000004)
+
+#define META_CR_PERF_COUNT0 (0x0480FFE0)
+#define META_CR_PERF_COUNT1 (0x0480FFE8)
+#define META_CR_PERF_COUNT_CTRL_SHIFT (28)
+#define META_CR_PERF_COUNT_CTRL_MASK (0xF0000000)
+#define META_CR_PERF_COUNT_CTRL_DCACHEHITS (8 << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICACHEHITS (9 << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICACHEMISS \
+	(0xA << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICORE (0xD << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_THR_SHIFT (24)
+#define META_CR_PERF_COUNT_THR_MASK (0x0F000000)
+#define META_CR_PERF_COUNT_THR_0 (0x1 << META_CR_PERF_COUNT_THR_SHIFT)
+#define META_CR_PERF_COUNT_THR_1 (0x2 << META_CR_PERF_COUNT_THR_SHIFT)
+
+#define META_CR_TxVECINT_BHALT (0x04820500)
+#define META_CR_PERF_ICORE0 (0x0480FFD0)
+#define META_CR_PERF_ICORE1 (0x0480FFD8)
+#define META_CR_PERF_ICORE_DCACHEMISS (0x8)
+
+/* The CTRL selector constants above are already shifted into place. */
+#define META_CR_PERF_COUNT(ctrl, thr) \
+	(META_CR_PERF_COUNT_CTRL_##ctrl | \
+	 ((thr) << META_CR_PERF_COUNT_THR_SHIFT))
+
+#define META_CR_TXUXXRXDT_OFFSET (META_CR_CTRLREG_BASE(0U) + 0x0000FFF0U)
+#define META_CR_TXUXXRXRQ_OFFSET (META_CR_CTRLREG_BASE(0U) + 0x0000FFF8U)
+
+/* Poll for done. */
+#define META_CR_TXUXXRXRQ_DREADY_BIT (0x80000000U)
+/* Set for read. */
+#define META_CR_TXUXXRXRQ_RDnWR_BIT (0x00010000U)
+#define META_CR_TXUXXRXRQ_TX_S (12)
+#define META_CR_TXUXXRXRQ_RX_S (4)
+#define META_CR_TXUXXRXRQ_UXX_S (0)
+
+/* Internal ctrl regs. */
+#define META_CR_TXUIN_ID (0x0)
+/* Data unit regs. */
+#define META_CR_TXUD0_ID (0x1)
+/* Data unit regs. */
+#define META_CR_TXUD1_ID (0x2)
+/* Address unit regs. */
+#define META_CR_TXUA0_ID (0x3)
+/* Address unit regs. */
+#define META_CR_TXUA1_ID (0x4)
+/* PC registers. */
+#define META_CR_TXUPC_ID (0x5)
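+
+/*
+ * A minimal sketch of the indirect core-register read protocol the
+ * definitions above encode: build a request word selecting thread, register
+ * and unit, set the read bit, write it to TXUXXRXRQ, poll for DREADY, then
+ * fetch the value from TXUXXRXDT. The function name is illustrative and the
+ * MMIO accessors are passed in as parameters because this header carries no
+ * device context.
+ */
+static inline u32 meta_core_reg_read_example(u32 thr, u32 reg_num, u32 unit,
+					     void (*wr32)(u32 offset, u32 val),
+					     u32 (*rd32)(u32 offset))
+{
+	u32 req = ((u32)thr << META_CR_TXUXXRXRQ_TX_S) |
+		  ((u32)reg_num << META_CR_TXUXXRXRQ_RX_S) |
+		  ((u32)unit << META_CR_TXUXXRXRQ_UXX_S) |
+		  META_CR_TXUXXRXRQ_RDnWR_BIT;
+
+	wr32(META_CR_TXUXXRXRQ_OFFSET, req);
+
+	/* Poll for done. */
+	while (!(rd32(META_CR_TXUXXRXRQ_OFFSET) & META_CR_TXUXXRXRQ_DREADY_BIT))
+		;
+
+	return rd32(META_CR_TXUXXRXDT_OFFSET);
+}
+
+/* Macros to calculate register access values.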
*/ +#define META_CR_CORE_REG(thr, reg_num, unit) \ + (((u32)(thr) << META_CR_TXUXXRXRQ_TX_S) | \ + ((u32)(reg_num) << META_CR_TXUXXRXRQ_RX_S) | \ + ((u32)(unit) << META_CR_TXUXXRXRQ_UXX_S)) + +#define META_CR_THR0_PC META_CR_CORE_REG(0, 0, META_CR_TXUPC_ID) +#define META_CR_THR0_PCX META_CR_CORE_REG(0, 1, META_CR_TXUPC_ID) +#define META_CR_THR0_SP META_CR_CORE_REG(0, 0, META_CR_TXUA0_ID) + +#define META_CR_THR1_PC META_CR_CORE_REG(1, 0, META_CR_TXUPC_ID) +#define META_CR_THR1_PCX META_CR_CORE_REG(1, 1, META_CR_TXUPC_ID) +#define META_CR_THR1_SP META_CR_CORE_REG(1, 0, META_CR_TXUA0_ID) + +#define SP_ACCESS(thread) META_CR_CORE_REG(thread, 0, META_CR_TXUA0_ID) +#define PC_ACCESS(thread) META_CR_CORE_REG(thread, 0, META_CR_TXUPC_ID) + +#define META_CR_COREREG_ENABLE (0x0000000U) +#define META_CR_COREREG_STATUS (0x0000010U) +#define META_CR_COREREG_DEFR (0x00000A0U) +#define META_CR_COREREG_PRIVEXT (0x00000E8U) + +#define META_CR_T0ENABLE_OFFSET \ + (META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_ENABLE) +#define META_CR_T0STATUS_OFFSET \ + (META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_STATUS) +#define META_CR_T0DEFR_OFFSET (META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_DEFR) +#define META_CR_T0PRIVEXT_OFFSET \ + (META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_PRIVEXT) + +#define META_CR_T1ENABLE_OFFSET \ + (META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_ENABLE) +#define META_CR_T1STATUS_OFFSET \ + (META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_STATUS) +#define META_CR_T1DEFR_OFFSET (META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_DEFR) +#define META_CR_T1PRIVEXT_OFFSET \ + (META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_PRIVEXT) + +#define META_CR_TXENABLE_ENABLE_BIT (0x00000001U) /* Set if running */ +#define META_CR_TXSTATUS_PRIV (0x00020000U) +#define META_CR_TXPRIVEXT_MINIM (0x00000080U) + +#define META_MEM_GLOBAL_RANGE_BIT (0x80000000U) + +#define META_CR_TXCLKCTRL (0x048000B0) +#define META_CR_TXCLKCTRL_ALL_ON (0x55111111) +#define META_CR_TXCLKCTRL_ALL_AUTO (0xAA222222) + +#define META_CR_MMCU_LOCAL_EBCTRL (0x04830600) +#define META_CR_MMCU_LOCAL_EBCTRL_ICWIN (0x3 << 14) +#define META_CR_MMCU_LOCAL_EBCTRL_DCWIN (0x3 << 6) +#define META_CR_SYSC_DCPART(n) (0x04830200 + (n) * 0x8) +#define META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE (0x1 << 31) +#define META_CR_SYSC_ICPART(n) (0x04830220 + (n) * 0x8) +#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_OFFSET_TOP_HALF (0x8 << 16) +#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE (0xF) +#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_HALF_CACHE (0x7) +#define META_CR_MMCU_DCACHE_CTRL (0x04830018) +#define META_CR_MMCU_ICACHE_CTRL (0x04830020) +#define META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN (0x1) + +/* + ****************************************************************************** + * META LDR Format + ****************************************************************************** + */ +/* Block header structure. */ +struct rogue_meta_ldr_block_hdr { + u32 dev_id; + u32 sl_code; + u32 sl_data; + u16 pc_ctrl; + u16 crc; +}; + +/* High level data stream block structure. */ +struct rogue_meta_ldr_l1_data_blk { + u16 cmd; + u16 length; + u32 next; + u32 cmd_data[4]; +}; + +/* High level data stream block structure. */ +struct rogue_meta_ldr_l2_data_blk { + u16 tag; + u16 length; + u32 block_data[4]; +}; + +/* Config command structure. 
*/ +struct rogue_meta_ldr_cfg_blk { + u32 type; + u32 block_data[4]; +}; + +/* Block type definitions */ +#define ROGUE_META_LDR_COMMENT_TYPE_MASK (0x0010U) +#define ROGUE_META_LDR_BLK_IS_COMMENT(x) (((x) & ROGUE_META_LDR_COMMENT_TYPE_MASK) != 0U) + +/* + * Command definitions + * Value Name Description + * 0 LoadMem Load memory with binary data. + * 1 LoadCore Load a set of core registers. + * 2 LoadMMReg Load a set of memory mapped registers. + * 3 StartThreads Set each thread PC and SP, then enable threads. + * 4 ZeroMem Zeros a memory region. + * 5 Config Perform a configuration command. + */ +#define ROGUE_META_LDR_CMD_MASK (0x000FU) + +#define ROGUE_META_LDR_CMD_LOADMEM (0x0000U) +#define ROGUE_META_LDR_CMD_LOADCORE (0x0001U) +#define ROGUE_META_LDR_CMD_LOADMMREG (0x0002U) +#define ROGUE_META_LDR_CMD_START_THREADS (0x0003U) +#define ROGUE_META_LDR_CMD_ZEROMEM (0x0004U) +#define ROGUE_META_LDR_CMD_CONFIG (0x0005U) + +/* + * Config Command definitions + * Value Name Description + * 0 Pause Pause for x times 100 instructions + * 1 Read Read a value from register - No value return needed. + * Utilises effects of issuing reads to certain registers + * 2 Write Write to mem location + * 3 MemSet Set mem to value + * 4 MemCheck check mem for specific value. + */ +#define ROGUE_META_LDR_CFG_PAUSE (0x0000) +#define ROGUE_META_LDR_CFG_READ (0x0001) +#define ROGUE_META_LDR_CFG_WRITE (0x0002) +#define ROGUE_META_LDR_CFG_MEMSET (0x0003) +#define ROGUE_META_LDR_CFG_MEMCHECK (0x0004) + +/* + ****************************************************************************** + * ROGUE FW segmented MMU definitions + ****************************************************************************** + */ +/* All threads can access the segment. */ +#define ROGUE_FW_SEGMMU_ALLTHRS (0xf << 8U) +/* Writable. */ +#define ROGUE_FW_SEGMMU_WRITEABLE (0x1U << 1U) +/* All threads can access and writable. */ +#define ROGUE_FW_SEGMMU_ALLTHRS_WRITEABLE \ + (ROGUE_FW_SEGMMU_ALLTHRS | ROGUE_FW_SEGMMU_WRITEABLE) + +/* Direct map region 10 used for mapping GPU memory - max 8MB. */ +#define ROGUE_FW_SEGMMU_DMAP_GPU_ID (10U) +#define ROGUE_FW_SEGMMU_DMAP_GPU_ADDR_START (0x07000000U) +#define ROGUE_FW_SEGMMU_DMAP_GPU_MAX_SIZE (0x00800000U) + +/* Segment IDs. */ +#define ROGUE_FW_SEGMMU_DATA_ID (1U) +#define ROGUE_FW_SEGMMU_BOOTLDR_ID (2U) +#define ROGUE_FW_SEGMMU_TEXT_ID (ROGUE_FW_SEGMMU_BOOTLDR_ID) + +/* + * SLC caching strategy in S7 and volcanic is emitted through the segment MMU. + * All the segments configured through the macro ROGUE_FW_SEGMMU_OUTADDR_TOP are + * CACHED in the SLC. + * The interface has been kept the same to simplify the code changes. + * The bifdm argument is ignored (no longer relevant) in S7 and volcanic. + */ +#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(pers, slc_policy, mmu_ctx) \ + ((((u64)((pers) & 0x3)) << 52) | (((u64)((mmu_ctx) & 0xFF)) << 44) | \ + (((u64)((slc_policy) & 0x1)) << 40)) +#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC_CACHED(mmu_ctx) \ + ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(0x3, 0x0, mmu_ctx) +#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC_UNCACHED(mmu_ctx) \ + ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(0x0, 0x1, mmu_ctx) + +/* + * To configure the Page Catalog and BIF-DM fed into the BIF for Garten + * accesses through this segment. + */ +#define ROGUE_FW_SEGMMU_OUTADDR_TOP_SLC(pc, bifdm) \ + (((u64)((u64)(pc) & 0xFU) << 44U) | ((u64)((u64)(bifdm) & 0xFU) << 40U)) + +#define ROGUE_FW_SEGMMU_META_BIFDM_ID (0x7U) + +/* META segments have 4kB minimum size. 
*/ +#define ROGUE_FW_SEGMMU_ALIGN (0x1000U) + +/* Segmented MMU registers (n = segment id). */ +#define META_CR_MMCU_SEGMENT_N_BASE(n) (0x04850000U + ((n) * 0x10U)) +#define META_CR_MMCU_SEGMENT_N_LIMIT(n) (0x04850004U + ((n) * 0x10U)) +#define META_CR_MMCU_SEGMENT_N_OUTA0(n) (0x04850008U + ((n) * 0x10U)) +#define META_CR_MMCU_SEGMENT_N_OUTA1(n) (0x0485000CU + ((n) * 0x10U)) + +/* + * The following defines must be recalculated if the Meta MMU segments used + * to access Host-FW data are changed + * Current combinations are: + * - SLC uncached, META cached, FW base address 0x70000000 + * - SLC uncached, META uncached, FW base address 0xF0000000 + * - SLC cached, META cached, FW base address 0x10000000 + * - SLC cached, META uncached, FW base address 0x90000000 + */ +#define ROGUE_FW_SEGMMU_DATA_BASE_ADDRESS (0x10000000U) +#define ROGUE_FW_SEGMMU_DATA_META_CACHED (0x0U) +#define ROGUE_FW_SEGMMU_DATA_META_UNCACHED (META_MEM_GLOBAL_RANGE_BIT) +#define ROGUE_FW_SEGMMU_DATA_META_CACHE_MASK (META_MEM_GLOBAL_RANGE_BIT) +/* + * For non-VIVT SLCs the cacheability of the FW data in the SLC is selected in + * the PTEs for the FW data, not in the Meta Segment MMU, which means these + * defines have no real effect in those cases. + */ +#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_CACHED (0x0U) +#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_UNCACHED (0x60000000U) +#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_CACHE_MASK (0x60000000U) + +/* + ****************************************************************************** + * ROGUE FW Bootloader defaults + ****************************************************************************** + */ +#define ROGUE_FW_BOOTLDR_META_ADDR (0x40000000U) +#define ROGUE_FW_BOOTLDR_DEVV_ADDR_0 (0xC0000000U) +#define ROGUE_FW_BOOTLDR_DEVV_ADDR_1 (0x000000E1) +#define ROGUE_FW_BOOTLDR_DEVV_ADDR \ + ((((u64)ROGUE_FW_BOOTLDR_DEVV_ADDR_1) << 32) | \ + ROGUE_FW_BOOTLDR_DEVV_ADDR_0) +#define ROGUE_FW_BOOTLDR_LIMIT (0x1FFFF000) +#define ROGUE_FW_MAX_BOOTLDR_OFFSET (0x1000) + +/* Bootloader configuration offset is in dwords (512 bytes) */ +#define ROGUE_FW_BOOTLDR_CONF_OFFSET (0x80) + +/* + ****************************************************************************** + * ROGUE META Stack + ****************************************************************************** + */ +#define ROGUE_META_STACK_SIZE (0x1000U) + +/* + ****************************************************************************** + * ROGUE META Core memory + ****************************************************************************** + */ +/* Code and data both map to the same physical memory. 
*/ +#define ROGUE_META_COREMEM_CODE_ADDR (0x80000000U) +#define ROGUE_META_COREMEM_DATA_ADDR (0x82000000U) +#define ROGUE_META_COREMEM_OFFSET_MASK (0x01ffffffU) + +#define ROGUE_META_IS_COREMEM_CODE(a, b) \ + ({ \ + u32 _a = (a), _b = (b); \ + ((_a) >= ROGUE_META_COREMEM_CODE_ADDR) && \ + ((_a) < (ROGUE_META_COREMEM_CODE_ADDR + (_b))); \ + }) +#define ROGUE_META_IS_COREMEM_DATA(a, b) \ + ({ \ + u32 _a = (a), _b = (b); \ + ((_a) >= ROGUE_META_COREMEM_DATA_ADDR) && \ + ((_a) < (ROGUE_META_COREMEM_DATA_ADDR + (_b))); \ + }) +/* + ****************************************************************************** + * 2nd thread + ****************************************************************************** + */ +#define ROGUE_FW_THR1_PC (0x18930000) +#define ROGUE_FW_THR1_SP (0x78890000) + +/* + ****************************************************************************** + * META compatibility + ****************************************************************************** + */ + +#define META_CR_CORE_ID (0x04831000) +#define META_CR_CORE_ID_VER_SHIFT (16U) +#define META_CR_CORE_ID_VER_CLRMSK (0XFF00FFFFU) + +#define ROGUE_CR_META_MTP218_CORE_ID_VALUE 0x19 +#define ROGUE_CR_META_MTP219_CORE_ID_VALUE 0x1E +#define ROGUE_CR_META_LTP218_CORE_ID_VALUE 0x1C +#define ROGUE_CR_META_LTP217_CORE_ID_VALUE 0x1F + +#define ROGUE_FW_PROCESSOR_META "META" + +#endif /* PVR_ROGUE_META_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mips.h b/drivers/gpu/drm/imagination/pvr_rogue_mips.h new file mode 100644 index 000000000000..41ed618fda3f --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_mips.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_ROGUE_MIPS_H +#define PVR_ROGUE_MIPS_H + +#include <linux/bits.h> +#include <linux/types.h> + +/* Utility defines for memory management. */ +#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K (12) +#define ROGUE_MIPSFW_PAGE_SIZE_4K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K) +#define ROGUE_MIPSFW_PAGE_MASK_4K (ROGUE_MIPSFW_PAGE_SIZE_4K - 1) +#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K (16) +#define ROGUE_MIPSFW_PAGE_SIZE_64K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K) +#define ROGUE_MIPSFW_PAGE_MASK_64K (ROGUE_MIPSFW_PAGE_SIZE_64K - 1) +#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_256K (18) +#define ROGUE_MIPSFW_PAGE_SIZE_256K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_256K) +#define ROGUE_MIPSFW_PAGE_MASK_256K (ROGUE_MIPSFW_PAGE_SIZE_256K - 1) +#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_1MB (20) +#define ROGUE_MIPSFW_PAGE_SIZE_1MB (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_1MB) +#define ROGUE_MIPSFW_PAGE_MASK_1MB (ROGUE_MIPSFW_PAGE_SIZE_1MB - 1) +#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_4MB (22) +#define ROGUE_MIPSFW_PAGE_SIZE_4MB (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_4MB) +#define ROGUE_MIPSFW_PAGE_MASK_4MB (ROGUE_MIPSFW_PAGE_SIZE_4MB - 1) +#define ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE (2) +/* log2 page table sizes dependent on FW heap size and page size (for each OS). */ +#define ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_4K(pvr_dev) ((pvr_dev)->fw_dev.fw_heap_info.log2_size - \ + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K + \ + ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE) +#define ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_64K(pvr_dev) ((pvr_dev)->fw_dev.fw_heap_info.log2_size - \ + ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K + \ + ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE) +/* Maximum number of page table pages (both Host and MIPS pages). */ +#define ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES (4) +/* Total number of TLB entries. 
+ */
+#define ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES (16)
+/* "Uncached" caching policy. */
+#define ROGUE_MIPSFW_UNCACHED_CACHE_POLICY (2)
+/* "Write-back write-allocate" caching policy. */
+#define ROGUE_MIPSFW_WRITEBACK_CACHE_POLICY (3)
+/* "Write-through no write-allocate" caching policy. */
+#define ROGUE_MIPSFW_WRITETHROUGH_CACHE_POLICY (1)
+/* Cache policy used by the MIPS core when the physical bus is 32 bits wide. */
+#define ROGUE_MIPSFW_CACHED_POLICY (ROGUE_MIPSFW_WRITEBACK_CACHE_POLICY)
+/* Cache policy used by the MIPS core when the physical bus is wider than 32 bits. */
+#define ROGUE_MIPSFW_CACHED_POLICY_ABOVE_32BIT (ROGUE_MIPSFW_WRITETHROUGH_CACHE_POLICY)
+/* Total number of Remap entries. */
+#define ROGUE_MIPSFW_NUMBER_OF_REMAP_ENTRIES (2 * ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES)
+
+/* MIPS EntryLo/PTE format. */
+
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_SHIFT (31U)
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_CLRMSK (0X7FFFFFFF)
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_EN (0X80000000)
+
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_SHIFT (30U)
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_CLRMSK (0XBFFFFFFF)
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_EN (0X40000000)
+
+/* Page Frame Number */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT (6)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_ALIGNSHIFT (12)
+/* Mask used for the MIPS page table when the physical bus is 32 bits wide. */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_MASK (0x03FFFFC0)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SIZE (20)
+/* Mask used for the MIPS page table when the physical bus is wider than 32 bits. */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT (0x3FFFFFC0)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SIZE_ABOVE_32BIT (24)
+#define ROGUE_MIPSFW_ADDR_TO_ENTRYLO_PFN_RSHIFT (ROGUE_MIPSFW_ENTRYLO_PFN_ALIGNSHIFT - \
+						 ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT)
+
+#define ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT (3U)
+#define ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_CLRMSK (0XFFFFFFC7)
+
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_SHIFT (2U)
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_CLRMSK (0XFFFFFFFB)
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_EN (0X00000004)
+
+#define ROGUE_MIPSFW_ENTRYLO_VALID_SHIFT (1U)
+#define ROGUE_MIPSFW_ENTRYLO_VALID_CLRMSK (0XFFFFFFFD)
+#define ROGUE_MIPSFW_ENTRYLO_VALID_EN (0X00000002)
+
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_SHIFT (0U)
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_CLRMSK (0XFFFFFFFE)
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN (0X00000001)
+
+#define ROGUE_MIPSFW_ENTRYLO_DVG (ROGUE_MIPSFW_ENTRYLO_DIRTY_EN | \
+				  ROGUE_MIPSFW_ENTRYLO_VALID_EN | \
+				  ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN)
+#define ROGUE_MIPSFW_ENTRYLO_UNCACHED (ROGUE_MIPSFW_UNCACHED_CACHE_POLICY << \
+				       ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT)
+#define ROGUE_MIPSFW_ENTRYLO_DVG_UNCACHED (ROGUE_MIPSFW_ENTRYLO_DVG | \
+					   ROGUE_MIPSFW_ENTRYLO_UNCACHED)
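+
+/*
+ * A minimal sketch (function name illustrative) of how an EntryLo/PTE word
+ * is assembled from a 4 KiB aligned physical address using the fields above,
+ * for the wider-than-32-bit bus variant: bits 12..35 of the address become
+ * the PFN field at bit 6, and the entry is marked dirty, valid, global and
+ * uncached.
+ */
+static inline u32 rogue_mipsfw_entrylo_example(u64 phys_addr)
+{
+	u32 pfn = (u32)(phys_addr >> ROGUE_MIPSFW_ADDR_TO_ENTRYLO_PFN_RSHIFT) &
+		  ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT;
+
+	return pfn | ROGUE_MIPSFW_ENTRYLO_DVG_UNCACHED;
+}
+
+/* Remap Range Config Addr Out. */
+/* These defines refer to the upper half of the Remap Range Config register. */
+#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_MASK (0x0FFFFFF0)
+#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_SHIFT (4) /* wrt upper half of the register.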
*/ +#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_ALIGNSHIFT (12) +#define ROGUE_MIPSFW_ADDR_TO_RR_ADDR_OUT_RSHIFT (ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_ALIGNSHIFT - \ + ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_SHIFT) + +/* + * Pages to trampoline problematic physical addresses: + * - ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN : 0x1FC0_0000 + * - ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN : 0x1FC0_1000 + * - ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN : 0x1FC0_2000 + * - (benign trampoline) : 0x1FC0_3000 + * that would otherwise be erroneously remapped by the MIPS wrapper. + * (see "Firmware virtual layout and remap configuration" section below) + */ + +#define ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES (2) +#define ROGUE_MIPSFW_TRAMPOLINE_NUMPAGES BIT(ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES) +#define ROGUE_MIPSFW_TRAMPOLINE_SIZE (ROGUE_MIPSFW_TRAMPOLINE_NUMPAGES << \ + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K) +#define ROGUE_MIPSFW_TRAMPOLINE_LOG2_SEGMENT_SIZE (ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES + \ + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K) + +#define ROGUE_MIPSFW_TRAMPOLINE_TARGET_PHYS_ADDR (ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN) +#define ROGUE_MIPSFW_TRAMPOLINE_OFFSET(a) ((a) - ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN) + +#define ROGUE_MIPSFW_SENSITIVE_ADDR(a) (ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN == \ + (~((1 << ROGUE_MIPSFW_TRAMPOLINE_LOG2_SEGMENT_SIZE) - 1) \ + & (a))) + +/* Firmware virtual layout and remap configuration. */ +/* + * For each remap region we define: + * - the virtual base used by the Firmware to access code/data through that region + * - the microAptivAP physical address correspondent to the virtual base address, + * used as input address and remapped to the actual physical address + * - log2 of size of the region remapped by the MIPS wrapper, i.e. number of bits from + * the bottom of the base input address that survive onto the output address + * (this defines both the alignment and the maximum size of the remapped region) + * - one or more code/data segments within the remapped region. + */ + +/* Boot remap setup. */ +#define ROGUE_MIPSFW_BOOT_REMAP_VIRTUAL_BASE (0xBFC00000) +#define ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN (0x1FC00000) +#define ROGUE_MIPSFW_BOOT_REMAP_LOG2_SEGMENT_SIZE (12) +#define ROGUE_MIPSFW_BOOT_NMI_CODE_VIRTUAL_BASE (ROGUE_MIPSFW_BOOT_REMAP_VIRTUAL_BASE) + +/* Data remap setup. */ +#define ROGUE_MIPSFW_DATA_REMAP_VIRTUAL_BASE (0xBFC01000) +#define ROGUE_MIPSFW_DATA_CACHED_REMAP_VIRTUAL_BASE (0x9FC01000) +#define ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN (0x1FC01000) +#define ROGUE_MIPSFW_DATA_REMAP_LOG2_SEGMENT_SIZE (12) +#define ROGUE_MIPSFW_BOOT_NMI_DATA_VIRTUAL_BASE (ROGUE_MIPSFW_DATA_REMAP_VIRTUAL_BASE) + +/* Code remap setup. */ +#define ROGUE_MIPSFW_CODE_REMAP_VIRTUAL_BASE (0x9FC02000) +#define ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN (0x1FC02000) +#define ROGUE_MIPSFW_CODE_REMAP_LOG2_SEGMENT_SIZE (12) +#define ROGUE_MIPSFW_EXCEPTIONS_VIRTUAL_BASE (ROGUE_MIPSFW_CODE_REMAP_VIRTUAL_BASE) + +/* Permanent mappings setup. */ +#define ROGUE_MIPSFW_PT_VIRTUAL_BASE (0xCF000000) +#define ROGUE_MIPSFW_REGISTERS_VIRTUAL_BASE (0xCF800000) +#define ROGUE_MIPSFW_STACK_VIRTUAL_BASE (0xCF600000) + +/* Bootloader configuration data. */ +/* + * Bootloader configuration offset (where ROGUE_MIPSFW_BOOT_DATA lives) + * within the bootloader/NMI data page. + */ +#define ROGUE_MIPSFW_BOOTLDR_CONF_OFFSET (0x0) + +/* NMI shared data. */ +/* Base address of the shared data within the bootloader/NMI data page. */ +#define ROGUE_MIPSFW_NMI_SHARED_DATA_BASE (0x100) +/* Size used by Debug dump data. 
*/ +#define ROGUE_MIPSFW_NMI_SHARED_SIZE (0x2B0) +/* Offsets in the NMI shared area in 32-bit words. */ +#define ROGUE_MIPSFW_NMI_SYNC_FLAG_OFFSET (0x0) +#define ROGUE_MIPSFW_NMI_STATE_OFFSET (0x1) +#define ROGUE_MIPSFW_NMI_ERROR_STATE_SET (0x1) + +/* MIPS boot stage. */ +#define ROGUE_MIPSFW_BOOT_STAGE_OFFSET (0x400) + +/* + * MIPS private data in the bootloader data page. + * Memory below this offset is used by the FW only, no interface data allowed. + */ +#define ROGUE_MIPSFW_PRIVATE_DATA_OFFSET (0x800) + +struct rogue_mipsfw_boot_data { + u64 stack_phys_addr; + u64 reg_base; + u64 pt_phys_addr[ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES]; + u32 pt_log2_page_size; + u32 pt_num_pages; + u32 reserved1; + u32 reserved2; +}; + +#define ROGUE_MIPSFW_GET_OFFSET_IN_DWORDS(offset) ((offset) / sizeof(u32)) +#define ROGUE_MIPSFW_GET_OFFSET_IN_QWORDS(offset) ((offset) / sizeof(u64)) + +/* Used for compatibility checks. */ +#define ROGUE_MIPSFW_ARCHTYPE_VER_CLRMSK (0xFFFFE3FFU) +#define ROGUE_MIPSFW_ARCHTYPE_VER_SHIFT (10U) +#define ROGUE_MIPSFW_CORE_ID_VALUE (0x001U) +#define ROGUE_FW_PROCESSOR_MIPS "MIPS" + +/* microAptivAP cache line size. */ +#define ROGUE_MIPSFW_MICROAPTIVEAP_CACHELINE_SIZE (16U) + +/* + * The SOCIF transactions are identified with the top 16 bits of the physical address emitted by + * the MIPS. + */ +#define ROGUE_MIPSFW_WRAPPER_CONFIG_REGBANK_ADDR_ALIGN (16U) + +/* Values to put in the MIPS selectors for performance counters. */ +/* Icache accesses in COUNTER0. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ICACHE_ACCESSES_C0 (9U) +/* Icache misses in COUNTER1. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ICACHE_MISSES_C1 (9U) + +/* Dcache accesses in COUNTER0. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_DCACHE_ACCESSES_C0 (10U) +/* Dcache misses in COUNTER1. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_DCACHE_MISSES_C1 (11U) + +/* ITLB instruction accesses in COUNTER0. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ITLB_INSTR_ACCESSES_C0 (5U) +/* JTLB instruction accesses misses in COUNTER1. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_JTLB_INSTR_MISSES_C1 (7U) + + /* Instructions completed in COUNTER0. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_INSTR_COMPLETED_C0 (1U) +/* JTLB data misses in COUNTER1. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_JTLB_DATA_MISSES_C1 (8U) + +/* Shift for the Event field in the MIPS perf ctrl registers. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_EVENT_SHIFT (5U) + +/* Additional flags for performance counters. See MIPS manual for further reference. */ +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_USER_MODE (8U) +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_KERNEL_MODE (2U) +#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_EXL (1U) + +#define ROGUE_MIPSFW_C0_NBHWIRQ 8 + +/* Macros to decode C0_Cause register. */ +#define ROGUE_MIPSFW_C0_CAUSE_EXCCODE(cause) (((cause) & 0x7c) >> 2) +#define ROGUE_MIPSFW_C0_CAUSE_EXCCODE_FWERROR 9 +/* Use only when Coprocessor Unusable exception. */ +#define ROGUE_MIPSFW_C0_CAUSE_UNUSABLE_UNIT(cause) (((cause) >> 28) & 0x3) +#define ROGUE_MIPSFW_C0_CAUSE_PENDING_HWIRQ(cause) (((cause) & 0x3fc00) >> 10) +#define ROGUE_MIPSFW_C0_CAUSE_FDCIPENDING BIT(21) +#define ROGUE_MIPSFW_C0_CAUSE_IV BIT(23) +#define ROGUE_MIPSFW_C0_CAUSE_IC BIT(25) +#define ROGUE_MIPSFW_C0_CAUSE_PCIPENDING BIT(26) +#define ROGUE_MIPSFW_C0_CAUSE_TIPENDING BIT(30) +#define ROGUE_MIPSFW_C0_CAUSE_BRANCH_DELAY BIT(31) + +/* Macros to decode C0_Debug register. 
*/ +#define ROGUE_MIPSFW_C0_DEBUG_EXCCODE(debug) (((debug) >> 10) & 0x1f) +#define ROGUE_MIPSFW_C0_DEBUG_DSS BIT(0) +#define ROGUE_MIPSFW_C0_DEBUG_DBP BIT(1) +#define ROGUE_MIPSFW_C0_DEBUG_DDBL BIT(2) +#define ROGUE_MIPSFW_C0_DEBUG_DDBS BIT(3) +#define ROGUE_MIPSFW_C0_DEBUG_DIB BIT(4) +#define ROGUE_MIPSFW_C0_DEBUG_DINT BIT(5) +#define ROGUE_MIPSFW_C0_DEBUG_DIBIMPR BIT(6) +#define ROGUE_MIPSFW_C0_DEBUG_DDBLIMPR BIT(18) +#define ROGUE_MIPSFW_C0_DEBUG_DDBSIMPR BIT(19) +#define ROGUE_MIPSFW_C0_DEBUG_IEXI BIT(20) +#define ROGUE_MIPSFW_C0_DEBUG_DBUSEP BIT(21) +#define ROGUE_MIPSFW_C0_DEBUG_CACHEEP BIT(22) +#define ROGUE_MIPSFW_C0_DEBUG_MCHECKP BIT(23) +#define ROGUE_MIPSFW_C0_DEBUG_IBUSEP BIT(24) +#define ROGUE_MIPSFW_C0_DEBUG_DM BIT(30) +#define ROGUE_MIPSFW_C0_DEBUG_DBD BIT(31) + +/* Macros to decode TLB entries. */ +#define ROGUE_MIPSFW_TLB_GET_MASK(page_mask) (((page_mask) >> 13) & 0XFFFFU) +/* Page size in KB. */ +#define ROGUE_MIPSFW_TLB_GET_PAGE_SIZE(page_mask) ((((page_mask) | 0x1FFF) + 1) >> 11) +/* Page size in KB. */ +#define ROGUE_MIPSFW_TLB_GET_PAGE_MASK(page_size) ((((page_size) << 11) - 1) & ~0x7FF) +#define ROGUE_MIPSFW_TLB_GET_VPN2(entry_hi) ((entry_hi) >> 13) +#define ROGUE_MIPSFW_TLB_GET_COHERENCY(entry_lo) (((entry_lo) >> 3) & 0x7U) +#define ROGUE_MIPSFW_TLB_GET_PFN(entry_lo) (((entry_lo) >> 6) & 0XFFFFFU) +/* GET_PA uses a non-standard PFN mask for 36 bit addresses. */ +#define ROGUE_MIPSFW_TLB_GET_PA(entry_lo) (((u64)(entry_lo) & \ + ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT) << 6) +#define ROGUE_MIPSFW_TLB_GET_INHIBIT(entry_lo) (((entry_lo) >> 30) & 0x3U) +#define ROGUE_MIPSFW_TLB_GET_DGV(entry_lo) ((entry_lo) & 0x7U) +#define ROGUE_MIPSFW_TLB_GLOBAL BIT(0) +#define ROGUE_MIPSFW_TLB_VALID BIT(1) +#define ROGUE_MIPSFW_TLB_DIRTY BIT(2) +#define ROGUE_MIPSFW_TLB_XI BIT(30) +#define ROGUE_MIPSFW_TLB_RI BIT(31) + +#define ROGUE_MIPSFW_REMAP_GET_REGION_SIZE(region_size_encoding) (1 << (((region_size_encoding) \ + + 1) << 1)) + +struct rogue_mips_tlb_entry { + u32 tlb_page_mask; + u32 tlb_hi; + u32 tlb_lo0; + u32 tlb_lo1; +}; + +struct rogue_mips_remap_entry { + u32 remap_addr_in; /* Always 4k aligned. */ + u32 remap_addr_out; /* Always 4k aligned. */ + u32 remap_region_size; +}; + +struct rogue_mips_state { + u32 error_state; /* This must come first in the structure. */ + u32 error_epc; + u32 status_register; + u32 cause_register; + u32 bad_register; + u32 epc; + u32 sp; + u32 debug; + u32 depc; + u32 bad_instr; + u32 unmapped_address; + struct rogue_mips_tlb_entry tlb[ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES]; + struct rogue_mips_remap_entry remap[ROGUE_MIPSFW_NUMBER_OF_REMAP_ENTRIES]; +}; + +#include "pvr_rogue_mips_check.h" + +#endif /* PVR_ROGUE_MIPS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h b/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h new file mode 100644 index 000000000000..824b4bf33ac1 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#ifndef PVR_ROGUE_MIPS_CHECK_H +#define PVR_ROGUE_MIPS_CHECK_H + +#include <linux/build_bug.h> + +static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_page_mask) == 0, + "offsetof(struct rogue_mips_tlb_entry, tlb_page_mask) incorrect"); +static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_hi) == 4, + "offsetof(struct rogue_mips_tlb_entry, tlb_hi) incorrect"); +static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_lo0) == 8, + "offsetof(struct rogue_mips_tlb_entry, tlb_lo0) incorrect"); +static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_lo1) == 12, + "offsetof(struct rogue_mips_tlb_entry, tlb_lo1) incorrect"); +static_assert(sizeof(struct rogue_mips_tlb_entry) == 16, + "struct rogue_mips_tlb_entry is incorrect size"); + +static_assert(offsetof(struct rogue_mips_remap_entry, remap_addr_in) == 0, + "offsetof(struct rogue_mips_remap_entry, remap_addr_in) incorrect"); +static_assert(offsetof(struct rogue_mips_remap_entry, remap_addr_out) == 4, + "offsetof(struct rogue_mips_remap_entry, remap_addr_out) incorrect"); +static_assert(offsetof(struct rogue_mips_remap_entry, remap_region_size) == 8, + "offsetof(struct rogue_mips_remap_entry, remap_region_size) incorrect"); +static_assert(sizeof(struct rogue_mips_remap_entry) == 12, + "struct rogue_mips_remap_entry is incorrect size"); + +static_assert(offsetof(struct rogue_mips_state, error_state) == 0, + "offsetof(struct rogue_mips_state, error_state) incorrect"); +static_assert(offsetof(struct rogue_mips_state, error_epc) == 4, + "offsetof(struct rogue_mips_state, error_epc) incorrect"); +static_assert(offsetof(struct rogue_mips_state, status_register) == 8, + "offsetof(struct rogue_mips_state, status_register) incorrect"); +static_assert(offsetof(struct rogue_mips_state, cause_register) == 12, + "offsetof(struct rogue_mips_state, cause_register) incorrect"); +static_assert(offsetof(struct rogue_mips_state, bad_register) == 16, + "offsetof(struct rogue_mips_state, bad_register) incorrect"); +static_assert(offsetof(struct rogue_mips_state, epc) == 20, + "offsetof(struct rogue_mips_state, epc) incorrect"); +static_assert(offsetof(struct rogue_mips_state, sp) == 24, + "offsetof(struct rogue_mips_state, sp) incorrect"); +static_assert(offsetof(struct rogue_mips_state, debug) == 28, + "offsetof(struct rogue_mips_state, debug) incorrect"); +static_assert(offsetof(struct rogue_mips_state, depc) == 32, + "offsetof(struct rogue_mips_state, depc) incorrect"); +static_assert(offsetof(struct rogue_mips_state, bad_instr) == 36, + "offsetof(struct rogue_mips_state, bad_instr) incorrect"); +static_assert(offsetof(struct rogue_mips_state, unmapped_address) == 40, + "offsetof(struct rogue_mips_state, unmapped_address) incorrect"); +static_assert(offsetof(struct rogue_mips_state, tlb) == 44, + "offsetof(struct rogue_mips_state, tlb) incorrect"); +static_assert(offsetof(struct rogue_mips_state, remap) == 300, + "offsetof(struct rogue_mips_state, remap) incorrect"); +static_assert(sizeof(struct rogue_mips_state) == 684, + "struct rogue_mips_state is incorrect size"); + +#endif /* PVR_ROGUE_MIPS_CHECK_H */ diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h new file mode 100644 index 000000000000..f361ccdd5405 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +/* *** Autogenerated C -- do not edit *** */ + +#ifndef PVR_ROGUE_MMU_DEFS_H +#define PVR_ROGUE_MMU_DEFS_H + +#define ROGUE_MMU_DEFS_REVISION 0 + +#define ROGUE_BIF_DM_ENCODING_VERTEX (0x00000000U) +#define ROGUE_BIF_DM_ENCODING_PIXEL (0x00000001U) +#define ROGUE_BIF_DM_ENCODING_COMPUTE (0x00000002U) +#define ROGUE_BIF_DM_ENCODING_TLA (0x00000003U) +#define ROGUE_BIF_DM_ENCODING_PB_VCE (0x00000004U) +#define ROGUE_BIF_DM_ENCODING_PB_TE (0x00000005U) +#define ROGUE_BIF_DM_ENCODING_META (0x00000007U) +#define ROGUE_BIF_DM_ENCODING_HOST (0x00000008U) +#define ROGUE_BIF_DM_ENCODING_PM_ALIST (0x00000009U) + +#define ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT (30U) +#define ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK (0xFFFFFF003FFFFFFFULL) +#define ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT (21U) +#define ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK (0xFFFFFFFFC01FFFFFULL) +#define ROGUE_MMUCTRL_VADDR_PT_INDEX_SHIFT (12U) +#define ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK (0xFFFFFFFFFFE00FFFULL) + +#define ROGUE_MMUCTRL_ENTRIES_PC_VALUE (0x00000400U) +#define ROGUE_MMUCTRL_ENTRIES_PD_VALUE (0x00000200U) +#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE (0x00000200U) + +#define ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE (0x00000020U) +#define ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE (0x00000040U) +#define ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE (0x00000040U) + +#define ROGUE_MMUCTRL_PAGE_SIZE_MASK (0x00000007U) +#define ROGUE_MMUCTRL_PAGE_SIZE_4KB (0x00000000U) +#define ROGUE_MMUCTRL_PAGE_SIZE_16KB (0x00000001U) +#define ROGUE_MMUCTRL_PAGE_SIZE_64KB (0x00000002U) +#define ROGUE_MMUCTRL_PAGE_SIZE_256KB (0x00000003U) +#define ROGUE_MMUCTRL_PAGE_SIZE_1MB (0x00000004U) +#define ROGUE_MMUCTRL_PAGE_SIZE_2MB (0x00000005U) + +#define ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT (12U) +#define ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK (0xFFFFFF0000000FFFULL) + +#define ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT (14U) +#define ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK (0xFFFFFF0000003FFFULL) + +#define ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT (16U) +#define ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK (0xFFFFFF000000FFFFULL) + +#define ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT (18U) +#define ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK (0xFFFFFF000003FFFFULL) + +#define ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT (20U) +#define ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK (0xFFFFFF00000FFFFFULL) + +#define ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT (21U) +#define ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK (0xFFFFFF00001FFFFFULL) + +#define ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_SHIFT (12U) +#define ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK (0xFFFFFF0000000FFFULL) + +#define ROGUE_MMUCTRL_PT_BASE_16KB_RANGE_SHIFT (10U) +#define ROGUE_MMUCTRL_PT_BASE_16KB_RANGE_CLRMSK (0xFFFFFF00000003FFULL) + +#define ROGUE_MMUCTRL_PT_BASE_64KB_RANGE_SHIFT (8U) +#define ROGUE_MMUCTRL_PT_BASE_64KB_RANGE_CLRMSK (0xFFFFFF00000000FFULL) + +#define ROGUE_MMUCTRL_PT_BASE_256KB_RANGE_SHIFT (6U) +#define ROGUE_MMUCTRL_PT_BASE_256KB_RANGE_CLRMSK (0xFFFFFF000000003FULL) + +#define ROGUE_MMUCTRL_PT_BASE_1MB_RANGE_SHIFT (5U) +#define ROGUE_MMUCTRL_PT_BASE_1MB_RANGE_CLRMSK (0xFFFFFF000000001FULL) + +#define ROGUE_MMUCTRL_PT_BASE_2MB_RANGE_SHIFT (5U) +#define ROGUE_MMUCTRL_PT_BASE_2MB_RANGE_CLRMSK (0xFFFFFF000000001FULL) + +#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_SHIFT (62U) +#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_CLRMSK (0xBFFFFFFFFFFFFFFFULL) +#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_EN (0x4000000000000000ULL) +#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_HI_SHIFT (40U) +#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_HI_CLRMSK (0xC00000FFFFFFFFFFULL) +#define 
ROGUE_MMUCTRL_PT_DATA_PAGE_SHIFT (12U) +#define ROGUE_MMUCTRL_PT_DATA_PAGE_CLRMSK (0xFFFFFF0000000FFFULL) +#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_LO_SHIFT (6U) +#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_LO_CLRMSK (0xFFFFFFFFFFFFF03FULL) +#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_SHIFT (5U) +#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFFFFFFFFFFDFULL) +#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_EN (0x0000000000000020ULL) +#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_SHIFT (4U) +#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_CLRMSK (0xFFFFFFFFFFFFFFEFULL) +#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_EN (0x0000000000000010ULL) +#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_SHIFT (3U) +#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_CLRMSK (0xFFFFFFFFFFFFFFF7ULL) +#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_EN (0x0000000000000008ULL) +#define ROGUE_MMUCTRL_PT_DATA_CC_SHIFT (2U) +#define ROGUE_MMUCTRL_PT_DATA_CC_CLRMSK (0xFFFFFFFFFFFFFFFBULL) +#define ROGUE_MMUCTRL_PT_DATA_CC_EN (0x0000000000000004ULL) +#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_SHIFT (1U) +#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_CLRMSK (0xFFFFFFFFFFFFFFFDULL) +#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_EN (0x0000000000000002ULL) +#define ROGUE_MMUCTRL_PT_DATA_VALID_SHIFT (0U) +#define ROGUE_MMUCTRL_PT_DATA_VALID_CLRMSK (0xFFFFFFFFFFFFFFFEULL) +#define ROGUE_MMUCTRL_PT_DATA_VALID_EN (0x0000000000000001ULL) + +#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_SHIFT (40U) +#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFEFFFFFFFFFFULL) +#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_EN (0x0000010000000000ULL) +#define ROGUE_MMUCTRL_PD_DATA_PT_BASE_SHIFT (5U) +#define ROGUE_MMUCTRL_PD_DATA_PT_BASE_CLRMSK (0xFFFFFF000000001FULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_SHIFT (1U) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_CLRMSK (0xFFFFFFFFFFFFFFF1ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_4KB (0x0000000000000000ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_16KB (0x0000000000000002ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_64KB (0x0000000000000004ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_256KB (0x0000000000000006ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_1MB (0x0000000000000008ULL) +#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_2MB (0x000000000000000aULL) +#define ROGUE_MMUCTRL_PD_DATA_VALID_SHIFT (0U) +#define ROGUE_MMUCTRL_PD_DATA_VALID_CLRMSK (0xFFFFFFFFFFFFFFFEULL) +#define ROGUE_MMUCTRL_PD_DATA_VALID_EN (0x0000000000000001ULL) + +#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_SHIFT (4U) +#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_CLRMSK (0x0000000FU) +#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT (12U) +#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE (4096U) +#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_SHIFT (1U) +#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFFFDU) +#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_EN (0x00000002U) +#define ROGUE_MMUCTRL_PC_DATA_VALID_SHIFT (0U) +#define ROGUE_MMUCTRL_PC_DATA_VALID_CLRMSK (0xFFFFFFFEU) +#define ROGUE_MMUCTRL_PC_DATA_VALID_EN (0x00000001U) + +#endif /* PVR_ROGUE_MMU_DEFS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_stream.c b/drivers/gpu/drm/imagination/pvr_stream.c new file mode 100644 index 000000000000..975336a4facf --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_stream.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
*/ + +#include "pvr_device.h" +#include "pvr_rogue_fwif_stream.h" +#include "pvr_stream.h" + +#include <linux/align.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <uapi/drm/pvr_drm.h> + +static __always_inline bool +stream_def_is_supported(struct pvr_device *pvr_dev, const struct pvr_stream_def *stream_def) +{ + if (stream_def->feature == PVR_FEATURE_NONE) + return true; + + if (!(stream_def->feature & PVR_FEATURE_NOT) && + pvr_device_has_feature(pvr_dev, stream_def->feature)) { + return true; + } + + if ((stream_def->feature & PVR_FEATURE_NOT) && + !pvr_device_has_feature(pvr_dev, stream_def->feature & ~PVR_FEATURE_NOT)) { + return true; + } + + return false; +} + +static int +pvr_stream_get_data(u8 *stream, u32 *stream_offset, u32 stream_size, u32 data_size, u32 align_size, + void *dest) +{ + *stream_offset = ALIGN(*stream_offset, align_size); + + if ((*stream_offset + data_size) > stream_size) + return -EINVAL; + + memcpy(dest, stream + *stream_offset, data_size); + + (*stream_offset) += data_size; + + return 0; +} + +/** + * pvr_stream_process_1() - Process a single stream and fill destination structure + * @pvr_dev: Device pointer. + * @stream_def: Stream definition. + * @nr_entries: Number of entries in &stream_def. + * @stream: Pointer to stream. + * @stream_offset: Starting offset within stream. + * @stream_size: Size of input stream, in bytes. + * @dest: Pointer to destination structure. + * @dest_size: Size of destination structure. + * @stream_offset_out: Pointer to variable to write updated stream offset to. May be NULL. + * + * Returns: + * * 0 on success, or + * * -%EINVAL on malformed stream. + */ +static int +pvr_stream_process_1(struct pvr_device *pvr_dev, const struct pvr_stream_def *stream_def, + u32 nr_entries, u8 *stream, u32 stream_offset, u32 stream_size, + u8 *dest, u32 dest_size, u32 *stream_offset_out) +{ + int err = 0; + u32 i; + + for (i = 0; i < nr_entries; i++) { + if (stream_def[i].offset >= dest_size) { + err = -EINVAL; + break; + } + + if (!stream_def_is_supported(pvr_dev, &stream_def[i])) + continue; + + switch (stream_def[i].size) { + case PVR_STREAM_SIZE_8: + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u8), + sizeof(u8), dest + stream_def[i].offset); + if (err) + return err; + break; + + case PVR_STREAM_SIZE_16: + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u16), + sizeof(u16), dest + stream_def[i].offset); + if (err) + return err; + break; + + case PVR_STREAM_SIZE_32: + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32), + sizeof(u32), dest + stream_def[i].offset); + if (err) + return err; + break; + + case PVR_STREAM_SIZE_64: + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u64), + sizeof(u64), dest + stream_def[i].offset); + if (err) + return err; + break; + + case PVR_STREAM_SIZE_ARRAY: + err = pvr_stream_get_data(stream, &stream_offset, stream_size, + stream_def[i].array_size, sizeof(u64), + dest + stream_def[i].offset); + if (err) + return err; + break; + } + } + + if (stream_offset_out) + *stream_offset_out = stream_offset; + + return err; +} + +static int +pvr_stream_process_ext_stream(struct pvr_device *pvr_dev, + const struct pvr_stream_cmd_defs *cmd_defs, void *ext_stream, + u32 stream_offset, u32 ext_stream_size, void *dest) +{ + u32 musthave_masks[PVR_STREAM_EXTHDR_TYPE_MAX]; + u32 ext_header; + int err = 0; + u32 i; + + /* Copy "must have" mask from device. We clear this as we process the stream. 
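+	 * Any bit still left set once every extension header has been
+	 * consumed means user space did not supply the data for a workaround
+	 * this GPU requires, and the stream is rejected further below.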
*/ + memcpy(musthave_masks, pvr_dev->stream_musthave_quirks[cmd_defs->type], + sizeof(musthave_masks)); + + do { + const struct pvr_stream_ext_header *header; + u32 type; + u32 data; + + err = pvr_stream_get_data(ext_stream, &stream_offset, ext_stream_size, sizeof(u32), + sizeof(ext_header), &ext_header); + if (err) + return err; + + type = (ext_header & PVR_STREAM_EXTHDR_TYPE_MASK) >> PVR_STREAM_EXTHDR_TYPE_SHIFT; + data = ext_header & PVR_STREAM_EXTHDR_DATA_MASK; + + if (type >= cmd_defs->ext_nr_headers) + return -EINVAL; + + header = &cmd_defs->ext_headers[type]; + if (data & ~header->valid_mask) + return -EINVAL; + + musthave_masks[type] &= ~data; + + for (i = 0; i < header->ext_streams_num; i++) { + const struct pvr_stream_ext_def *ext_def = &header->ext_streams[i]; + + if (!(ext_header & ext_def->header_mask)) + continue; + + if (!pvr_device_has_uapi_quirk(pvr_dev, ext_def->quirk)) + return -EINVAL; + + err = pvr_stream_process_1(pvr_dev, ext_def->stream, ext_def->stream_len, + ext_stream, stream_offset, + ext_stream_size, dest, + cmd_defs->dest_size, &stream_offset); + if (err) + return err; + } + } while (ext_header & PVR_STREAM_EXTHDR_CONTINUATION); + + /* + * Verify that "must have" mask is now zero. If it isn't then one of the "must have" quirks + * for this command was not present. + */ + for (i = 0; i < cmd_defs->ext_nr_headers; i++) { + if (musthave_masks[i]) + return -EINVAL; + } + + return 0; +} + +/** + * pvr_stream_process() - Build FW structure from stream + * @pvr_dev: Device pointer. + * @cmd_defs: Stream definition. + * @stream: Pointer to command stream. + * @stream_size: Size of command stream, in bytes. + * @dest_out: Pointer to destination buffer. + * + * Caller is responsible for freeing the output structure. + * + * Returns: + * * 0 on success, + * * -%ENOMEM on out of memory, or + * * -%EINVAL on malformed stream. + */ +int +pvr_stream_process(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs, + void *stream, u32 stream_size, void *dest_out) +{ + u32 stream_offset = 0; + u32 main_stream_len; + u32 padding; + int err; + + if (!stream || !stream_size) + return -EINVAL; + + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32), + sizeof(u32), &main_stream_len); + if (err) + return err; + + /* + * u32 after stream length is padding to ensure u64 alignment, but may be used for expansion + * in the future. Verify it's zero. + */ + err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32), + sizeof(u32), &padding); + if (err) + return err; + + if (main_stream_len < stream_offset || main_stream_len > stream_size || padding) + return -EINVAL; + + err = pvr_stream_process_1(pvr_dev, cmd_defs->main_stream, cmd_defs->main_stream_len, + stream, stream_offset, main_stream_len, dest_out, + cmd_defs->dest_size, &stream_offset); + if (err) + return err; + + if (stream_offset < stream_size) { + err = pvr_stream_process_ext_stream(pvr_dev, cmd_defs, stream, stream_offset, + stream_size, dest_out); + if (err) + return err; + } else { + u32 i; + + /* + * If we don't have an extension stream then there must not be any "must have" + * quirks for this command. + */ + for (i = 0; i < cmd_defs->ext_nr_headers; i++) { + if (pvr_dev->stream_musthave_quirks[cmd_defs->type][i]) + return -EINVAL; + } + } + + return 0; +} + +/** + * pvr_stream_create_musthave_masks() - Create "must have" masks for streams based on current device + * quirks + * @pvr_dev: Device pointer. 
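+ * + * Populates the device's stream_musthave_quirks table from the quirk list, + * so it is expected to run during device setup, before any command stream + * is processed.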
+ */ +void +pvr_stream_create_musthave_masks(struct pvr_device *pvr_dev) +{ + memset(pvr_dev->stream_musthave_quirks, 0, sizeof(pvr_dev->stream_musthave_quirks)); + + if (pvr_device_has_uapi_quirk(pvr_dev, 47217)) + pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_FRAG][0] |= + PVR_STREAM_EXTHDR_FRAG0_BRN47217; + + if (pvr_device_has_uapi_quirk(pvr_dev, 49927)) { + pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_GEOM][0] |= + PVR_STREAM_EXTHDR_GEOM0_BRN49927; + pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_FRAG][0] |= + PVR_STREAM_EXTHDR_FRAG0_BRN49927; + pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_COMPUTE][0] |= + PVR_STREAM_EXTHDR_COMPUTE0_BRN49927; + } +} diff --git a/drivers/gpu/drm/imagination/pvr_stream.h b/drivers/gpu/drm/imagination/pvr_stream.h new file mode 100644 index 000000000000..d92acb3a61d7 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_stream.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_STREAM_H +#define PVR_STREAM_H + +#include <linux/bits.h> +#include <linux/limits.h> +#include <linux/types.h> + +struct pvr_device; + +struct pvr_job; + +enum pvr_stream_type { + PVR_STREAM_TYPE_GEOM = 0, + PVR_STREAM_TYPE_FRAG, + PVR_STREAM_TYPE_COMPUTE, + PVR_STREAM_TYPE_TRANSFER, + PVR_STREAM_TYPE_STATIC_RENDER_CONTEXT, + PVR_STREAM_TYPE_STATIC_COMPUTE_CONTEXT, + + PVR_STREAM_TYPE_MAX +}; + +enum pvr_stream_size { + PVR_STREAM_SIZE_8 = 0, + PVR_STREAM_SIZE_16, + PVR_STREAM_SIZE_32, + PVR_STREAM_SIZE_64, + PVR_STREAM_SIZE_ARRAY, +}; + +#define PVR_FEATURE_NOT BIT(31) +#define PVR_FEATURE_NONE U32_MAX + +struct pvr_stream_def { + u32 offset; + enum pvr_stream_size size; + u32 array_size; + u32 feature; +}; + +struct pvr_stream_ext_def { + const struct pvr_stream_def *stream; + u32 stream_len; + u32 header_mask; + u32 quirk; +}; + +struct pvr_stream_ext_header { + const struct pvr_stream_ext_def *ext_streams; + u32 ext_streams_num; + u32 valid_mask; +}; + +struct pvr_stream_cmd_defs { + enum pvr_stream_type type; + + const struct pvr_stream_def *main_stream; + u32 main_stream_len; + + u32 ext_nr_headers; + const struct pvr_stream_ext_header *ext_headers; + + size_t dest_size; +}; + +int +pvr_stream_process(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs, + void *stream, u32 stream_size, void *dest_out); +void +pvr_stream_create_musthave_masks(struct pvr_device *pvr_dev); + +#endif /* PVR_STREAM_H */ diff --git a/drivers/gpu/drm/imagination/pvr_stream_defs.c b/drivers/gpu/drm/imagination/pvr_stream_defs.c new file mode 100644 index 000000000000..f8bd1a8c01db --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_stream_defs.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
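+ * + * Layout tables describing how each userspace command stream maps onto its + * firmware command structure, including the per-quirk (BRN) extension + * streams.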
*/ + +#include "pvr_device_info.h" +#include "pvr_rogue_fwif_client.h" +#include "pvr_rogue_fwif_stream.h" +#include "pvr_stream.h" +#include "pvr_stream_defs.h" + +#include <linux/stddef.h> +#include <uapi/drm/pvr_drm.h> + +#define PVR_STREAM_DEF_SET(owner, member, _size, _array_size, _feature) \ + { .offset = offsetof(struct owner, member), \ + .size = (_size), \ + .array_size = (_array_size), \ + .feature = (_feature) } + +#define PVR_STREAM_DEF(owner, member, member_size) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, PVR_FEATURE_NONE) + +#define PVR_STREAM_DEF_FEATURE(owner, member, member_size, feature) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, feature) + +#define PVR_STREAM_DEF_NOT_FEATURE(owner, member, member_size, feature) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, \ + (feature) | PVR_FEATURE_NOT) + +#define PVR_STREAM_DEF_ARRAY(owner, member) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY, \ + sizeof(((struct owner *)0)->member), PVR_FEATURE_NONE) + +#define PVR_STREAM_DEF_ARRAY_FEATURE(owner, member, feature) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY, \ + sizeof(((struct owner *)0)->member), feature) + +#define PVR_STREAM_DEF_ARRAY_NOT_FEATURE(owner, member, feature) \ + PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY, \ + sizeof(((struct owner *)0)->member), (feature) | PVR_FEATURE_NOT) + +/* + * When adding new parameters to the stream definition, the new parameters must go after the + * existing parameters, to preserve order. As parameters are naturally aligned, care must be taken + * with respect to implicit padding in the stream; padding should be minimised as much as possible. + */ +static const struct pvr_stream_def rogue_fwif_cmd_geom_stream[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_ctrl_stream_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu_border_colour_table, 64), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect0, 64, + PVR_FEATURE_VDM_DRAWINDIRECT), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect1, 32, + PVR_FEATURE_VDM_DRAWINDIRECT), + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.ppp_ctrl, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.te_psg, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_context_resume_task0_size, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_context_resume_task3_size, 32, + PVR_FEATURE_VDM_OBJECT_LEVEL_LLS), + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.view_idx, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.pds_coeff_free_prog, 32, + PVR_FEATURE_TESSELLATION), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_geom_stream_brn49927[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu, 32), +}; + +static const struct pvr_stream_ext_def cmd_geom_ext_streams_0[] = { + { + .stream = rogue_fwif_cmd_geom_stream_brn49927, + .stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream_brn49927), + .header_mask = PVR_STREAM_EXTHDR_GEOM0_BRN49927, + .quirk = 49927, + }, +}; + +static const struct pvr_stream_ext_header cmd_geom_ext_headers[] = { + { + .ext_streams = cmd_geom_ext_streams_0, + .ext_streams_num = ARRAY_SIZE(cmd_geom_ext_streams_0), + .valid_mask = PVR_STREAM_EXTHDR_GEOM0_VALID, + }, +}; + +const struct pvr_stream_cmd_defs pvr_cmd_geom_stream = { + .type = PVR_STREAM_TYPE_GEOM, + + .main_stream = rogue_fwif_cmd_geom_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream), + + .ext_nr_headers = 
ARRAY_SIZE(cmd_geom_ext_headers), + .ext_headers = cmd_geom_ext_headers, + + .dest_size = sizeof(struct rogue_fwif_cmd_geom), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_frag_stream[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_scissor_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_dbias_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zlsctl, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zload_store_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_stencil_load_store_base, 64), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.fb_cdc_zls, 64, + PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP), + PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pbe_word), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu_border_colour_table, 64), + PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_bgnd), + PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_pr_bgnd), + PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.usc_clear_register), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.usc_pixel_output_ctrl, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjdepth, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjvals, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_aa, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_xtp_pipe_enable, 32, + PVR_FEATURE_S7_TOP_INFRASTRUCTURE), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_ctl, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_info, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.pixel_phantom, 32, + PVR_FEATURE_CLUSTER_GROUPING), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.view_idx, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_data, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32, + PVR_FEATURE_GPU_MULTICORE_SUPPORT), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_zls_pixels, 32, + PVR_FEATURE_ZLS_SUBTILE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.rgx_cr_blackpearl_fix, 32, + PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, zls_stride, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_frag, sls_stride, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, execute_count, 32, + PVR_FEATURE_GPU_MULTICORE_SUPPORT), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn47217[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn49927[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu, 32), +}; + +static const struct pvr_stream_ext_def cmd_frag_ext_streams_0[] = { + { + .stream = rogue_fwif_cmd_frag_stream_brn47217, + .stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn47217), + .header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN47217, + .quirk = 47217, + }, + { + .stream = rogue_fwif_cmd_frag_stream_brn49927, + .stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn49927), + .header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN49927, + .quirk = 49927, + }, +}; + +static const struct pvr_stream_ext_header cmd_frag_ext_headers[] = { + { + .ext_streams = cmd_frag_ext_streams_0, + .ext_streams_num = ARRAY_SIZE(cmd_frag_ext_streams_0), + .valid_mask = PVR_STREAM_EXTHDR_FRAG0_VALID, + }, +}; + +const struct pvr_stream_cmd_defs pvr_cmd_frag_stream = { + .type = PVR_STREAM_TYPE_FRAG, + + .main_stream = rogue_fwif_cmd_frag_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream), + + .ext_nr_headers = ARRAY_SIZE(cmd_frag_ext_headers), + 
.ext_headers = cmd_frag_ext_headers, + + .dest_size = sizeof(struct rogue_fwif_cmd_frag), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_compute_stream[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu_border_colour_table, 64), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_queue, 64, + PVR_FEATURE_CDM_USER_MODE_QUEUE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_base, 64, + PVR_FEATURE_CDM_USER_MODE_QUEUE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb, 64, + PVR_FEATURE_CDM_USER_MODE_QUEUE), + PVR_STREAM_DEF_NOT_FEATURE(rogue_fwif_cmd_compute, regs.cdm_ctrl_stream_base, 64, + PVR_FEATURE_CDM_USER_MODE_QUEUE), + PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_context_state_base_addr, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_resume_pds1, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_item, 32, + PVR_FEATURE_COMPUTE_MORTON_CAPABLE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.compute_cluster, 32, + PVR_FEATURE_CLUSTER_GROUPING), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.tpu_tag_cdm_ctrl, 32, + PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, stream_start_offset, 32, + PVR_FEATURE_CDM_USER_MODE_QUEUE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, execute_count, 32, + PVR_FEATURE_GPU_MULTICORE_SUPPORT), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_compute_stream_brn49927[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu, 32), +}; + +static const struct pvr_stream_ext_def cmd_compute_ext_streams_0[] = { + { + .stream = rogue_fwif_cmd_compute_stream_brn49927, + .stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream_brn49927), + .header_mask = PVR_STREAM_EXTHDR_COMPUTE0_BRN49927, + .quirk = 49927, + }, +}; + +static const struct pvr_stream_ext_header cmd_compute_ext_headers[] = { + { + .ext_streams = cmd_compute_ext_streams_0, + .ext_streams_num = ARRAY_SIZE(cmd_compute_ext_streams_0), + .valid_mask = PVR_STREAM_EXTHDR_COMPUTE0_VALID, + }, +}; + +const struct pvr_stream_cmd_defs pvr_cmd_compute_stream = { + .type = PVR_STREAM_TYPE_COMPUTE, + + .main_stream = rogue_fwif_cmd_compute_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream), + + .ext_nr_headers = ARRAY_SIZE(cmd_compute_ext_headers), + .ext_headers = cmd_compute_ext_headers, + + .dest_size = sizeof(struct rogue_fwif_cmd_compute), +}; + +static const struct pvr_stream_def rogue_fwif_cmd_transfer_stream[] = { + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd0_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd1_base, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd3_sizeinfo, 64), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_base, 64), + PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_transfer, regs.pbe_wordx_mrty), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_bgobjvals, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_pixel_output_ctrl, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register0, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register1, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register2, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register3, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_size, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render_origin, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_ctl, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_aa, 32), + 
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_info, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_code, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_data, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render, 32), + PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_rgn, 32), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.isp_xtp_pipe_enable, 32, + PVR_FEATURE_S7_TOP_INFRASTRUCTURE), + PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.frag_screen, 32, + PVR_FEATURE_GPU_MULTICORE_SUPPORT), +}; + +const struct pvr_stream_cmd_defs pvr_cmd_transfer_stream = { + .type = PVR_STREAM_TYPE_TRANSFER, + + .main_stream = rogue_fwif_cmd_transfer_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_transfer_stream), + + .ext_nr_headers = 0, + + .dest_size = sizeof(struct rogue_fwif_cmd_transfer), +}; + +static const struct pvr_stream_def rogue_fwif_static_render_context_state_stream[] = { + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_reg_vdm_context_state_base_addr, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_reg_vdm_context_state_resume_addr, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_reg_ta_context_state_base_addr, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task0, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task1, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task2, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task3, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_store_task4, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task0, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task1, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task2, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task3, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[0].geom_reg_vdm_context_resume_task4, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task0, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task1, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task2, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task3, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_store_task4, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task0, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task1, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task2, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task3, 64), + PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch, + geom_state[1].geom_reg_vdm_context_resume_task4, 64), +}; + +const struct pvr_stream_cmd_defs pvr_static_render_context_state_stream = { + .type = 
PVR_STREAM_TYPE_STATIC_RENDER_CONTEXT, + + .main_stream = rogue_fwif_static_render_context_state_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_static_render_context_state_stream), + + .ext_nr_headers = 0, + + .dest_size = sizeof(struct rogue_fwif_geom_registers_caswitch), +}; + +static const struct pvr_stream_def rogue_fwif_static_compute_context_state_stream[] = { + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds1, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds1, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0_b, 64), + PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0_b, 64), +}; + +const struct pvr_stream_cmd_defs pvr_static_compute_context_state_stream = { + .type = PVR_STREAM_TYPE_STATIC_COMPUTE_CONTEXT, + + .main_stream = rogue_fwif_static_compute_context_state_stream, + .main_stream_len = ARRAY_SIZE(rogue_fwif_static_compute_context_state_stream), + + .ext_nr_headers = 0, + + .dest_size = sizeof(struct rogue_fwif_cdm_registers_cswitch), +}; diff --git a/drivers/gpu/drm/imagination/pvr_stream_defs.h b/drivers/gpu/drm/imagination/pvr_stream_defs.h new file mode 100644 index 000000000000..f33b82165833 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_stream_defs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_STREAM_DEFS_H +#define PVR_STREAM_DEFS_H + +#include "pvr_stream.h" + +extern const struct pvr_stream_cmd_defs pvr_cmd_geom_stream; +extern const struct pvr_stream_cmd_defs pvr_cmd_frag_stream; +extern const struct pvr_stream_cmd_defs pvr_cmd_compute_stream; +extern const struct pvr_stream_cmd_defs pvr_cmd_transfer_stream; +extern const struct pvr_stream_cmd_defs pvr_static_render_context_state_stream; +extern const struct pvr_stream_cmd_defs pvr_static_compute_context_state_stream; + +#endif /* PVR_STREAM_DEFS_H */ diff --git a/drivers/gpu/drm/imagination/pvr_sync.c b/drivers/gpu/drm/imagination/pvr_sync.c new file mode 100644 index 000000000000..129f646d14ba --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_sync.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. 
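+ * + * Helpers for turning the syncobj wait/signal operations attached to a job + * submission into scheduler dependencies and deferred syncobj updates.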
*/ + +#include <uapi/drm/pvr_drm.h> + +#include <drm/drm_syncobj.h> +#include <drm/gpu_scheduler.h> +#include <linux/xarray.h> +#include <linux/dma-fence-unwrap.h> + +#include "pvr_device.h" +#include "pvr_queue.h" +#include "pvr_sync.h" + +static int +pvr_check_sync_op(const struct drm_pvr_sync_op *sync_op) +{ + u8 handle_type; + + if (sync_op->flags & ~DRM_PVR_SYNC_OP_FLAGS_MASK) + return -EINVAL; + + handle_type = sync_op->flags & DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK; + if (handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ && + handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ) + return -EINVAL; + + if (handle_type == DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ && + sync_op->value != 0) + return -EINVAL; + + return 0; +} + +static void +pvr_sync_signal_free(struct pvr_sync_signal *sig_sync) +{ + if (!sig_sync) + return; + + drm_syncobj_put(sig_sync->syncobj); + dma_fence_chain_free(sig_sync->chain); + dma_fence_put(sig_sync->fence); + kfree(sig_sync); +} + +void +pvr_sync_signal_array_cleanup(struct xarray *array) +{ + struct pvr_sync_signal *sig_sync; + unsigned long i; + + xa_for_each(array, i, sig_sync) + pvr_sync_signal_free(sig_sync); + + xa_destroy(array); +} + +static struct pvr_sync_signal * +pvr_sync_signal_array_add(struct xarray *array, struct drm_file *file, u32 handle, u64 point) +{ + struct pvr_sync_signal *sig_sync; + struct dma_fence *cur_fence; + int err; + u32 id; + + sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); + if (!sig_sync) + return ERR_PTR(-ENOMEM); + + sig_sync->handle = handle; + sig_sync->point = point; + + if (point > 0) { + sig_sync->chain = dma_fence_chain_alloc(); + if (!sig_sync->chain) { + err = -ENOMEM; + goto err_free_sig_sync; + } + } + + sig_sync->syncobj = drm_syncobj_find(file, handle); + if (!sig_sync->syncobj) { + err = -EINVAL; + goto err_free_sig_sync; + } + + /* Retrieve the current fence attached to that point. It's + * perfectly fine to get a NULL fence here, it just means there's + * no fence attached to that point yet. 
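+ * The cached fence is later replaced with the job's done fence, so a NULL + * entry simply means the point is first materialised by this submission.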
+ */ + if (!drm_syncobj_find_fence(file, handle, point, 0, &cur_fence)) + sig_sync->fence = cur_fence; + + err = xa_alloc(array, &id, sig_sync, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_free_sig_sync; + + return sig_sync; + +err_free_sig_sync: + pvr_sync_signal_free(sig_sync); + return ERR_PTR(err); +} + +static struct pvr_sync_signal * +pvr_sync_signal_array_search(struct xarray *array, u32 handle, u64 point) +{ + struct pvr_sync_signal *sig_sync; + unsigned long i; + + xa_for_each(array, i, sig_sync) { + if (handle == sig_sync->handle && point == sig_sync->point) + return sig_sync; + } + + return NULL; +} + +static struct pvr_sync_signal * +pvr_sync_signal_array_get(struct xarray *array, struct drm_file *file, u32 handle, u64 point) +{ + struct pvr_sync_signal *sig_sync; + + sig_sync = pvr_sync_signal_array_search(array, handle, point); + if (sig_sync) + return sig_sync; + + return pvr_sync_signal_array_add(array, file, handle, point); +} + +int +pvr_sync_signal_array_collect_ops(struct xarray *array, + struct drm_file *file, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops) +{ + for (u32 i = 0; i < sync_op_count; i++) { + struct pvr_sync_signal *sig_sync; + int ret; + + if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL)) + continue; + + ret = pvr_check_sync_op(&sync_ops[i]); + if (ret) + return ret; + + sig_sync = pvr_sync_signal_array_get(array, file, + sync_ops[i].handle, + sync_ops[i].value); + if (IS_ERR(sig_sync)) + return PTR_ERR(sig_sync); + } + + return 0; +} + +int +pvr_sync_signal_array_update_fences(struct xarray *array, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops, + struct dma_fence *done_fence) +{ + for (u32 i = 0; i < sync_op_count; i++) { + struct dma_fence *old_fence; + struct pvr_sync_signal *sig_sync; + + if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL)) + continue; + + sig_sync = pvr_sync_signal_array_search(array, sync_ops[i].handle, + sync_ops[i].value); + if (WARN_ON(!sig_sync)) + return -EINVAL; + + old_fence = sig_sync->fence; + sig_sync->fence = dma_fence_get(done_fence); + dma_fence_put(old_fence); + + if (WARN_ON(!sig_sync->fence)) + return -EINVAL; + } + + return 0; +} + +void +pvr_sync_signal_array_push_fences(struct xarray *array) +{ + struct pvr_sync_signal *sig_sync; + unsigned long i; + + xa_for_each(array, i, sig_sync) { + if (sig_sync->chain) { + drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, + sig_sync->fence, sig_sync->point); + sig_sync->chain = NULL; + } else { + drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); + } + } +} + +static int +pvr_sync_add_dep_to_job(struct drm_sched_job *job, struct dma_fence *f) +{ + struct dma_fence_unwrap iter; + u32 native_fence_count = 0; + struct dma_fence *uf; + int err = 0; + + dma_fence_unwrap_for_each(uf, &iter, f) { + if (pvr_queue_fence_is_ufo_backed(uf)) + native_fence_count++; + } + + /* No need to unwrap the fence if it's fully non-native. */ + if (!native_fence_count) + return drm_sched_job_add_dependency(job, f); + + dma_fence_unwrap_for_each(uf, &iter, f) { + /* There's no dma_fence_unwrap_stop() helper cleaning up the refs + * owned by dma_fence_unwrap(), so let's just iterate over all + * entries without doing anything when something failed. + */ + if (err) + continue; + + if (pvr_queue_fence_is_ufo_backed(uf)) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(uf); + + /* If this is a native dependency, we wait for the scheduled fence, + * and we will let pvr_queue_run_job() issue FW waits. 
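+ * Waiting on the scheduled fence only guarantees the dependency has been + * queued to the GPU; completion is then awaited on the device itself via + * UFO waits, avoiding a CPU-side round trip between the two jobs.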
+ */ + err = drm_sched_job_add_dependency(job, + dma_fence_get(&s_fence->scheduled)); + } else { + err = drm_sched_job_add_dependency(job, dma_fence_get(uf)); + } + } + + dma_fence_put(f); + return err; +} + +int +pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops, + struct xarray *signal_array) +{ + int err = 0; + + if (!sync_op_count) + return 0; + + for (u32 i = 0; i < sync_op_count; i++) { + struct pvr_sync_signal *sig_sync; + struct dma_fence *fence; + + if (sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL) + continue; + + err = pvr_check_sync_op(&sync_ops[i]); + if (err) + return err; + + sig_sync = pvr_sync_signal_array_search(signal_array, sync_ops[i].handle, + sync_ops[i].value); + if (sig_sync) { + if (WARN_ON(!sig_sync->fence)) + return -EINVAL; + + fence = dma_fence_get(sig_sync->fence); + } else { + err = drm_syncobj_find_fence(from_pvr_file(pvr_file), sync_ops[i].handle, + sync_ops[i].value, 0, &fence); + if (err) + return err; + } + + err = pvr_sync_add_dep_to_job(job, fence); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/imagination/pvr_sync.h b/drivers/gpu/drm/imagination/pvr_sync.h new file mode 100644 index 000000000000..db6ccfda104a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_sync.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_SYNC_H +#define PVR_SYNC_H + +#include <uapi/drm/pvr_drm.h> + +/* Forward declaration from <linux/xarray.h>. */ +struct xarray; + +/* Forward declaration from <drm/drm_file.h>. */ +struct drm_file; + +/* Forward declaration from <drm/gpu_scheduler.h>. */ +struct drm_sched_job; + +/* Forward declaration from "pvr_device.h". */ +struct pvr_file; + +/** + * struct pvr_sync_signal - Object encoding a syncobj signal operation + * + * The job submission logic collects all signal operations in an array of + * pvr_sync_signal objects. This array also serves as a cache to get the + * latest dma_fence when multiple jobs are submitted at once, and one job + * signals a syncobj point that's later waited on by a subsequent job. + */ +struct pvr_sync_signal { + /** @handle: Handle of the syncobj to signal. */ + u32 handle; + + /** + * @point: Point to signal in the syncobj. + * + * Only relevant for timeline syncobjs. + */ + u64 point; + + /** @syncobj: Syncobj retrieved from the handle. */ + struct drm_syncobj *syncobj; + + /** + * @chain: Chain object used to link the new fence with the + * existing timeline syncobj. + * + * Should be zero when manipulating a regular syncobj. + */ + struct dma_fence_chain *chain; + + /** + * @fence: New fence object to attach to the syncobj. + * + * This pointer starts with the current fence bound to + * the <handle,point> pair. 
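+ * It is swapped for the job's done fence by + * pvr_sync_signal_array_update_fences() before being pushed back to the + * syncobj.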
+ */ + struct dma_fence *fence; +}; + +void +pvr_sync_signal_array_cleanup(struct xarray *array); + +int +pvr_sync_signal_array_collect_ops(struct xarray *array, + struct drm_file *file, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops); + +int +pvr_sync_signal_array_update_fences(struct xarray *array, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops, + struct dma_fence *done_fence); + +void +pvr_sync_signal_array_push_fences(struct xarray *array); + +int +pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job, + u32 sync_op_count, + const struct drm_pvr_sync_op *sync_ops, + struct xarray *signal_array); + +#endif /* PVR_SYNC_H */ diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c new file mode 100644 index 000000000000..e59517ba039e --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -0,0 +1,1090 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_vm.h" + +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_gem.h" +#include "pvr_mmu.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_heap_config.h" + +#include <drm/drm_exec.h> +#include <drm/drm_gem.h> +#include <drm/drm_gpuvm.h> + +#include <linux/container_of.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp_types.h> +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/stddef.h> + +/** + * DOC: Memory context + * + * This is the "top level" datatype in the VM code. It's exposed in the public + * API as an opaque handle. + */ + +/** + * struct pvr_vm_context - Context type used to represent a single VM. + */ +struct pvr_vm_context { + /** + * @pvr_dev: The PowerVR device to which this context is bound. + * This binding is immutable for the life of the context. + */ + struct pvr_device *pvr_dev; + + /** @mmu_ctx: The context for binding to physical memory. */ + struct pvr_mmu_context *mmu_ctx; + + /** @gpuvm_mgr: GPUVM object associated with this context. */ + struct drm_gpuvm gpuvm_mgr; + + /** @lock: Global lock on this VM. */ + struct mutex lock; + + /** + * @fw_mem_ctx_obj: Firmware object representing firmware memory + * context. + */ + struct pvr_fw_object *fw_mem_ctx_obj; + + /** @ref_count: Reference count of object. */ + struct kref ref_count; + + /** + * @dummy_gem: GEM object to enable VM reservation. All private BOs + * should use the @dummy_gem.resv and not their own _resv field. + */ + struct drm_gem_object dummy_gem; +}; + +static inline +struct pvr_vm_context *to_pvr_vm_context(struct drm_gpuvm *gpuvm) +{ + return container_of(gpuvm, struct pvr_vm_context, gpuvm_mgr); +} + +struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx) +{ + if (vm_ctx) + kref_get(&vm_ctx->ref_count); + + return vm_ctx; +} + +/** + * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the + * page table structure behind a VM context. + * @vm_ctx: Target VM context. + */ +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx) +{ + return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx); +} + +/** + * pvr_vm_get_dma_resv() - Expose the dma_resv owned by the VM context. + * @vm_ctx: Target VM context. + * + * This is used to allow private BOs to share a dma_resv for faster fence + * updates. + * + * Returns: The dma_resv pointer. 
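+ * + * The reservation object is backed by @vm_ctx->dummy_gem, so it stays + * valid for as long as the caller holds a reference on the VM context.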
+ */ +struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx) +{ + return vm_ctx->dummy_gem.resv; +} + +/** + * DOC: Memory mappings + */ + +/** + * struct pvr_vm_gpuva - Wrapper type representing a single VM mapping. + */ +struct pvr_vm_gpuva { + /** @base: The wrapped drm_gpuva object. */ + struct drm_gpuva base; +}; + +enum pvr_vm_bind_type { + PVR_VM_BIND_TYPE_MAP, + PVR_VM_BIND_TYPE_UNMAP, +}; + +/** + * struct pvr_vm_bind_op - Context of a map/unmap operation. + */ +struct pvr_vm_bind_op { + /** @type: Map or unmap. */ + enum pvr_vm_bind_type type; + + /** @pvr_obj: Object associated with mapping (map only). */ + struct pvr_gem_object *pvr_obj; + + /** + * @vm_ctx: VM context where the mapping will be created or destroyed. + */ + struct pvr_vm_context *vm_ctx; + + /** @mmu_op_ctx: MMU op context. */ + struct pvr_mmu_op_context *mmu_op_ctx; + + /** @gpuvm_bo: Prealloced wrapped BO for attaching to the gpuvm. */ + struct drm_gpuvm_bo *gpuvm_bo; + + /** + * @new_va: Prealloced VA mapping object (init in callback). + * Used when creating a mapping. + */ + struct pvr_vm_gpuva *new_va; + + /** + * @prev_va: Prealloced VA mapping object (init in callback). + * Used when a mapping or unmapping operation overlaps an existing + * mapping and splits away the beginning into a new mapping. + */ + struct pvr_vm_gpuva *prev_va; + + /** + * @next_va: Prealloced VA mapping object (init in callback). + * Used when a mapping or unmapping operation overlaps an existing + * mapping and splits away the end into a new mapping. + */ + struct pvr_vm_gpuva *next_va; + + /** @offset: Offset into @pvr_obj to begin mapping from. */ + u64 offset; + + /** @device_addr: Device-virtual address at the start of the mapping. */ + u64 device_addr; + + /** @size: Size of the desired mapping. */ + u64 size; +}; + +/** + * pvr_vm_bind_op_exec() - Execute a single bind op. + * @bind_op: Bind op context. + * + * Returns: + * * 0 on success, + * * Any error code returned by drm_gpuva_sm_map(), drm_gpuva_sm_unmap(), or + * a callback function. + */ +static int pvr_vm_bind_op_exec(struct pvr_vm_bind_op *bind_op) +{ + switch (bind_op->type) { + case PVR_VM_BIND_TYPE_MAP: + return drm_gpuvm_sm_map(&bind_op->vm_ctx->gpuvm_mgr, + bind_op, bind_op->device_addr, + bind_op->size, + gem_from_pvr_gem(bind_op->pvr_obj), + bind_op->offset); + + case PVR_VM_BIND_TYPE_UNMAP: + return drm_gpuvm_sm_unmap(&bind_op->vm_ctx->gpuvm_mgr, + bind_op, bind_op->device_addr, + bind_op->size); + } + + /* + * This shouldn't happen unless something went wrong + * in drm_sched. 
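+ * The bind type is only ever assigned by the bind op init helpers, so an + * unknown value here indicates corrupted state rather than a malformed + * userspace request.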
+ */ + WARN_ON(1); + return -EINVAL; +} + +static void pvr_vm_bind_op_fini(struct pvr_vm_bind_op *bind_op) +{ + drm_gpuvm_bo_put(bind_op->gpuvm_bo); + + kfree(bind_op->new_va); + kfree(bind_op->prev_va); + kfree(bind_op->next_va); + + if (bind_op->pvr_obj) + pvr_gem_object_put(bind_op->pvr_obj); + + if (bind_op->mmu_op_ctx) + pvr_mmu_op_context_destroy(bind_op->mmu_op_ctx); +} + +static int +pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op, + struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, u64 offset, + u64 device_addr, u64 size) +{ + struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj); + const bool is_user = vm_ctx != vm_ctx->pvr_dev->kernel_vm_ctx; + const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj); + struct sg_table *sgt; + u64 offset_plus_size; + int err; + + if (check_add_overflow(offset, size, &offset_plus_size)) + return -EINVAL; + + if (is_user && + !pvr_find_heap_containing(vm_ctx->pvr_dev, device_addr, size)) { + return -EINVAL; + } + + if (!pvr_device_addr_and_size_are_valid(vm_ctx, device_addr, size) || + offset & ~PAGE_MASK || size & ~PAGE_MASK || + offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) + return -EINVAL; + + bind_op->type = PVR_VM_BIND_TYPE_MAP; + + dma_resv_lock(obj->resv, NULL); + bind_op->gpuvm_bo = drm_gpuvm_bo_obtain(&vm_ctx->gpuvm_mgr, obj); + dma_resv_unlock(obj->resv); + if (IS_ERR(bind_op->gpuvm_bo)) + return PTR_ERR(bind_op->gpuvm_bo); + + bind_op->new_va = kzalloc(sizeof(*bind_op->new_va), GFP_KERNEL); + bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL); + bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL); + if (!bind_op->new_va || !bind_op->prev_va || !bind_op->next_va) { + err = -ENOMEM; + goto err_bind_op_fini; + } + + /* Pin pages so they're ready for use. */ + sgt = pvr_gem_object_get_pages_sgt(pvr_obj); + err = PTR_ERR_OR_ZERO(sgt); + if (err) + goto err_bind_op_fini; + + bind_op->mmu_op_ctx = + pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt, offset, size); + err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx); + if (err) { + bind_op->mmu_op_ctx = NULL; + goto err_bind_op_fini; + } + + bind_op->pvr_obj = pvr_obj; + bind_op->vm_ctx = vm_ctx; + bind_op->device_addr = device_addr; + bind_op->size = size; + bind_op->offset = offset; + + return 0; + +err_bind_op_fini: + pvr_vm_bind_op_fini(bind_op); + + return err; +} + +static int +pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, + struct pvr_vm_context *vm_ctx, u64 device_addr, + u64 size) +{ + int err; + + if (!pvr_device_addr_and_size_are_valid(vm_ctx, device_addr, size)) + return -EINVAL; + + bind_op->type = PVR_VM_BIND_TYPE_UNMAP; + + bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL); + bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL); + if (!bind_op->prev_va || !bind_op->next_va) { + err = -ENOMEM; + goto err_bind_op_fini; + } + + bind_op->mmu_op_ctx = + pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0); + err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx); + if (err) { + bind_op->mmu_op_ctx = NULL; + goto err_bind_op_fini; + } + + bind_op->vm_ctx = vm_ctx; + bind_op->device_addr = device_addr; + bind_op->size = size; + + return 0; + +err_bind_op_fini: + pvr_vm_bind_op_fini(bind_op); + + return err; +} + +/** + * pvr_vm_gpuva_map() - Insert a mapping into a memory context. + * @op: gpuva op containing the remap details. + * @op_ctx: Operation context. + * + * Context: Called by drm_gpuvm_sm_map following a successful mapping while + * @op_ctx.vm_ctx mutex is held. 
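+ * + * Both the GEM offset and the VA range must be aligned to the device page + * size. On success the preallocated @op_ctx.new_va is consumed and must + * not be freed by the caller.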
+ * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_map(). + */ +static int +pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj); + struct pvr_vm_bind_op *ctx = op_ctx; + int err; + + if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK) + return -EINVAL; + + err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags, + op->map.va.addr); + if (err) + return err; + + drm_gpuva_map(&ctx->vm_ctx->gpuvm_mgr, &ctx->new_va->base, &op->map); + drm_gpuva_link(&ctx->new_va->base, ctx->gpuvm_bo); + ctx->new_va = NULL; + + return 0; +} + +/** + * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context. + * @op: gpuva op containing the unmap details. + * @op_ctx: Operation context. + * + * Context: Called by drm_gpuvm_sm_unmap following a successful unmapping while + * @op_ctx.vm_ctx mutex is held. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_unmap(). + */ +static int +pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_vm_bind_op *ctx = op_ctx; + + int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr, + op->unmap.va->va.range); + + if (err) + return err; + + drm_gpuva_unmap(&op->unmap); + drm_gpuva_unlink(op->unmap.va); + + return 0; +} + +/** + * pvr_vm_gpuva_remap() - Remap a mapping within a memory context. + * @op: gpuva op containing the remap details. + * @op_ctx: Operation context. + * + * Context: Called by either drm_gpuvm_sm_map or drm_gpuvm_sm_unmap when a + * mapping or unmapping operation causes a region to be split. The + * @op_ctx.vm_ctx mutex is held. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_unmap(). + */ +static int +pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_vm_bind_op *ctx = op_ctx; + u64 va_start = 0, va_range = 0; + int err; + + drm_gpuva_op_remap_to_unmap_range(&op->remap, &va_start, &va_range); + err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start, va_range); + if (err) + return err; + + /* No actual remap required: the page table tree depth is fixed to 3, + * and we use 4k page table entries only for now. + */ + drm_gpuva_remap(&ctx->prev_va->base, &ctx->next_va->base, &op->remap); + + if (op->remap.prev) { + pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->base.gem.obj)); + drm_gpuva_link(&ctx->prev_va->base, ctx->gpuvm_bo); + ctx->prev_va = NULL; + } + + if (op->remap.next) { + pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->base.gem.obj)); + drm_gpuva_link(&ctx->next_va->base, ctx->gpuvm_bo); + ctx->next_va = NULL; + } + + drm_gpuva_unlink(op->remap.unmap->va); + + return 0; +} + +/* + * Public API + * + * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h". + */ + +/** + * pvr_device_addr_is_valid() - Tests whether a device-virtual address + * is valid. + * @device_addr: Virtual device address to test. + * + * Return: + * * %true if @device_addr is within the valid range for a device page + * table and is aligned to the device page size, or + * * %false otherwise. + */ +bool +pvr_device_addr_is_valid(u64 device_addr) +{ + return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 && + (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0; +} + +/** + * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual + * address and associated size are both valid. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address to test.
+ * @size: Size of the range based at @device_addr to test. + * + * Calling pvr_device_addr_is_valid() twice (once on @size, and again on + * @device_addr + @size) to verify a device-virtual address range initially + * seems intuitive, but it produces a false-negative when the address range + * is right at the end of device-virtual address space. + * + * This function catches that corner case, as well as checking that + * @size is non-zero. + * + * Return: + * * %true if @device_addr is device page aligned; @size is device page + * aligned; the range specified by @device_addr and @size is within the + * bounds of the device-virtual address space, and @size is non-zero, or + * * %false otherwise. + */ +bool +pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx, + u64 device_addr, u64 size) +{ + return pvr_device_addr_is_valid(device_addr) && + drm_gpuvm_range_valid(&vm_ctx->gpuvm_mgr, device_addr, size) && + size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 && + (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE); +} + +static void pvr_gpuvm_free(struct drm_gpuvm *gpuvm) +{ + kfree(to_pvr_vm_context(gpuvm)); +} + +static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = { + .vm_free = pvr_gpuvm_free, + .sm_step_map = pvr_vm_gpuva_map, + .sm_step_remap = pvr_vm_gpuva_remap, + .sm_step_unmap = pvr_vm_gpuva_unmap, +}; + +static void +fw_mem_context_init(void *cpu_ptr, void *priv) +{ + struct rogue_fwif_fwmemcontext *fw_mem_ctx = cpu_ptr; + struct pvr_vm_context *vm_ctx = priv; + + fw_mem_ctx->pc_dev_paddr = pvr_vm_get_page_table_root_addr(vm_ctx); + fw_mem_ctx->page_cat_base_reg_set = ROGUE_FW_BIF_INVALID_PCSET; +} + +/** + * pvr_vm_create_context() - Create a new VM context. + * @pvr_dev: Target PowerVR device. + * @is_userspace_context: %true if this context is for userspace. This will + * create a firmware memory context for the VM context + * and disable warnings when tearing down mappings. + * + * Return: + * * A handle to the newly-minted VM context on success, + * * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is + * missing or has an unsupported value, + * * -%ENOMEM if allocation of the structure behind the opaque handle fails, + * or + * * Any error encountered while setting up internal structures. + */ +struct pvr_vm_context * +pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + + struct pvr_vm_context *vm_ctx; + u16 device_addr_bits; + + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits, + &device_addr_bits); + if (err) { + drm_err(drm_dev, + "Failed to get device virtual address space bits\n"); + return ERR_PTR(err); + } + + if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) { + drm_err(drm_dev, + "Device has unsupported virtual address space size\n"); + return ERR_PTR(-EINVAL); + } + + vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL); + if (!vm_ctx) + return ERR_PTR(-ENOMEM); + + vm_ctx->pvr_dev = pvr_dev; + + vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev); + err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx); + if (err) + goto err_free; + + if (is_userspace_context) { + err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_fwmemcontext), + PVR_BO_FW_FLAGS_DEVICE_UNCACHED, + fw_mem_context_init, vm_ctx, &vm_ctx->fw_mem_ctx_obj); + + if (err) + goto err_page_table_destroy; + } + + drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0); + drm_gpuvm_init(&vm_ctx->gpuvm_mgr, + is_userspace_context ? 
"PowerVR-user-VM" : "PowerVR-FW-VM", + 0, &pvr_dev->base, &vm_ctx->dummy_gem, + 0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops); + + mutex_init(&vm_ctx->lock); + kref_init(&vm_ctx->ref_count); + + return vm_ctx; + +err_page_table_destroy: + pvr_mmu_context_destroy(vm_ctx->mmu_ctx); + +err_free: + kfree(vm_ctx); + + return ERR_PTR(err); +} + +/** + * pvr_vm_context_release() - Teardown a VM context. + * @ref_count: Pointer to reference counter of the VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +static void +pvr_vm_context_release(struct kref *ref_count) +{ + struct pvr_vm_context *vm_ctx = + container_of(ref_count, struct pvr_vm_context, ref_count); + + if (vm_ctx->fw_mem_ctx_obj) + pvr_fw_object_destroy(vm_ctx->fw_mem_ctx_obj); + + WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range)); + + pvr_mmu_context_destroy(vm_ctx->mmu_ctx); + drm_gem_private_object_fini(&vm_ctx->dummy_gem); + mutex_destroy(&vm_ctx->lock); + + drm_gpuvm_put(&vm_ctx->gpuvm_mgr); +} + +/** + * pvr_vm_context_lookup() - Look up VM context from handle + * @pvr_file: Pointer to pvr_file structure. + * @handle: Object handle. + * + * Takes reference on VM context object. Call pvr_vm_context_put() to release. + * + * Returns: + * * The requested object on success, or + * * %NULL on failure (object does not exist in list, or is not a VM context) + */ +struct pvr_vm_context * +pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle) +{ + struct pvr_vm_context *vm_ctx; + + xa_lock(&pvr_file->vm_ctx_handles); + vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle); + if (vm_ctx) + kref_get(&vm_ctx->ref_count); + + xa_unlock(&pvr_file->vm_ctx_handles); + + return vm_ctx; +} + +/** + * pvr_vm_context_put() - Release a reference on a VM context + * @vm_ctx: Target VM context. + * + * Returns: + * * %true if the VM context was destroyed, or + * * %false if there are any references still remaining. + */ +bool +pvr_vm_context_put(struct pvr_vm_context *vm_ctx) +{ + if (vm_ctx) + return kref_put(&vm_ctx->ref_count, pvr_vm_context_release); + + return true; +} + +/** + * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the + * given file. + * @pvr_file: Pointer to pvr_file structure. + * + * Removes all vm_contexts associated with @pvr_file from the device VM context + * list and drops initial references. vm_contexts will then be destroyed once + * all outstanding references are dropped. + */ +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file) +{ + struct pvr_vm_context *vm_ctx; + unsigned long handle; + + xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) { + /* vm_ctx is not used here because that would create a race with xa_erase */ + pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle)); + } +} + +static int +pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec) +{ + struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv; + struct pvr_gem_object *pvr_obj = bind_op->pvr_obj; + + /* Unmap operations don't have an object to lock. */ + if (!pvr_obj) + return 0; + + /* Acquire lock on the GEM being mapped. */ + return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj)); +} + +/** + * pvr_vm_map() - Map a section of physical memory into a section of + * device-virtual memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @pvr_obj_offset: Offset into @pvr_obj to map from. 
+ * @device_addr: Virtual device address at the start of the requested mapping. + * @size: Size of the requested mapping. + * + * No handle is returned to represent the mapping. Instead, callers should + * remember @device_addr and use that as a handle. + * + * Return: + * * 0 on success, + * * -%EINVAL if @device_addr is not a valid page-aligned device-virtual + * address; the region specified by @pvr_obj_offset and @size does not fall + * entirely within @pvr_obj, or any part of the specified region of @pvr_obj + * is not device-virtual page-aligned, + * * Any error encountered while performing internal operations required to + * destroy the mapping (returned from pvr_vm_gpuva_map or + * pvr_vm_gpuva_remap). + */ +int +pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, + u64 pvr_obj_offset, u64 device_addr, u64 size) +{ + struct pvr_vm_bind_op bind_op = {0}; + struct drm_gpuvm_exec vm_exec = { + .vm = &vm_ctx->gpuvm_mgr, + .flags = DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, + .extra = { + .fn = pvr_vm_lock_extra, + .priv = &bind_op, + }, + }; + + int err = pvr_vm_bind_op_map_init(&bind_op, vm_ctx, pvr_obj, + pvr_obj_offset, device_addr, + size); + + if (err) + return err; + + pvr_gem_object_get(pvr_obj); + + err = drm_gpuvm_exec_lock(&vm_exec); + if (err) + goto err_cleanup; + + err = pvr_vm_bind_op_exec(&bind_op); + + drm_gpuvm_exec_unlock(&vm_exec); + +err_cleanup: + pvr_vm_bind_op_fini(&bind_op); + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * -%EINVAL if @device_addr is not a valid page-aligned device-virtual + * address, + * * Any error encountered while performing internal operations required to + * destroy the mapping (returned from pvr_vm_gpuva_unmap or + * pvr_vm_gpuva_remap). + */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_vm_bind_op bind_op = {0}; + struct drm_gpuvm_exec vm_exec = { + .vm = &vm_ctx->gpuvm_mgr, + .flags = DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, + .extra = { + .fn = pvr_vm_lock_extra, + .priv = &bind_op, + }, + }; + + int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr, + size); + if (err) + return err; + + err = drm_gpuvm_exec_lock(&vm_exec); + if (err) + goto err_cleanup; + + err = pvr_vm_bind_op_exec(&bind_op); + + drm_gpuvm_exec_unlock(&vm_exec); + +err_cleanup: + pvr_vm_bind_op_fini(&bind_op); + + return err; +} + +/* Static data areas are determined by firmware. 
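+ * The table below records where each area lives within its heap so the + * layout can be reported to userspace by pvr_static_data_areas_get().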
*/ +static const struct drm_pvr_static_data_area static_data_areas[] = { + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE, + .location_heap_id = DRM_PVR_HEAP_GENERAL, + .offset = 0, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC, + .location_heap_id = DRM_PVR_HEAP_GENERAL, + .offset = 128, + .size = 1024, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC, + .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA, + .offset = 0, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_EOT, + .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA, + .offset = 128, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC, + .location_heap_id = DRM_PVR_HEAP_USC_CODE, + .offset = 0, + .size = 128, + }, +}; + +#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE) + +/* + * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding + * static data area for each heap. + */ +static const struct drm_pvr_heap pvr_heaps[] = { + [DRM_PVR_HEAP_GENERAL] = { + .base = ROGUE_GENERAL_HEAP_BASE, + .size = ROGUE_GENERAL_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_PDS_CODE_DATA] = { + .base = ROGUE_PDSCODEDATA_HEAP_BASE, + .size = ROGUE_PDSCODEDATA_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_USC_CODE] = { + .base = ROGUE_USCCODE_HEAP_BASE, + .size = ROGUE_USCCODE_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_RGNHDR] = { + .base = ROGUE_RGNHDR_HEAP_BASE, + .size = ROGUE_RGNHDR_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_VIS_TEST] = { + .base = ROGUE_VISTEST_HEAP_BASE, + .size = ROGUE_VISTEST_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_TRANSFER_FRAG] = { + .base = ROGUE_TRANSFER_FRAG_HEAP_BASE, + .size = ROGUE_TRANSFER_FRAG_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, +}; + +int +pvr_static_data_areas_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_static_data_areas query = {0}; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_static_data_areas); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + if (err < 0) + return err; + + if (!query.static_data_areas.array) { + query.static_data_areas.count = ARRAY_SIZE(static_data_areas); + query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area); + goto copy_out; + } + + if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas)) + query.static_data_areas.count = ARRAY_SIZE(static_data_areas); + + err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas); + if (err < 0) + return err; + +copy_out: + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +int +pvr_heap_info_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_heap_info query = {0}; + u64 dest; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_heap_info); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + if (err < 0) + return err; + + if (!query.heaps.array) { + query.heaps.count = ARRAY_SIZE(pvr_heaps); + query.heaps.stride = sizeof(struct drm_pvr_heap); + goto copy_out; + } + + if 
(query.heaps.count > ARRAY_SIZE(pvr_heaps)) + query.heaps.count = ARRAY_SIZE(pvr_heaps); + + /* Region header heap is only present if BRN63142 is present. */ + dest = query.heaps.array; + for (size_t i = 0; i < query.heaps.count; i++) { + struct drm_pvr_heap heap = pvr_heaps[i]; + + if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142)) + heap.size = 0; + + err = PVR_UOBJ_SET(dest, query.heaps.stride, heap); + if (err < 0) + return err; + + dest += query.heaps.stride; + } + +copy_out: + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +/** + * pvr_heap_contains_range() - Determine if a given heap contains the specified + * device-virtual address range. + * @pvr_heap: Target heap. + * @start: Inclusive start of the target range. + * @end: Inclusive end of the target range. + * + * It is an error to call this function with values of @start and @end that do + * not satisfy the condition @start <= @end. + */ +static __always_inline bool +pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end) +{ + return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size; +} + +/** + * pvr_find_heap_containing() - Find a heap which contains the specified + * device-virtual address range. + * @pvr_dev: Target PowerVR device. + * @start: Start of the target range. + * @size: Size of the target range. + * + * Return: + * * A pointer to a constant instance of struct drm_pvr_heap representing the + * heap containing the entire range specified by @start and @size on + * success, or + * * %NULL if no such heap exists. + */ +const struct drm_pvr_heap * +pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size) +{ + u64 end; + + if (check_add_overflow(start, size - 1, &end)) + return NULL; + + /* + * There are no guarantees about the order of address ranges in + * &pvr_heaps, so iterate over the entire array looking for a heap whose + * range completely encompasses the given range. + */ + for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) { + /* Skip heaps that are only present with an associated quirk. */ + if (heap_id == DRM_PVR_HEAP_RGNHDR && + !PVR_HAS_QUIRK(pvr_dev, 63142)) { + continue; + } + + if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end)) + return &pvr_heaps[heap_id]; + } + + return NULL; +} + +/** + * pvr_vm_find_gem_object() - Look up a buffer object from a given + * device-virtual address. + * @vm_ctx: [IN] Target VM context. + * @device_addr: [IN] Virtual device address at the start of the required + * object. + * @mapped_offset_out: [OUT] Pointer to location to write offset of the start + * of the mapped region within the buffer object. May be + * %NULL if this information is not required. + * @mapped_size_out: [OUT] Pointer to location to write size of the mapped + * region. May be %NULL if this information is not required. + * + * If successful, a reference will be taken on the buffer object. The caller + * must drop the reference with pvr_gem_object_put(). + * + * Return: + * * The PowerVR buffer object mapped at @device_addr if one exists, or + * * %NULL otherwise. 
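+ * + * The lookup runs under the VM context lock and considers only the single + * mapping containing @device_addr, so @mapped_offset_out and + * @mapped_size_out describe that mapping rather than the whole buffer object. 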
+ */ +struct pvr_gem_object * +pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr, + u64 *mapped_offset_out, u64 *mapped_size_out) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, device_addr, 1); + if (!va) + goto err_unlock; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + pvr_gem_object_get(pvr_obj); + + if (mapped_offset_out) + *mapped_offset_out = va->gem.offset; + if (mapped_size_out) + *mapped_size_out = va->va.range; + + mutex_unlock(&vm_ctx->lock); + + return pvr_obj; + +err_unlock: + mutex_unlock(&vm_ctx->lock); + + return NULL; +} + +/** + * pvr_vm_get_fw_mem_context: Get object representing firmware memory context + * @vm_ctx: Target VM context. + * + * Returns: + * * FW object representing firmware memory context, or + * * %NULL if this VM context does not have a firmware memory context. + */ +struct pvr_fw_object * +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx) +{ + return vm_ctx->fw_mem_ctx_obj; +} diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h new file mode 100644 index 000000000000..f2a6463f2b05 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_VM_H +#define PVR_VM_H + +#include "pvr_rogue_mmu_defs.h" + +#include <uapi/drm/pvr_drm.h> + +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h" */ +struct pvr_device; +struct pvr_file; + +/* Forward declaration from "pvr_gem.h" */ +struct pvr_gem_object; + +/* Forward declaration from "pvr_vm.c" */ +struct pvr_vm_context; + +/* Forward declaration from <uapi/drm/pvr_drm.h> */ +struct drm_pvr_ioctl_get_heap_info_args; + +/* Forward declaration from <drm/drm_exec.h> */ +struct drm_exec; + +/* Functions defined in pvr_vm.c */ + +bool pvr_device_addr_is_valid(u64 device_addr); +bool pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx, + u64 device_addr, u64 size); + +struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, + bool is_userspace_context); + +int pvr_vm_map(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, + u64 device_addr, u64 size); +int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); + +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx); +struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx); + +int pvr_static_data_areas_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args); +int pvr_heap_info_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args); +const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev, + u64 addr, u64 size); + +struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, + u64 device_addr, + u64 *mapped_offset_out, + u64 *mapped_size_out); + +struct pvr_fw_object * +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx); + +struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle); +struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx); +bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx); +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file); + +#endif /* PVR_VM_H */ diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c new file mode 100644 index 
000000000000..b7fef3c797e6 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_fw_mips.h" +#include "pvr_gem.h" +#include "pvr_mmu.h" +#include "pvr_rogue_mips.h" +#include "pvr_vm.h" +#include "pvr_vm_mips.h" + +#include <drm/drm_managed.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/types.h> + +/** + * pvr_vm_mips_init() - Initialise MIPS FW pagetable + * @pvr_dev: Target PowerVR device. + * + * Returns: + * * 0 on success, + * * -%EINVAL if the page table size or physical bus width is invalid, or + * * -%ENOMEM on page allocation, DMA mapping or vmap() failure. + */ +int +pvr_vm_mips_init(struct pvr_device *pvr_dev) +{ + u32 pt_size = 1 << ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_4K(pvr_dev); + struct device *dev = from_pvr_device(pvr_dev)->dev; + struct pvr_fw_mips_data *mips_data; + u32 phys_bus_width; + int page_nr; + int err; + + /* Page table size must be at most ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * 4k pages. */ + if (pt_size > ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * SZ_4K) + return -EINVAL; + + if (PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width)) + return -EINVAL; + + mips_data = drmm_kzalloc(from_pvr_device(pvr_dev), sizeof(*mips_data), GFP_KERNEL); + if (!mips_data) + return -ENOMEM; + + for (page_nr = 0; page_nr < ARRAY_SIZE(mips_data->pt_pages); page_nr++) { + mips_data->pt_pages[page_nr] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!mips_data->pt_pages[page_nr]) { + err = -ENOMEM; + goto err_free_pages; + } + + mips_data->pt_dma_addr[page_nr] = dma_map_page(dev, mips_data->pt_pages[page_nr], 0, + PAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, mips_data->pt_dma_addr[page_nr])) { + err = -ENOMEM; + __free_page(mips_data->pt_pages[page_nr]); + goto err_free_pages; + } + } + + mips_data->pt = vmap(mips_data->pt_pages, pt_size >> PAGE_SHIFT, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); + if (!mips_data->pt) { + err = -ENOMEM; + goto err_free_pages; + } + + mips_data->pfn_mask = (phys_bus_width > 32) ? ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT : + ROGUE_MIPSFW_ENTRYLO_PFN_MASK; + + mips_data->cache_policy = (phys_bus_width > 32) ? ROGUE_MIPSFW_CACHED_POLICY_ABOVE_32BIT : + ROGUE_MIPSFW_CACHED_POLICY; + + pvr_dev->fw_dev.processor_data.mips_data = mips_data; + + return 0; + +err_free_pages: + while (--page_nr >= 0) { + dma_unmap_page(from_pvr_device(pvr_dev)->dev, + mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE); + + __free_page(mips_data->pt_pages[page_nr]); + } + + return err; +} + +/** + * pvr_vm_mips_fini() - Release MIPS FW pagetable + * @pvr_dev: Target PowerVR device. + */ +void +pvr_vm_mips_fini(struct pvr_device *pvr_dev) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data; + int page_nr; + + vunmap(mips_data->pt); + for (page_nr = ARRAY_SIZE(mips_data->pt_pages) - 1; page_nr >= 0; page_nr--) { + dma_unmap_page(from_pvr_device(pvr_dev)->dev, + mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE); + + __free_page(mips_data->pt_pages[page_nr]); + } + + fw_dev->processor_data.mips_data = NULL; +} + +static u32 +get_mips_pte_flags(bool read, bool write, u32 cache_policy) +{ + u32 flags = 0; + + if (read && write) /* Read/write. */ + flags |= ROGUE_MIPSFW_ENTRYLO_DIRTY_EN; + else if (write) /* Write only. 
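MIPS EntryLo has no dedicated write-only permission, so the read-inhibit bit is used to express it. 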
*/ + flags |= ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_EN; + else + WARN_ON(!read); + + flags |= cache_policy << ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT; + + flags |= ROGUE_MIPSFW_ENTRYLO_VALID_EN | ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN; + + return flags; +} + +/** + * pvr_vm_mips_map() - Map a FW object into MIPS address space + * @pvr_dev: Target PowerVR device. + * @fw_obj: FW object to map. + * + * Returns: + * * 0 on success, + * * -%EINVAL if the object does not reside within the FW address space, or + * * Any error returned by pvr_fw_object_get_dma_addr(). + */ +int +pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data; + struct pvr_gem_object *pvr_obj = fw_obj->gem; + const u64 start = fw_obj->fw_mm_node.start; + const u64 size = fw_obj->fw_mm_node.size; + u64 end; + u32 cache_policy; + u32 pte_flags; + s32 start_pfn; + s32 end_pfn; + s32 pfn; + int err; + + if (check_add_overflow(start, size - 1, &end)) + return -EINVAL; + + if (start < ROGUE_FW_HEAP_BASE || + start >= ROGUE_FW_HEAP_BASE + fw_dev->fw_heap_info.raw_size || + end < ROGUE_FW_HEAP_BASE || + end >= ROGUE_FW_HEAP_BASE + fw_dev->fw_heap_info.raw_size || + (start & ROGUE_MIPSFW_PAGE_MASK_4K) || + ((end + 1) & ROGUE_MIPSFW_PAGE_MASK_4K)) + return -EINVAL; + + start_pfn = (start & fw_dev->fw_heap_info.offset_mask) >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K; + end_pfn = (end & fw_dev->fw_heap_info.offset_mask) >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K; + + if (pvr_obj->flags & PVR_BO_FW_FLAGS_DEVICE_UNCACHED) + cache_policy = ROGUE_MIPSFW_UNCACHED_CACHE_POLICY; + else + cache_policy = mips_data->cache_policy; + + pte_flags = get_mips_pte_flags(true, true, cache_policy); + + for (pfn = start_pfn; pfn <= end_pfn; pfn++) { + dma_addr_t dma_addr; + u32 pte; + + err = pvr_fw_object_get_dma_addr(fw_obj, + (pfn - start_pfn) << + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K, + &dma_addr); + if (err) + goto err_unmap_pages; + + pte = ((dma_addr >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K) + << ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT) & mips_data->pfn_mask; + pte |= pte_flags; + + WRITE_ONCE(mips_data->pt[pfn], pte); + } + + pvr_mmu_flush_request_all(pvr_dev); + + return 0; + +err_unmap_pages: + while (--pfn >= start_pfn) + WRITE_ONCE(mips_data->pt[pfn], 0); + + pvr_mmu_flush_request_all(pvr_dev); + WARN_ON(pvr_mmu_flush_exec(pvr_dev, true)); + + return err; +} + +/** + * pvr_vm_mips_unmap() - Unmap a FW object from MIPS address space + * @pvr_dev: Target PowerVR device. + * @fw_obj: FW object to unmap. 
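+ * + * Clears every page table entry covering @fw_obj's firmware address range, + * then requests and executes an MMU flush. Unlike pvr_vm_mips_map(), this + * cannot return an error; a failed flush only triggers a WARN. 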
+ */ +void +pvr_vm_mips_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) +{ + struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev; + struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data; + const u64 start = fw_obj->fw_mm_node.start; + const u64 size = fw_obj->fw_mm_node.size; + const u64 end = start + size; + + const u32 start_pfn = (start & fw_dev->fw_heap_info.offset_mask) >> + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K; + const u32 end_pfn = (end & fw_dev->fw_heap_info.offset_mask) >> + ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K; + + for (u32 pfn = start_pfn; pfn < end_pfn; pfn++) + WRITE_ONCE(mips_data->pt[pfn], 0); + + pvr_mmu_flush_request_all(pvr_dev); + WARN_ON(pvr_mmu_flush_exec(pvr_dev, true)); +} diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.h b/drivers/gpu/drm/imagination/pvr_vm_mips.h new file mode 100644 index 000000000000..0fd59f68fb5b --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm_mips.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_VM_MIPS_H +#define PVR_VM_MIPS_H + +/* Forward declaration from pvr_device.h. */ +struct pvr_device; + +/* Forward declaration from pvr_gem.h. */ +struct pvr_fw_object; + +int +pvr_vm_mips_init(struct pvr_device *pvr_dev); +void +pvr_vm_mips_fini(struct pvr_device *pvr_dev); +int +pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj); +void +pvr_vm_mips_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj); + +#endif /* PVR_VM_MIPS_H */ diff --git a/drivers/gpu/drm/imx/dcss/dcss-drv.c b/drivers/gpu/drm/imx/dcss/dcss-drv.c index b61cec0cc79d..ad5f29ea8f6a 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-drv.c +++ b/drivers/gpu/drm/imx/dcss/dcss-drv.c @@ -80,7 +80,7 @@ err: return err; } -static int dcss_drv_platform_remove(struct platform_device *pdev) +static void dcss_drv_platform_remove(struct platform_device *pdev) { struct dcss_drv *mdrv = dev_get_drvdata(&pdev->dev); @@ -88,8 +88,6 @@ static int dcss_drv_platform_remove(struct platform_device *pdev) dcss_dev_destroy(mdrv->dcss); kfree(mdrv); - - return 0; } static void dcss_drv_platform_shutdown(struct platform_device *pdev) @@ -120,7 +118,7 @@ MODULE_DEVICE_TABLE(of, dcss_of_match); static struct platform_driver dcss_platform_driver = { .probe = dcss_drv_platform_probe, - .remove = dcss_drv_platform_remove, + .remove_new = dcss_drv_platform_remove, .shutdown = dcss_drv_platform_shutdown, .driver = { .name = "imx-dcss", diff --git a/drivers/gpu/drm/imx/ipuv3/imx-ldb.c b/drivers/gpu/drm/imx/ipuv3/imx-ldb.c index 989eca32d325..53840ab054c7 100644 --- a/drivers/gpu/drm/imx/ipuv3/imx-ldb.c +++ b/drivers/gpu/drm/imx/ipuv3/imx-ldb.c @@ -12,8 +12,10 @@ #include <linux/mfd/syscon.h> #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include <linux/module.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/of_graph.h> +#include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/videodev2.h> @@ -617,7 +619,6 @@ static int imx_ldb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - const struct of_device_id *of_id = of_match_device(imx_ldb_dt_ids, dev); struct device_node *child; struct imx_ldb *imx_ldb; int dual; @@ -638,9 +639,7 @@ static int imx_ldb_probe(struct platform_device *pdev) regmap_write(imx_ldb->regmap, IOMUXC_GPR2, 0); imx_ldb->dev = dev; - - if (of_id) - imx_ldb->lvds_mux = of_id->data; + imx_ldb->lvds_mux = 
device_get_match_data(dev); dual = of_property_read_bool(np, "fsl,dual-channel"); if (dual) diff --git a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c index 22b65f4a0e30..43ddf3a9810b 100644 --- a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c +++ b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c @@ -342,21 +342,12 @@ static const struct drm_mode_config_helper_funcs imx_lcdc_mode_config_helpers = .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, }; -static void imx_lcdc_release(struct drm_device *drm) -{ - struct imx_lcdc *lcdc = imx_lcdc_from_drmdev(drm); - - drm_kms_helper_poll_fini(drm); - kfree(lcdc); -} - DEFINE_DRM_GEM_DMA_FOPS(imx_lcdc_drm_fops); static struct drm_driver imx_lcdc_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, .fops = &imx_lcdc_drm_fops, DRM_GEM_DMA_DRIVER_OPS_VMAP, - .release = imx_lcdc_release, .name = "imx-lcdc", .desc = "i.MX LCDC driver", .date = "20200716", @@ -515,14 +506,12 @@ static int imx_lcdc_probe(struct platform_device *pdev) return 0; } -static int imx_lcdc_remove(struct platform_device *pdev) +static void imx_lcdc_remove(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); drm_dev_unregister(drm); drm_atomic_helper_shutdown(drm); - - return 0; } static void imx_lcdc_shutdown(struct platform_device *pdev) @@ -536,7 +525,7 @@ static struct platform_driver imx_lcdc_driver = { .of_match_table = imx_lcdc_of_dev_id, }, .probe = imx_lcdc_probe, - .remove = imx_lcdc_remove, + .remove_new = imx_lcdc_remove, .shutdown = imx_lcdc_shutdown, }; module_platform_driver(imx_lcdc_driver); diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 24035b53441c..169b83987ce2 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -448,7 +448,7 @@ static const struct drm_driver kmb_driver = { .minor = DRIVER_MINOR, }; -static int kmb_remove(struct platform_device *pdev) +static void kmb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct drm_device *drm = dev_get_drvdata(dev); @@ -473,7 +473,6 @@ static int kmb_remove(struct platform_device *pdev) /* Unregister DSI host */ kmb_dsi_host_unregister(kmb->kmb_dsi); drm_atomic_helper_shutdown(drm); - return 0; } static int kmb_probe(struct platform_device *pdev) @@ -621,7 +620,7 @@ static SIMPLE_DEV_PM_OPS(kmb_pm_ops, kmb_pm_suspend, kmb_pm_resume); static struct platform_driver kmb_platform_driver = { .probe = kmb_probe, - .remove = kmb_remove, + .remove_new = kmb_remove, .driver = { .name = "kmb-drm", .pm = &kmb_pm_ops, diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 02cef0cea657..0bf7105c8748 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -514,7 +514,7 @@ int lima_device_suspend(struct device *dev) /* check any task running */ for (i = 0; i < lima_pipe_num; i++) { - if (atomic_read(&ldev->pipe[i].base.hw_rq_count)) + if (atomic_read(&ldev->pipe[i].base.credit_count)) return -EBUSY; } diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 295f0353a02e..c3bf8cda8498 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sched_task *task, for (i = 0; i < num_bos; i++) drm_gem_object_get(&bos[i]->base.base); - err = drm_sched_job_init(&task->base, &context->base, vm); + err = drm_sched_job_init(&task->base, &context->base, 1, vm); if (err) { kfree(task->bos); return err; 
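These lima changes track a DRM scheduler API update visible throughout this tree: drm_sched_job_init() now takes a per-job credit count, drm_sched_init() (next hunk) takes a submit workqueue plus a credit limit, and the scheduler's hw_rq_count counter becomes credit_count (see the lima_device.c hunk above). A minimal sketch of the updated calls; the argument order is taken from these hunks, while every foo_* name is hypothetical:

        /* One credit per job; lima passes the VM as the job owner. */
        err = drm_sched_job_init(&foo_task->base, &foo_ctx->base, 1, foo_vm);
        if (err)
                return err;

        /* sched, ops, submit_wq, num_rqs, credit_limit, hang_limit,
         * timeout, timeout_wq, score, name, dev
         */
        err = drm_sched_init(&foo_pipe->base, &foo_sched_ops, NULL,
                             DRM_SCHED_PRIORITY_COUNT, 1, foo_hang_limit,
                             msecs_to_jiffies(foo_timeout_ms), NULL, NULL,
                             foo_name, foo_dev);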
@@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) INIT_WORK(&pipe->recover_work, lima_sched_recover_work); - return drm_sched_init(&pipe->base, &lima_sched_ops, + return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, 1, lima_job_hang_limit, diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig index df6946d505fa..8e59753e532d 100644 --- a/drivers/gpu/drm/loongson/Kconfig +++ b/drivers/gpu/drm/loongson/Kconfig @@ -3,6 +3,7 @@ config DRM_LOONGSON tristate "DRM support for Loongson Graphics" depends on DRM && PCI && MMU + depends on LOONGARCH || MIPS || COMPILE_TEST select DRM_KMS_HELPER select DRM_TTM select I2C diff --git a/drivers/gpu/drm/loongson/lsdc_plane.c b/drivers/gpu/drm/loongson/lsdc_plane.c index 0d5094633222..d227a2c1dcf1 100644 --- a/drivers/gpu/drm/loongson/lsdc_plane.c +++ b/drivers/gpu/drm/loongson/lsdc_plane.c @@ -9,7 +9,6 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> -#include <drm/drm_plane_helper.h> #include "lsdc_drv.h" #include "lsdc_regs.h" diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index d4d193f60271..5e4436403b8d 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -16,7 +16,8 @@ mediatek-drm-y := mtk_disp_aal.o \ mtk_dsi.o \ mtk_dpi.o \ mtk_ethdr.o \ - mtk_mdp_rdma.o + mtk_mdp_rdma.o \ + mtk_padding.o obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index f47f417d8ba6..8519e9bade36 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -185,7 +185,6 @@ static int mtk_cec_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_cec *cec; - struct resource *res; int ret; cec = devm_kzalloc(dev, sizeof(*cec), GFP_KERNEL); @@ -195,8 +194,7 @@ static int mtk_cec_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cec); spin_lock_init(&cec->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - cec->regs = devm_ioremap_resource(dev, res); + cec->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cec->regs)) { ret = PTR_ERR(cec->regs); dev_err(dev, "Failed to ioremap cec: %d\n", ret); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c index 2209159d8855..40fe403086c3 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_aal.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c @@ -168,7 +168,6 @@ static int mtk_disp_aal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_disp_aal *priv; - struct resource *res; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -181,8 +180,7 @@ static int mtk_disp_aal_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->regs = devm_ioremap_resource(dev, res); + priv->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->regs)) { dev_err(dev, "failed to ioremap aal\n"); return PTR_ERR(priv->regs); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c index 4234ff7485e8..465cddce0d32 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c @@ -153,7 +153,6 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_disp_ccorr *priv; - 
struct resource *res; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -166,8 +165,7 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->regs = devm_ioremap_resource(dev, res); + priv->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->regs)) { dev_err(dev, "failed to ioremap ccorr\n"); return PTR_ERR(priv->regs); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 1311562d25cc..74fa56339383 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -110,6 +110,8 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next); void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next); +int mtk_ovl_adaptor_power_on(struct device *dev); +void mtk_ovl_adaptor_power_off(struct device *dev); int mtk_ovl_adaptor_clk_enable(struct device *dev); void mtk_ovl_adaptor_clk_disable(struct device *dev); void mtk_ovl_adaptor_config(struct device *dev, unsigned int w, @@ -151,6 +153,8 @@ void mtk_rdma_disable_vblank(struct device *dev); const u32 *mtk_rdma_get_formats(struct device *dev); size_t mtk_rdma_get_num_formats(struct device *dev); +int mtk_mdp_rdma_power_on(struct device *dev); +void mtk_mdp_rdma_power_off(struct device *dev); int mtk_mdp_rdma_clk_enable(struct device *dev); void mtk_mdp_rdma_clk_disable(struct device *dev); void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt); @@ -160,4 +164,8 @@ void mtk_mdp_rdma_config(struct device *dev, struct mtk_mdp_rdma_cfg *cfg, const u32 *mtk_mdp_rdma_get_formats(struct device *dev); size_t mtk_mdp_rdma_get_num_formats(struct device *dev); +int mtk_padding_clk_enable(struct device *dev); +void mtk_padding_clk_disable(struct device *dev); +void mtk_padding_start(struct device *dev); +void mtk_padding_stop(struct device *dev); #endif diff --git a/drivers/gpu/drm/mediatek/mtk_disp_merge.c b/drivers/gpu/drm/mediatek/mtk_disp_merge.c index e525a6b9e5b0..22f768d923d5 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_merge.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_merge.c @@ -103,7 +103,7 @@ void mtk_merge_stop_cmdq(struct device *dev, struct cmdq_pkt *cmdq_pkt) mtk_ddp_write(cmdq_pkt, 0, &priv->cmdq_reg, priv->regs, DISP_REG_MERGE_CTRL); - if (priv->async_clk) + if (!cmdq_pkt && priv->async_clk) reset_control_reset(priv->reset_ctl); } diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 6bf6367853fb..12a37f740bf4 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -27,13 +27,14 @@ #define MTK_OVL_ADAPTOR_LAYER_NUM 4 enum mtk_ovl_adaptor_comp_type { - OVL_ADAPTOR_TYPE_RDMA = 0, - OVL_ADAPTOR_TYPE_MERGE, OVL_ADAPTOR_TYPE_ETHDR, + OVL_ADAPTOR_TYPE_MDP_RDMA, + OVL_ADAPTOR_TYPE_MERGE, OVL_ADAPTOR_TYPE_NUM, }; enum mtk_ovl_adaptor_comp_id { + OVL_ADAPTOR_ETHDR0, OVL_ADAPTOR_MDP_RDMA0, OVL_ADAPTOR_MDP_RDMA1, OVL_ADAPTOR_MDP_RDMA2, @@ -46,13 +47,14 @@ enum mtk_ovl_adaptor_comp_id { OVL_ADAPTOR_MERGE1, OVL_ADAPTOR_MERGE2, OVL_ADAPTOR_MERGE3, - OVL_ADAPTOR_ETHDR0, OVL_ADAPTOR_ID_MAX }; struct ovl_adaptor_comp_match { enum mtk_ovl_adaptor_comp_type type; + enum mtk_ddp_comp_id comp_id; int alias_id; + const struct mtk_ddp_comp_funcs *funcs; }; struct mtk_disp_ovl_adaptor { @@ -62,25 +64,44 @@ struct mtk_disp_ovl_adaptor { }; static 
const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { - [OVL_ADAPTOR_TYPE_RDMA] = "vdo1-rdma", - [OVL_ADAPTOR_TYPE_MERGE] = "merge", [OVL_ADAPTOR_TYPE_ETHDR] = "ethdr", + [OVL_ADAPTOR_TYPE_MDP_RDMA] = "vdo1-rdma", + [OVL_ADAPTOR_TYPE_MERGE] = "merge", +}; + +static const struct mtk_ddp_comp_funcs ethdr = { + .clk_enable = mtk_ethdr_clk_enable, + .clk_disable = mtk_ethdr_clk_disable, + .start = mtk_ethdr_start, + .stop = mtk_ethdr_stop, +}; + +static const struct mtk_ddp_comp_funcs merge = { + .clk_enable = mtk_merge_clk_enable, + .clk_disable = mtk_merge_clk_disable, +}; + +static const struct mtk_ddp_comp_funcs rdma = { + .power_on = mtk_mdp_rdma_power_on, + .power_off = mtk_mdp_rdma_power_off, + .clk_enable = mtk_mdp_rdma_clk_enable, + .clk_disable = mtk_mdp_rdma_clk_disable, }; static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { - [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_RDMA, 0 }, - [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_RDMA, 1 }, - [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_RDMA, 2 }, - [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_RDMA, 3 }, - [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_RDMA, 4 }, - [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_RDMA, 5 }, - [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_RDMA, 6 }, - [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_RDMA, 7 }, - [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, 1 }, - [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, 2 }, - [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, 3 }, - [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, 4 }, - [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, 0 }, + [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, ðdr }, + [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma }, + [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma }, + [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma }, + [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3, &rdma }, + [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4, &rdma }, + [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5, &rdma }, + [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6, &rdma }, + [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7, &rdma }, + [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1, &merge }, + [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge }, + [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge }, + [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge }, }; void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx, @@ -172,68 +193,112 @@ void mtk_ovl_adaptor_config(struct device *dev, unsigned int w, void mtk_ovl_adaptor_start(struct device *dev) { + int i; struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - mtk_ethdr_start(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]); + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->start) + continue; + + comp_matches[i].funcs->start(ovl_adaptor->ovl_adaptor_comp[i]); + } } void mtk_ovl_adaptor_stop(struct device *dev) { + int i; struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - 
mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]); + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->stop) + continue; + + comp_matches[i].funcs->stop(ovl_adaptor->ovl_adaptor_comp[i]); + } } -int mtk_ovl_adaptor_clk_enable(struct device *dev) +/** + * power_off - Power off the devices in OVL adaptor + * @dev: Device to be powered off + * @num: Number of the devices to be powered off + * + * Calls the .power_off() ovl_adaptor component callback if it is present. + */ +static inline void power_off(struct device *dev, int num) { struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - struct device *comp; - int ret; int i; - for (i = 0; i < OVL_ADAPTOR_MERGE0; i++) { - comp = ovl_adaptor->ovl_adaptor_comp[i]; - ret = pm_runtime_get_sync(comp); + if (num > OVL_ADAPTOR_ID_MAX) + num = OVL_ADAPTOR_ID_MAX; + + for (i = num - 1; i >= 0; i--) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->power_off) + continue; + + comp_matches[i].funcs->power_off(ovl_adaptor->ovl_adaptor_comp[i]); + } +} + +/** + * mtk_ovl_adaptor_power_on - Power on the devices in OVL adaptor + * @dev: Device to be powered on + * + * Different from OVL, OVL adaptor is a pseudo device so + * we didn't define it in the device tree, pm_runtime_resume_and_get() + * called by .atomic_enable() power on no device in OVL adaptor, + * we have to implement a function to do the job instead. + * + * Return: Zero for success or negative number for failure. + */ +int mtk_ovl_adaptor_power_on(struct device *dev) +{ + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + int i, ret; + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->power_on) + continue; + + ret = comp_matches[i].funcs->power_on(ovl_adaptor->ovl_adaptor_comp[i]); if (ret < 0) { dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret); - goto pwr_err; + power_off(dev, i); + return ret; } } + return 0; +} + +void mtk_ovl_adaptor_power_off(struct device *dev) +{ + power_off(dev, OVL_ADAPTOR_ID_MAX); +} + +int mtk_ovl_adaptor_clk_enable(struct device *dev) +{ + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + struct device *comp; + int ret; + int i; for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { comp = ovl_adaptor->ovl_adaptor_comp[i]; - - if (i < OVL_ADAPTOR_MERGE0) - ret = mtk_mdp_rdma_clk_enable(comp); - else if (i < OVL_ADAPTOR_ETHDR0) - ret = mtk_merge_clk_enable(comp); - else - ret = mtk_ethdr_clk_enable(comp); + if (!comp || !comp_matches[i].funcs->clk_enable) + continue; + ret = comp_matches[i].funcs->clk_enable(comp); if (ret) { dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret); - goto clk_err; + while (--i >= 0) + comp_matches[i].funcs->clk_disable(comp); + return ret; } } - - return ret; - -clk_err: - while (--i >= 0) { - comp = ovl_adaptor->ovl_adaptor_comp[i]; - if (i < OVL_ADAPTOR_MERGE0) - mtk_mdp_rdma_clk_disable(comp); - else if (i < OVL_ADAPTOR_ETHDR0) - mtk_merge_clk_disable(comp); - else - mtk_ethdr_clk_disable(comp); - } - i = OVL_ADAPTOR_MERGE0; - -pwr_err: - while (--i >= 0) - pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]); - - return ret; + return 0; } void mtk_ovl_adaptor_clk_disable(struct device *dev) @@ -244,15 +309,11 @@ void mtk_ovl_adaptor_clk_disable(struct device *dev) for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { comp = ovl_adaptor->ovl_adaptor_comp[i]; - - if (i < OVL_ADAPTOR_MERGE0) { - mtk_mdp_rdma_clk_disable(comp); + if 
(!comp || !comp_matches[i].funcs->clk_disable) + continue; + comp_matches[i].funcs->clk_disable(comp); + if (i < OVL_ADAPTOR_MERGE0) pm_runtime_put(comp); - } else if (i < OVL_ADAPTOR_ETHDR0) { - mtk_merge_clk_disable(comp); - } else { - mtk_ethdr_clk_disable(comp); - } } } @@ -314,40 +375,31 @@ size_t mtk_ovl_adaptor_get_num_formats(struct device *dev) void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex) { - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA0); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA1); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA2); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA3); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA4); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA5); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA6); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA7); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE1); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE2); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE3); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE4); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_ETHDR_MIXER); + int i; + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i]) + continue; + mtk_mutex_add_comp(mutex, comp_matches[i].comp_id); + } } void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex) { - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA0); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA1); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA2); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA3); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA4); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA5); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA6); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA7); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE1); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE2); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE3); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE4); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_ETHDR_MIXER); + int i; + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i]) + continue; + mtk_mutex_remove_comp(mutex, comp_matches[i].comp_id); + } } void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next) { + mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2); @@ -355,11 +407,11 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsig mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER); - mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); } void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next) { + mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_disconnect(mmsys_dev, 
DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2); @@ -367,7 +419,6 @@ void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, un mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER); - mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); } static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node, @@ -386,17 +437,10 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node, } static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = { - { - .compatible = "mediatek,mt8195-vdo1-rdma", - .data = (void *)OVL_ADAPTOR_TYPE_RDMA, - }, { - .compatible = "mediatek,mt8195-disp-merge", - .data = (void *)OVL_ADAPTOR_TYPE_MERGE, - }, { - .compatible = "mediatek,mt8195-disp-ethdr", - .data = (void *)OVL_ADAPTOR_TYPE_ETHDR, - }, - {}, + { .compatible = "mediatek,mt8195-disp-ethdr", .data = (void *)OVL_ADAPTOR_TYPE_ETHDR }, + { .compatible = "mediatek,mt8195-disp-merge", .data = (void *)OVL_ADAPTOR_TYPE_MERGE }, + { .compatible = "mediatek,mt8195-vdo1-rdma", .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA }, + { /* sentinel */ } }; static int compare_of(struct device *dev, void *data) @@ -531,16 +575,15 @@ static int mtk_disp_ovl_adaptor_probe(struct platform_device *pdev) return ret; } -static int mtk_disp_ovl_adaptor_remove(struct platform_device *pdev) +static void mtk_disp_ovl_adaptor_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &mtk_disp_ovl_adaptor_master_ops); pm_runtime_disable(&pdev->dev); - return 0; } struct platform_driver mtk_disp_ovl_adaptor_driver = { .probe = mtk_disp_ovl_adaptor_probe, - .remove = mtk_disp_ovl_adaptor_remove, + .remove_new = mtk_disp_ovl_adaptor_remove, .driver = { .name = "mediatek-disp-ovl-adaptor", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index e4c16ba9902d..2136a596efa1 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2818,3 +2818,4 @@ MODULE_AUTHOR("Markus Schneider-Pargmann <msp@baylibre.com>"); MODULE_AUTHOR("Bo-Chen Chen <rex-bc.chen@mediatek.com>"); MODULE_DESCRIPTION("MediaTek DisplayPort Driver"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: phy_mtk_dp"); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 4e3d9f7b4d8c..beb7d9d08e97 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -966,20 +966,6 @@ static const struct mtk_dpi_conf mt8186_conf = { .csc_enable_bit = CSC_ENABLE, }; -static const struct mtk_dpi_conf mt8188_dpintf_conf = { - .cal_factor = mt8195_dpintf_calculate_factor, - .max_clock_khz = 600000, - .output_fmts = mt8195_output_fmts, - .num_output_fmts = ARRAY_SIZE(mt8195_output_fmts), - .pixels_per_iter = 4, - .input_2pixel = false, - .dimension_mask = DPINTF_HPW_MASK, - .hvsize_mask = DPINTF_HSIZE_MASK, - .channel_swap_shift = DPINTF_CH_SWAP, - .yuv422_en_bit = DPINTF_YUV422_EN, - .csc_enable_bit = DPINTF_CSC_ENABLE, -}; - static const struct mtk_dpi_conf mt8192_conf = { .cal_factor = mt8183_calculate_factor, .reg_h_fre_con = 0xe0, @@ -1103,7 +1089,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = { { .compatible = "mediatek,mt8173-dpi", .data = &mt8173_conf }, { 
.compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf }, { .compatible = "mediatek,mt8186-dpi", .data = &mt8186_conf }, - { .compatible = "mediatek,mt8188-dp-intf", .data = &mt8188_dpintf_conf }, + { .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf }, { .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf }, { .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf }, { /* sentinel */ }, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index db43f9dff912..c729af3b9822 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -721,7 +721,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); - ret = pm_runtime_resume_and_get(comp->dev); + ret = mtk_ddp_comp_power_on(comp); if (ret < 0) { DRM_DEV_ERROR(comp->dev, "Failed to enable power domain: %d\n", ret); return; @@ -731,7 +731,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, ret = mtk_crtc_ddp_hw_init(mtk_crtc); if (ret) { - pm_runtime_put(comp->dev); + mtk_ddp_comp_power_off(comp); return; } @@ -744,7 +744,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - int i, ret; + int i; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); if (!mtk_crtc->enabled) @@ -774,9 +774,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); mtk_crtc_ddp_hw_fini(mtk_crtc); - ret = pm_runtime_put(comp->dev); - if (ret < 0) - DRM_DEV_ERROR(comp->dev, "Failed to disable power domain: %d\n", ret); + mtk_ddp_comp_power_off(comp); mtk_crtc->enabled = false; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 3046c0409353..a9b5a21cde2d 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -398,6 +398,8 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = { }; static const struct mtk_ddp_comp_funcs ddp_ovl_adaptor = { + .power_on = mtk_ovl_adaptor_power_on, + .power_off = mtk_ovl_adaptor_power_off, .clk_enable = mtk_ovl_adaptor_clk_enable, .clk_disable = mtk_ovl_adaptor_clk_disable, .config = mtk_ovl_adaptor_config, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 4bae55bdb034..15b2eafff438 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -7,6 +7,7 @@ #define MTK_DRM_DDP_COMP_H #include <linux/io.h> +#include <linux/pm_runtime.h> #include <linux/soc/mediatek/mtk-cmdq.h> #include <linux/soc/mediatek/mtk-mmsys.h> #include <linux/soc/mediatek/mtk-mutex.h> @@ -46,6 +47,8 @@ enum mtk_ddp_comp_type { struct mtk_ddp_comp; struct cmdq_pkt; struct mtk_ddp_comp_funcs { + int (*power_on)(struct device *dev); + void (*power_off)(struct device *dev); int (*clk_enable)(struct device *dev); void (*clk_disable)(struct device *dev); void (*config)(struct device *dev, unsigned int w, @@ -92,6 +95,23 @@ struct mtk_ddp_comp { const struct mtk_ddp_comp_funcs *funcs; }; +static inline int mtk_ddp_comp_power_on(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->power_on) + return comp->funcs->power_on(comp->dev); + else + return pm_runtime_resume_and_get(comp->dev); + return 0; +} + +static inline void mtk_ddp_comp_power_off(struct mtk_ddp_comp *comp) +{ + if (comp->funcs 
&& comp->funcs->power_off) + comp->funcs->power_off(comp->dev); + else + pm_runtime_put(comp->dev); +} + static inline int mtk_ddp_comp_clk_enable(struct mtk_ddp_comp *comp) { if (comp->funcs && comp->funcs->clk_enable) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2b0c35cacbc6..14a1e0157cc4 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -5,7 +5,6 @@ */ #include <linux/component.h> -#include <linux/iommu.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_platform.h> @@ -611,9 +610,6 @@ static int mtk_drm_bind(struct device *dev) struct drm_device *drm; int ret, i; - if (!iommu_present(&platform_bus_type)) - return -EPROBE_DEFER; - pdev = of_find_device_by_node(private->mutex_node); if (!pdev) { dev_err(dev, "Waiting for disp-mutex device %pOF\n", @@ -1003,6 +999,7 @@ static struct platform_driver * const mtk_drm_drivers[] = { &mtk_dsi_driver, &mtk_ethdr_driver, &mtk_mdp_rdma_driver, + &mtk_padding_driver, }; static int __init mtk_drm_init(void) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index 6f98fff4f1a4..33fadb08dc1c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -77,5 +77,5 @@ extern struct platform_driver mtk_dpi_driver; extern struct platform_driver mtk_dsi_driver; extern struct platform_driver mtk_ethdr_driver; extern struct platform_driver mtk_mdp_rdma_driver; - +extern struct platform_driver mtk_padding_driver; #endif /* MTK_DRM_DRV_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_ethdr.c b/drivers/gpu/drm/mediatek/mtk_ethdr.c index db7ac666ec5e..6a5d0c345aab 100644 --- a/drivers/gpu/drm/mediatek/mtk_ethdr.c +++ b/drivers/gpu/drm/mediatek/mtk_ethdr.c @@ -346,10 +346,9 @@ static int mtk_ethdr_probe(struct platform_device *pdev) return ret; } -static int mtk_ethdr_remove(struct platform_device *pdev) +static void mtk_ethdr_remove(struct platform_device *pdev) { component_del(&pdev->dev, &mtk_ethdr_component_ops); - return 0; } static const struct of_device_id mtk_ethdr_driver_dt_match[] = { @@ -361,7 +360,7 @@ MODULE_DEVICE_TABLE(of, mtk_ethdr_driver_dt_match); struct platform_driver mtk_ethdr_driver = { .probe = mtk_ethdr_probe, - .remove = mtk_ethdr_remove, + .remove_new = mtk_ethdr_remove, .driver = { .name = "mediatek-disp-ethdr", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c index c3adaeefd551..ee9ce9b6d078 100644 --- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c @@ -242,12 +242,27 @@ size_t mtk_mdp_rdma_get_num_formats(struct device *dev) return ARRAY_SIZE(formats); } +int mtk_mdp_rdma_power_on(struct device *dev) +{ + int ret = pm_runtime_resume_and_get(dev); + + if (ret < 0) { + dev_err(dev, "Failed to power on: %d\n", ret); + return ret; + } + return 0; +} + +void mtk_mdp_rdma_power_off(struct device *dev) +{ + pm_runtime_put(dev); +} + int mtk_mdp_rdma_clk_enable(struct device *dev) { struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev); - clk_prepare_enable(rdma->clk); - return 0; + return clk_prepare_enable(rdma->clk); } void mtk_mdp_rdma_clk_disable(struct device *dev) diff --git a/drivers/gpu/drm/mediatek/mtk_padding.c b/drivers/gpu/drm/mediatek/mtk_padding.c new file mode 100644 index 000000000000..0d6451c149b6 --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_padding.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 
Copyright (c) 2023 MediaTek Inc. + */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/soc/mediatek/mtk-cmdq.h> + +#include "mtk_disp_drv.h" +#include "mtk_drm_crtc.h" +#include "mtk_drm_ddp_comp.h" + +#define PADDING_CONTROL_REG 0x00 +#define PADDING_BYPASS BIT(0) +#define PADDING_ENABLE BIT(1) +#define PADDING_PIC_SIZE_REG 0x04 +#define PADDING_H_REG 0x08 /* horizontal */ +#define PADDING_V_REG 0x0c /* vertical */ +#define PADDING_COLOR_REG 0x10 + +/** + * struct mtk_padding - Basic information of the Padding + * @clk: Clock of the module + * @reg: Virtual address of the Padding for CPU to access + * @cmdq_reg: CMDQ setting of the Padding + * + * Every Padding has its own clock source, register base and CMDQ setting, + * so we store these differences together. + */ +struct mtk_padding { + struct clk *clk; + void __iomem *reg; + struct cmdq_client_reg cmdq_reg; +}; + +int mtk_padding_clk_enable(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + return clk_prepare_enable(padding->clk); +} + +void mtk_padding_clk_disable(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + clk_disable_unprepare(padding->clk); +} + +void mtk_padding_start(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + writel(PADDING_ENABLE | PADDING_BYPASS, + padding->reg + PADDING_CONTROL_REG); + + /* + * Note that even when the padding is in bypass mode, + * all the settings must be cleared to 0, or + * undefined behaviour could result. + */ + writel(0, padding->reg + PADDING_PIC_SIZE_REG); + writel(0, padding->reg + PADDING_H_REG); + writel(0, padding->reg + PADDING_V_REG); + writel(0, padding->reg + PADDING_COLOR_REG); +} + +void mtk_padding_stop(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + writel(0, padding->reg + PADDING_CONTROL_REG); +} + +static int mtk_padding_bind(struct device *dev, struct device *master, void *data) +{ + return 0; +} + +static void mtk_padding_unbind(struct device *dev, struct device *master, void *data) +{ +} + +static const struct component_ops mtk_padding_component_ops = { + .bind = mtk_padding_bind, + .unbind = mtk_padding_unbind, +}; + +static int mtk_padding_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_padding *priv; + struct resource *res; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(dev, "failed to get clk\n"); + return PTR_ERR(priv->clk); + } + + priv->reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(priv->reg)) { + dev_err(dev, "failed to do ioremap\n"); + return PTR_ERR(priv->reg); + } + +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0); + if (ret) { + dev_err(dev, "failed to get gce client reg\n"); + return ret; + } +#endif + + platform_set_drvdata(pdev, priv); + + ret = devm_pm_runtime_enable(dev); + if (ret) + return ret; + + /* devm_pm_runtime_enable() already disables on failure paths. */ + ret = component_add(dev, &mtk_padding_component_ops); + if (ret) + return dev_err_probe(dev, ret, "failed to add component\n"); + + return 0; +} + +static int mtk_padding_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &mtk_padding_component_ops); + return 0; +} + +static const struct 
of_device_id mtk_padding_driver_dt_match[] = { + { .compatible = "mediatek,mt8188-disp-padding" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mtk_padding_driver_dt_match); + +struct platform_driver mtk_padding_driver = { + .probe = mtk_padding_probe, + .remove = mtk_padding_remove, + .driver = { + .name = "mediatek-disp-padding", + .owner = THIS_MODULE, + .of_match_table = mtk_padding_driver_dt_match, + }, +}; diff --git a/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c b/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c index e5fe4e994f43..a6bc1bdb3d0d 100644 --- a/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c +++ b/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c @@ -323,13 +323,11 @@ static int meson_dw_mipi_dsi_probe(struct platform_device *pdev) return 0; } -static int meson_dw_mipi_dsi_remove(struct platform_device *pdev) +static void meson_dw_mipi_dsi_remove(struct platform_device *pdev) { struct meson_dw_mipi_dsi *mipi_dsi = platform_get_drvdata(pdev); dw_mipi_dsi_remove(mipi_dsi->dmd); - - return 0; } static const struct of_device_id meson_dw_mipi_dsi_of_table[] = { @@ -340,7 +338,7 @@ MODULE_DEVICE_TABLE(of, meson_dw_mipi_dsi_of_table); static struct platform_driver meson_dw_mipi_dsi_platform_driver = { .probe = meson_dw_mipi_dsi_probe, - .remove = meson_dw_mipi_dsi_remove, + .remove_new = meson_dw_mipi_dsi_remove, .driver = { .name = DRIVER_NAME, .of_match_table = meson_dw_mipi_dsi_of_table, diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 6309a857ca31..f202f26adab2 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -6,6 +6,7 @@ config DRM_MSM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST depends on COMMON_CLK depends on IOMMU_SUPPORT + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n depends on QCOM_OCMEM || QCOM_OCMEM=n depends on QCOM_LLCC || QCOM_LLCC=n depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n @@ -16,6 +17,7 @@ config DRM_MSM select DRM_DP_AUX_BUS select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER + select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL select DRM_BRIDGE diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 49671364fdcf..b1173128b5b9 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -63,6 +63,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \ disp/dpu1/dpu_encoder_phys_wb.o \ disp/dpu1/dpu_formats.o \ disp/dpu1/dpu_hw_catalog.o \ + disp/dpu1/dpu_hw_cdm.o \ disp/dpu1/dpu_hw_ctl.o \ disp/dpu1/dpu_hw_dsc.o \ disp/dpu1/dpu_hw_dsc_1_2.o \ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e5916c106796..c003f970189b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -684,7 +684,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - u32 regbit; + u32 hbb; int ret; gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); @@ -820,18 +820,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); - /* Set the highest bank bit */ - if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu)) - regbit = 2; - else - regbit = 1; + BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); + hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; - gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7); - gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1); + gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7); + gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb 
<< 1); if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) || adreno_is_a540(adreno_gpu)) - gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit); + gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb); /* Disable All flat shading optimization (ALLFLATOPTDIS) */ gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10)); @@ -1785,5 +1782,11 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) /* Set up the preemption specific bits and pieces for each ringbuffer */ a5xx_preempt_init(gpu); + /* Set the highest bank bit */ + if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu)) + adreno_gpu->ubwc_config.highest_bank_bit = 15; + else + adreno_gpu->ubwc_config.highest_bank_bit = 14; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7a0220d29a23..c0bc924cd302 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1270,87 +1270,92 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]); } -static void a6xx_set_ubwc_config(struct msm_gpu *gpu) +static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); /* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */ - u32 rgb565_predicator = 0; + gpu->ubwc_config.rgb565_predicator = 0; /* Unknown, introduced with A650 family */ - u32 uavflagprd_inv = 0; + gpu->ubwc_config.uavflagprd_inv = 0; /* Whether the minimum access length is 64 bits */ - u32 min_acc_len = 0; + gpu->ubwc_config.min_acc_len = 0; /* Entirely magic, per-GPU-gen value */ - u32 ubwc_mode = 0; + gpu->ubwc_config.ubwc_mode = 0; /* * The Highest Bank Bit value represents the bit of the highest DDR bank. - * We then subtract 13 from it (13 is the minimum value allowed by hw) and - * write the lowest two bits of the remaining value as hbb_lo and the - * one above it as hbb_hi to the hardware. This should ideally use DRAM - * type detection. + * This should ideally use DRAM type detection. 
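+ * The value is stored here as the raw DDR bit number; 13 is the minimum the + * hardware can encode, and a6xx_set_ubwc_config() below translates it into + * the hbb_hi/hbb_lo register fields. 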
*/ - u32 hbb_hi = 0; - u32 hbb_lo = 2; - /* Unknown, introduced with A640/680 */ - u32 amsbc = 0; + gpu->ubwc_config.highest_bank_bit = 15; - if (adreno_is_a610(adreno_gpu)) { - /* HBB = 14 */ - hbb_lo = 1; - min_acc_len = 1; - ubwc_mode = 1; + if (adreno_is_a610(gpu)) { + gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.min_acc_len = 1; + gpu->ubwc_config.ubwc_mode = 1; } /* a618 is using the hw default values */ - if (adreno_is_a618(adreno_gpu)) + if (adreno_is_a618(gpu)) return; - if (adreno_is_a619_holi(adreno_gpu)) - hbb_lo = 0; + if (adreno_is_a619_holi(gpu)) + gpu->ubwc_config.highest_bank_bit = 13; - if (adreno_is_a640_family(adreno_gpu)) - amsbc = 1; + if (adreno_is_a640_family(gpu)) + gpu->ubwc_config.amsbc = 1; - if (adreno_is_a650(adreno_gpu) || - adreno_is_a660(adreno_gpu) || - adreno_is_a730(adreno_gpu) || - adreno_is_a740_family(adreno_gpu)) { + if (adreno_is_a650(gpu) || + adreno_is_a660(gpu) || + adreno_is_a690(gpu) || + adreno_is_a730(gpu) || + adreno_is_a740_family(gpu)) { /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ - hbb_lo = 3; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; + gpu->ubwc_config.highest_bank_bit = 16; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; } - if (adreno_is_a690(adreno_gpu)) { - hbb_lo = 2; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; + if (adreno_is_7c3(gpu)) { + gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; } +} - if (adreno_is_7c3(adreno_gpu)) { - hbb_lo = 1; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; - } +static void a6xx_set_ubwc_config(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + /* + * We subtract 13 from the highest bank bit (13 is the minimum value + * allowed by hw) and write the lowest two bits of the remaining value + * as hbb_lo and the one above it as hbb_hi to the hardware. 
+ */ + BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); + u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; + u32 hbb_hi = hbb >> 2; + u32 hbb_lo = hbb & 3; gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, - rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 | - min_acc_len << 3 | hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.rgb565_predicator << 11 | + hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 | + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 | - min_acc_len << 3 | hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 | - uavflagprd_inv << 4 | min_acc_len << 3 | - hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.uavflagprd_inv << 4 | + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), hbb_lo)); - gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | hbb_lo << 21); + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, + adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21); } static int a6xx_cp_init(struct msm_gpu *gpu) @@ -1741,7 +1746,9 @@ static int hw_init(struct msm_gpu *gpu) /* Setting the primFifo thresholds default values, * and vccCacheSkipDis=1 bit (0x200) for A640 and newer */ - if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) || adreno_is_a690(adreno_gpu)) + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00800200); + else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); @@ -1775,6 +1782,8 @@ static int hw_init(struct msm_gpu *gpu) if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); + else if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff); else if (adreno_is_a619(adreno_gpu)) gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff); else if (adreno_is_a610(adreno_gpu)) @@ -1782,7 +1791,7 @@ static int hw_init(struct msm_gpu *gpu) else gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff); - gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1); + gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu)) { @@ -1808,12 +1817,17 @@ static int hw_init(struct msm_gpu *gpu) a6xx_set_cp_protect(gpu); if (adreno_is_a660_family(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801); + else + gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); } + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); /* Set dualQ + disable afull for A660 GPU */ - if (adreno_is_a660(adreno_gpu)) + else if (adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); else if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, @@ -2908,5 +2922,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) 
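/*
 * Editorial aside, not part of the patch: a minimal standalone sketch of
 * the highest-bank-bit encoding that a5xx_hw_init() and
 * a6xx_set_ubwc_config() above perform. 13 is the hardware minimum, so
 * the register fields carry (HBB - 13): the low two bits as hbb_lo and
 * the next bit as hbb_hi. Function name and the example values are
 * illustrative, not kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t a6xx_rb_nc_mode(uint32_t highest_bank_bit,
				uint32_t rgb565_predicator, uint32_t amsbc,
				uint32_t min_acc_len, uint32_t ubwc_mode)
{
	assert(highest_bank_bit >= 13);	/* mirrors the BUG_ON() above */
	uint32_t hbb = highest_bank_bit - 13;
	uint32_t hbb_lo = hbb & 3;
	uint32_t hbb_hi = hbb >> 2;

	/* Field layout mirrors the REG_A6XX_RB_NC_MODE_CNTL write above. */
	return rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 |
	       min_acc_len << 3 | hbb_lo << 1 | ubwc_mode;
}

int main(void)
{
	/* e.g. the a650 family: HBB = 16 -> hbb = 3, hbb_lo = 3, hbb_hi = 0 */
	printf("RB_NC_MODE_CNTL = 0x%x\n", a6xx_rb_nc_mode(16, 1, 1, 0, 0));
	return 0;
}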
msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); + a6xx_calc_ubwc_config(adreno_gpu); + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 41b13dec9bef..2ce7d7b1690d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -464,7 +464,7 @@ static const struct adreno_info gpulist[] = { { 190, 1 }, ), }, { - .chip_ids = ADRENO_CHIP_IDS(0x06080000), + .chip_ids = ADRENO_CHIP_IDS(0x06080001), .family = ADRENO_6XX_GEN2, .revn = 680, .fw = { @@ -841,7 +841,8 @@ static void suspend_scheduler(struct msm_gpu *gpu) */ for (i = 0; i < gpu->nr_rings; i++) { struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; - kthread_park(sched->thread); + + drm_sched_wqueue_stop(sched); } } @@ -851,7 +852,8 @@ static void resume_scheduler(struct msm_gpu *gpu) for (i = 0; i < gpu->nr_rings; i++) { struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; - kthread_unpark(sched->thread); + + drm_sched_wqueue_start(sched); } } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 3fe9fd240cc7..074fb498706f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -373,6 +373,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, return -EINVAL; *value = ctx->aspace->va_size; return 0; + case MSM_PARAM_HIGHEST_BANK_BIT: + *value = adreno_gpu->ubwc_config.highest_bank_bit; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 80b3f6312116..bc14df96feb0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -165,6 +165,15 @@ struct adreno_gpu { /* firmware: */ const struct firmware *fw[ADRENO_FW_MAX]; + struct { + u32 rgb565_predicator; + u32 uavflagprd_inv; + u32 min_acc_len; + u32 ubwc_mode; + u32 highest_bank_bit; + u32 amsbc; + } ubwc_config; + /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h new file mode 100644 index 000000000000..eb5dfff2ec4f --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -0,0 +1,457 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2015-2018, 2020 The Linux Foundation. All rights reserved. 
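/*
 * Editorial aside, not part of the patch: with MSM_PARAM_HIGHEST_BANK_BIT
 * now exposed through adreno_get_param() above, userspace (e.g. Mesa) can
 * query the UBWC highest bank bit instead of carrying per-GPU tables. A
 * minimal sketch, assuming the standard drm_msm_param uapi; the
 * render-node path is illustrative.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/msm_drm.h>

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR); /* illustrative node */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct drm_msm_param req = {
		.pipe = MSM_PIPE_3D0,
		.param = MSM_PARAM_HIGHEST_BANK_BIT, /* added by this series */
	};
	if (ioctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req)) {
		perror("MSM_GET_PARAM");
		return 1;
	}

	printf("highest bank bit: %llu\n", (unsigned long long)req.value);
	return 0;
}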
+ */ + +#ifndef _DPU_10_0_SM8650_H +#define _DPU_10_0_SM8650_H + +static const struct dpu_caps sm8650_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0xb, + .has_src_split = true, + .has_dim_layer = true, + .has_idle_pc = true, + .has_3d_merge = true, + .max_linewidth = 8192, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, +}; + +static const struct dpu_mdp_cfg sm8650_mdp = { + .name = "top_0", + .base = 0, .len = 0x494, + .features = BIT(DPU_MDP_PERIPH_0_REMOVED), + .clk_ctrls = { + [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, + }, +}; + +/* FIXME: get rid of DPU_CTL_SPLIT_DISPLAY in favour of proper ACTIVE_CTL support */ +static const struct dpu_ctl_cfg sm8650_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x15000, .len = 0x1000, + .features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, { + .name = "ctl_1", .id = CTL_1, + .base = 0x16000, .len = 0x1000, + .features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, { + .name = "ctl_2", .id = CTL_2, + .base = 0x17000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, { + .name = "ctl_3", .id = CTL_3, + .base = 0x18000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12), + }, { + .name = "ctl_4", .id = CTL_4, + .base = 0x19000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13), + }, { + .name = "ctl_5", .id = CTL_5, + .base = 0x1a000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23), + }, +}; + +static const struct dpu_sspp_cfg sm8650_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_1", .id = SSPP_VIG1, + .base = 0x6000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 4, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_2", .id = SSPP_VIG2, + .base = 0x8000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 8, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_3", .id = SSPP_VIG3, + .base = 0xa000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 12, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_11", .id = SSPP_DMA3, + .base = 0x2a000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 13, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_12", .id = SSPP_DMA4, + .base = 0x2c000, .len = 0x344, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 14, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_13", .id = SSPP_DMA5, + .base = 0x2e000, .len = 0x344, + .features = 
DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 15, + .type = SSPP_TYPE_DMA, + }, +}; + +static const struct dpu_lm_cfg sm8650_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_1, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_0, + .pingpong = PINGPONG_1, + .dspp = DSPP_1, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_3, + .pingpong = PINGPONG_2, + }, { + .name = "lm_3", .id = LM_3, + .base = 0x47000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, { + .name = "lm_4", .id = LM_4, + .base = 0x48000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_5, + .pingpong = PINGPONG_4, + }, { + .name = "lm_5", .id = LM_5, + .base = 0x49000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_4, + .pingpong = PINGPONG_5, + }, +}; + +static const struct dpu_dspp_cfg sm8650_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_1", .id = DSPP_1, + .base = 0x56000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_2", .id = DSPP_2, + .base = 0x58000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_3", .id = DSPP_3, + .base = 0x5a000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + +static const struct dpu_pingpong_cfg sm8650_pp[] = { + { + .name = "pingpong_0", .id = PINGPONG_0, + .base = 0x69000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_0, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), + }, { + .name = "pingpong_1", .id = PINGPONG_1, + .base = 0x6a000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_0, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), + }, { + .name = "pingpong_2", .id = PINGPONG_2, + .base = 0x6b000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), + }, { + .name = "pingpong_3", .id = PINGPONG_3, + .base = 0x6c000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11), + }, { + .name = "pingpong_4", .id = PINGPONG_4, + .base = 0x6d000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_2, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), + }, { + .name = "pingpong_5", .id = PINGPONG_5, + .base = 0x6e000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_2, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), + }, { + .name = "pingpong_6", .id = PINGPONG_6, + .base = 0x66000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_3, + }, { + .name = "pingpong_7", .id = PINGPONG_7, + .base = 0x66400, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = 
&sc7280_pp_sblk, + .merge_3d = MERGE_3D_3, + }, { + .name = "pingpong_8", .id = PINGPONG_8, + .base = 0x7e000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_4, + }, { + .name = "pingpong_9", .id = PINGPONG_9, + .base = 0x7e400, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_4, + }, +}; + +static const struct dpu_merge_3d_cfg sm8650_merge_3d[] = { + { + .name = "merge_3d_0", .id = MERGE_3D_0, + .base = 0x4e000, .len = 0x8, + }, { + .name = "merge_3d_1", .id = MERGE_3D_1, + .base = 0x4f000, .len = 0x8, + }, { + .name = "merge_3d_2", .id = MERGE_3D_2, + .base = 0x50000, .len = 0x8, + }, { + .name = "merge_3d_3", .id = MERGE_3D_3, + .base = 0x66700, .len = 0x8, + }, { + .name = "merge_3d_4", .id = MERGE_3D_4, + .base = 0x7e700, .len = 0x8, + }, +}; + +/* + * NOTE: Each display compression engine (DCE) contains dual hard + * slice DSC encoders so both share same base address but with + * its own different sub block address. + */ +static const struct dpu_dsc_cfg sm8650_dsc[] = { + { + .name = "dce_0_0", .id = DSC_0, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_0_1", .id = DSC_1, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_1, + }, { + .name = "dce_1_0", .id = DSC_2, + .base = 0x81000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_1_1", .id = DSC_3, + .base = 0x81000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_1, + }, { + .name = "dce_2_0", .id = DSC_4, + .base = 0x82000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_2_1", .id = DSC_5, + .base = 0x82000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2), + .sblk = &dsc_sblk_1, + }, +}; + +static const struct dpu_wb_cfg sm8650_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + +static const struct dpu_intf_cfg sm8650_intf[] = { + { + .name = "intf_0", .id = INTF_0, + .base = 0x34000, .len = 0x280, + .features = INTF_SC7280_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), + }, { + .name = "intf_1", .id = INTF_1, + .base = 0x35000, .len = 0x300, + .features = INTF_SC7280_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2), + }, { + .name = "intf_2", .id = INTF_2, + .base = 0x36000, .len = 0x300, + .features = INTF_SC7280_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF2_TEAR_INTR, 2), + }, { + .name = "intf_3", .id = INTF_3, + .base = 0x37000, .len = 0x280, + 
.features = INTF_SC7280_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), + }, +}; + +static const struct dpu_perf_cfg sm8650_perf_data = { + .max_bw_low = 17000000, + .max_bw_high = 27000000, + .min_core_ib = 2500000, + .min_llcc_ib = 0, + .min_dram_ib = 800000, + .min_prefill_lines = 35, + /* FIXME: lut tables */ + .danger_lut_tbl = {0x3ffff, 0x3ffff, 0x0}, + .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sc7180_qos_linear), + .entries = sc7180_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + +static const struct dpu_mdss_version sm8650_mdss_ver = { + .core_major_ver = 10, + .core_minor_ver = 0, +}; + +const struct dpu_mdss_cfg dpu_sm8650_cfg = { + .mdss_ver = &sm8650_mdss_ver, + .caps = &sm8650_dpu_caps, + .mdp = &sm8650_mdp, + .ctl_count = ARRAY_SIZE(sm8650_ctl), + .ctl = sm8650_ctl, + .sspp_count = ARRAY_SIZE(sm8650_sspp), + .sspp = sm8650_sspp, + .mixer_count = ARRAY_SIZE(sm8650_lm), + .mixer = sm8650_lm, + .dspp_count = ARRAY_SIZE(sm8650_dspp), + .dspp = sm8650_dspp, + .pingpong_count = ARRAY_SIZE(sm8650_pp), + .pingpong = sm8650_pp, + .dsc_count = ARRAY_SIZE(sm8650_dsc), + .dsc = sm8650_dsc, + .merge_3d_count = ARRAY_SIZE(sm8650_merge_3d), + .merge_3d = sm8650_merge_3d, + .wb_count = ARRAY_SIZE(sm8650_wb), + .wb = sm8650_wb, + .intf_count = ARRAY_SIZE(sm8650_intf), + .intf = sm8650_intf, + .vbif_count = ARRAY_SIZE(sm8650_vbif), + .vbif = sm8650_vbif, + .perf = &sm8650_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h index aa1867943c9f..1d3e9666c741 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h @@ -10,7 +10,6 @@ static const struct dpu_caps msm8998_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -70,7 +69,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -78,7 +77,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -86,7 +85,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -94,7 +93,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1ac, 
.features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -102,7 +101,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1ac, .features = DMA_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -110,7 +109,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1ac, .features = DMA_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -118,7 +117,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1ac, .features = DMA_CURSOR_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -126,7 +125,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1ac, .features = DMA_CURSOR_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h index 38ac0c1a134b..7a23389a5732 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h @@ -10,7 +10,6 @@ static const struct dpu_caps sdm845_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -68,7 +67,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -92,7 +91,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -100,7 +99,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1c8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1c8, .features = DMA_SDM845_MASK_SDMA, - 
.sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1c8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1c8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h new file mode 100644 index 000000000000..cbbdaebe357e --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023, Richard Acayan. All rights reserved. + */ + +#ifndef _DPU_4_1_SDM670_H +#define _DPU_4_1_SDM670_H + +static const struct dpu_mdp_cfg sdm670_mdp = { + .name = "top_0", + .base = 0x0, .len = 0x45c, + .features = BIT(DPU_MDP_AUDIO_SELECT), + .clk_ctrls = { + [DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 }, + [DPU_CLK_CTRL_VIG1] = { .reg_off = 0x2b4, .bit_off = 0 }, + [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, + }, +}; + +static const struct dpu_sspp_cfg sdm670_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x1c8, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_1_3, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_1", .id = SSPP_VIG1, + .base = 0x6000, .len = 0x1c8, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_1_3, + .xin_id = 4, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x1c8, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA0, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x1c8, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA1, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x1c8, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA2, + }, +}; + +static const struct dpu_dsc_cfg sdm670_dsc[] = { + { + .name = "dsc_0", .id = DSC_0, + .base = 0x80000, .len = 0x140, + }, { + .name = "dsc_1", .id = DSC_1, + .base = 0x80400, .len = 0x140, + }, +}; + +static const struct dpu_mdss_version sdm670_mdss_ver = { + .core_major_ver = 4, + .core_minor_ver = 1, +}; + +const struct dpu_mdss_cfg dpu_sdm670_cfg = { + .mdss_ver = &sdm670_mdss_ver, + .caps = &sdm845_dpu_caps, + .mdp = &sdm670_mdp, + .ctl_count = ARRAY_SIZE(sdm845_ctl), + .ctl = sdm845_ctl, + .sspp_count = ARRAY_SIZE(sdm670_sspp), + .sspp = sdm670_sspp, + .mixer_count = ARRAY_SIZE(sdm845_lm), + .mixer = 
sdm845_lm, + .pingpong_count = ARRAY_SIZE(sdm845_pp), + .pingpong = sdm845_pp, + .dsc_count = ARRAY_SIZE(sdm670_dsc), + .dsc = sdm670_dsc, + .intf_count = ARRAY_SIZE(sdm845_intf), + .intf = sdm845_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .perf = &sdm845_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 9392ad2b4d3f..145f3d5953a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8150_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -77,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -85,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -93,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -101,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -109,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -117,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -125,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -133,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index e07f4c8c25b9..9e3bec8bc121 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -10,7 +10,6 @@ static const struct dpu_caps 
sc8180x_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -92,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -100,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, @@ -367,6 +366,7 @@ static const struct dpu_perf_cfg sc8180x_perf_data = { .min_llcc_ib = 800000, .min_dram_ib = 800000, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xf000, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc7180_qos_linear), .entries = sc7180_qos_linear diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index cec7af6667dc..76b2ec0d2489 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -68,8 +68,8 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SM6125_MASK, - .sblk = &sm6125_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_2_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { .name 
= "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 94278a3e3483..a57d50b1f028 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8250_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -32,7 +31,7 @@ static const struct dpu_mdp_cfg sm8250_mdp = { [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; @@ -75,32 +74,32 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_0, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_1, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_2, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_3, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_11", 
.id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, @@ -337,8 +336,8 @@ static const struct dpu_wb_cfg sm8250_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb_yuv, + .num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, @@ -385,6 +384,7 @@ const struct dpu_mdss_cfg dpu_sm8250_cfg = { .mdss_ver = &sm8250_mdss_ver, .caps = &sm8250_dpu_caps, .mdp = &sm8250_mdp, + .cdm = &sc7280_cdm, .ctl_count = ARRAY_SIZE(sm8250_ctl), .ctl = sm8250_ctl, .sspp_count = ARRAY_SIZE(sm8250_sspp), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index c0d88ddccb28..7382ebb6e5b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc7180_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x9, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, @@ -25,7 +24,7 @@ static const struct dpu_mdp_cfg sc7180_mdp = { [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; @@ -52,8 +51,8 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sc7180_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -61,7 +60,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -69,7 +68,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -77,7 +76,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -158,8 +157,8 @@ static const struct dpu_wb_cfg sc7180_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h index 57ce14c18def..43f64a005f5a 100644 --- 
a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm6115_dpu_caps = { .max_mixer_width = DEFAULT_DPU_LINE_WIDTH, .max_mixer_blendstages = 0x4, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2160, @@ -39,8 +38,8 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm6115_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -48,7 +47,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h index 62db84bd15f2..e17a30be7525 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h @@ -11,7 +11,6 @@ static const struct dpu_caps sm6350_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -59,8 +58,8 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sc7180_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -68,7 +67,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h index fb36fba5171c..3cbb2fe8aba2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h @@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_QCM2290_MASK, - .sblk = &qcm2290_vig_sblk_0, + .sblk = &dpu_vig_sblk_noscale, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &qcm2290_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type 
= SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h index 5a3aad364c78..a06c8634d2d7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h @@ -11,7 +11,6 @@ static const struct dpu_caps sm6375_dpu_caps = { .max_mixer_width = DEFAULT_DPU_LINE_WIDTH, .max_mixer_blendstages = 0x4, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2160, @@ -40,8 +39,8 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm6115_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -49,7 +48,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index 1709ba57f384..aced16e350da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8350_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -31,6 +30,7 @@ static const struct dpu_mdp_cfg sm8350_mdp = { [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, }, }; @@ -74,64 +74,64 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_0, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_1, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_2, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_3, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, - .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, }, { .name = "sspp_9", .id = 
SSPP_DMA1, .base = 0x26000, .len = 0x1f8, - .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, - .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f8, - .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, @@ -298,6 +298,21 @@ static const struct dpu_dsc_cfg sm8350_dsc[] = { }, }; +static const struct dpu_wb_cfg sm8350_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), + .clk_ctrl = DPU_CLK_CTRL_WB2, + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + static const struct dpu_intf_cfg sm8350_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -393,6 +408,8 @@ const struct dpu_mdss_cfg dpu_sm8350_cfg = { .dsc = sm8350_dsc, .merge_3d_count = ARRAY_SIZE(sm8350_merge_3d), .merge_3d = sm8350_merge_3d, + .wb_count = ARRAY_SIZE(sm8350_wb), + .wb = sm8350_wb, .intf_count = ARRAY_SIZE(sm8350_intf), .intf = sm8350_intf, .vbif_count = ARRAY_SIZE(sdm845_vbif), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 15942fa5a8e0..2f153e0b5c6a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc7280_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2400, @@ -25,7 +24,7 @@ static const struct dpu_mdp_cfg sc7280_mdp = { [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; @@ -58,7 +57,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7280_MASK_SDMA, - .sblk = &sc7280_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0_rot_v2, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -66,7 +65,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -74,7 +73,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -82,7 +81,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, 
.base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -170,8 +169,8 @@ static const struct dpu_wb_cfg sc7280_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb_yuv, + .num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, @@ -249,6 +248,7 @@ const struct dpu_mdss_cfg dpu_sc7280_cfg = { .mdss_ver = &sc7280_mdss_ver, .caps = &sc7280_dpu_caps, .mdp = &sc7280_mdp, + .cdm = &sc7280_cdm, .ctl_count = ARRAY_SIZE(sc7280_ctl), .ctl = sc7280_ctl, .sspp_count = ARRAY_SIZE(sc7280_sspp), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 4c0528794e7a..0d143e390eca 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc8280xp_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 11, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -75,32 +74,32 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x2ac, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x2ac, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_1, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x2ac, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_2, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x2ac, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_3, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x2ac, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x2ac, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x2ac, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x2ac, .features = 
DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 7742f52be859..a1779c5597ae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8450_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -32,6 +31,7 @@ static const struct dpu_mdp_cfg sm8450_mdp = { [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, }, }; @@ -75,64 +75,64 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x32c, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_0, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x32c, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_1, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x32c, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_2, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x32c, - .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_3, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x32c, - .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x32c, - .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x32c, - .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x32c, - .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, @@ -316,6 +316,21 @@ static const struct dpu_dsc_cfg sm8450_dsc[] = { }, }; +static const struct dpu_wb_cfg sm8450_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats_rgb, + .num_formats = 
ARRAY_SIZE(wb2_formats_rgb), + .clk_ctrl = DPU_CLK_CTRL_WB2, + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + static const struct dpu_intf_cfg sm8450_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -411,6 +426,8 @@ const struct dpu_mdss_cfg dpu_sm8450_cfg = { .dsc = sm8450_dsc, .merge_3d_count = ARRAY_SIZE(sm8450_merge_3d), .merge_3d = sm8450_merge_3d, + .wb_count = ARRAY_SIZE(sm8450_wb), + .wb = sm8450_wb, .intf_count = ARRAY_SIZE(sm8450_intf), .intf = sm8450_intf, .vbif_count = ARRAY_SIZE(sdm845_vbif), diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index 69b80af6566a..ad48defa154f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8550_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, @@ -67,71 +66,71 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x344, - .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_0, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 0, .type = SSPP_TYPE_VIG, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x344, - .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_1, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 4, .type = SSPP_TYPE_VIG, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x344, - .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_2, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 8, .type = SSPP_TYPE_VIG, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x344, - .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_3, + .features = VIG_SDM845_MASK, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 12, .type = SSPP_TYPE_VIG, }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, }, { .name = "sspp_12", .id = SSPP_DMA4, .base = 0x2c000, .len = 0x344, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sm8550_dma_sblk_4, + .sblk = &dpu_dma_sblk, .xin_id = 14, .type = SSPP_TYPE_DMA, }, { .name = "sspp_13", .id = SSPP_DMA5, .base = 0x2e000, .len = 0x344, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sm8550_dma_sblk_5, + .sblk = &dpu_dma_sblk, .xin_id = 15, .type = SSPP_TYPE_DMA, }, @@ -316,8 +315,8 @@ static const struct dpu_wb_cfg sm8550_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = 
wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .xin_id = 6, .vbif_idx = VBIF_RT, .maxlinewidth = 4096, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 3c475f8042b0..88c2e51ab166 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved. * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> @@ -51,17 +51,6 @@ static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) return to_dpu_kms(priv->kms); } -static void dpu_crtc_destroy(struct drm_crtc *crtc) -{ - struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc); - - if (!crtc) - return; - - drm_crtc_cleanup(crtc); - kfree(dpu_crtc); -} - static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -125,7 +114,7 @@ static void dpu_crtc_setup_lm_misr(struct dpu_crtc_state *crtc_state) continue; /* Calculate MISR over 1 frame */ - m->hw_lm->ops.setup_misr(m->hw_lm, true, 1); + m->hw_lm->ops.setup_misr(m->hw_lm); } } @@ -1435,7 +1424,6 @@ static int dpu_crtc_late_register(struct drm_crtc *crtc) static const struct drm_crtc_funcs dpu_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = dpu_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = dpu_crtc_reset, .atomic_duplicate_state = dpu_crtc_duplicate_state, @@ -1469,9 +1457,13 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, struct dpu_crtc *dpu_crtc; int i, ret; - dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL); - if (!dpu_crtc) - return ERR_PTR(-ENOMEM); + dpu_crtc = drmm_crtc_alloc_with_planes(dev, struct dpu_crtc, base, + plane, cursor, + &dpu_crtc_funcs, + NULL); + + if (IS_ERR(dpu_crtc)) + return ERR_CAST(dpu_crtc); crtc = &dpu_crtc->base; crtc->dev = dev; @@ -1491,9 +1483,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, dpu_crtc_frame_event_work); } - drm_crtc_init_with_planes(dev, crtc, plane, cursor, &dpu_crtc_funcs, - NULL); - drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs); if (dpu_kms->catalog->dspp_count) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1cf7ff6caff4..83380bc92a00 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2013 Red Hat * Copyright (c) 2014-2018, 2020-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Author: Rob Clark <robdclark@gmail.com> */ @@ -16,6 +16,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_file.h> #include <drm/drm_probe_helper.h> +#include <drm/drm_framebuffer.h> #include "msm_drv.h" #include "dpu_kms.h" @@ -26,6 +27,7 @@ #include "dpu_hw_dspp.h" #include "dpu_hw_dsc.h" #include "dpu_hw_merge3d.h" +#include "dpu_hw_cdm.h" #include "dpu_formats.h" #include "dpu_encoder_phys.h" #include "dpu_crtc.h" @@ -39,6 +41,9 @@ #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\ (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) +#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) 
DPU_ERROR_RATELIMITED("enc%d " fmt,\ + (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) + /* * Two to anticipate panels that can do cmd/vid dynamic switching * plan is to create all possible physical encoder types, and switch between @@ -151,6 +156,8 @@ enum dpu_enc_rc_states { * @crtc_frame_event_cb: callback handler for frame event * @crtc_frame_event_cb_data: callback handler private data * @frame_done_timeout_ms: frame done timeout in ms + * @frame_done_timeout_cnt: atomic counter tracking the number of frame + * done timeouts * @frame_done_timer: watchdog timer for frame done event * @disp_info: local copy of msm_display_info struct * @idle_pc_supported: indicate if idle power collaps is supported @@ -184,13 +191,13 @@ struct dpu_encoder_virt { struct drm_crtc *crtc; struct drm_connector *connector; - struct dentry *debugfs_root; struct mutex enc_lock; DECLARE_BITMAP(frame_busy_mask, MAX_PHYS_ENCODERS_PER_VIRTUAL); void (*crtc_frame_event_cb)(void *, u32 event); void *crtc_frame_event_cb_data; atomic_t frame_done_timeout_ms; + atomic_t frame_done_timeout_cnt; struct timer_list frame_done_timer; struct msm_display_info disp_info; @@ -255,7 +262,7 @@ void dpu_encoder_setup_misr(const struct drm_encoder *drm_enc) if (!phys->hw_intf || !phys->hw_intf->ops.setup_misr) continue; - phys->hw_intf->ops.setup_misr(phys->hw_intf, true, 1); + phys->hw_intf->ops.setup_misr(phys->hw_intf); } } @@ -439,41 +446,6 @@ int dpu_encoder_get_linecount(struct drm_encoder *drm_enc) return linecount; } -static void dpu_encoder_destroy(struct drm_encoder *drm_enc) -{ - struct dpu_encoder_virt *dpu_enc = NULL; - int i = 0; - - if (!drm_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } - - dpu_enc = to_dpu_encoder_virt(drm_enc); - DPU_DEBUG_ENC(dpu_enc, "\n"); - - mutex_lock(&dpu_enc->enc_lock); - - for (i = 0; i < dpu_enc->num_phys_encs; i++) { - struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - - if (phys->ops.destroy) { - phys->ops.destroy(phys); - --dpu_enc->num_phys_encs; - dpu_enc->phys_encs[i] = NULL; - } - } - - if (dpu_enc->num_phys_encs) - DPU_ERROR_ENC(dpu_enc, "expected 0 num_phys_encs not %d\n", - dpu_enc->num_phys_encs); - dpu_enc->num_phys_encs = 0; - mutex_unlock(&dpu_enc->enc_lock); - - drm_encoder_cleanup(drm_enc); - mutex_destroy(&dpu_enc->enc_lock); -} - void dpu_encoder_helper_split_config( struct dpu_encoder_phys *phys_enc, enum dpu_intf interface) @@ -614,6 +586,7 @@ static int dpu_encoder_virt_atomic_check( struct drm_display_mode *adj_mode; struct msm_display_topology topology; struct dpu_global_state *global_state; + struct drm_framebuffer *fb; struct drm_dsc_config *dsc; int i = 0; int ret = 0; @@ -655,6 +628,22 @@ static int dpu_encoder_virt_atomic_check( topology = dpu_encoder_get_topology(dpu_enc, dpu_kms, adj_mode, crtc_state, dsc); /* + * Use CDM only for writeback at the moment as other interfaces cannot handle it. + * if writeback itself cannot handle cdm for some reason it will fail in its atomic_check() + * earlier. + */ + if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) { + fb = conn_state->writeback_job->fb; + + if (fb && DPU_FORMAT_IS_YUV(to_dpu_format(msm_framebuffer_format(fb)))) + topology.needs_cdm = true; + if (topology.needs_cdm && !dpu_enc->cur_master->hw_cdm) + crtc_state->mode_changed = true; + else if (!topology.needs_cdm && dpu_enc->cur_master->hw_cdm) + crtc_state->mode_changed = true; + } + + /* * Release and Allocate resources on every modeset * Dont allocate when active is false. 
*/ @@ -1094,6 +1083,15 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, dpu_enc->dsc_mask = dsc_mask; + if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) { + struct dpu_hw_blk *hw_cdm = NULL; + + dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state, + drm_enc->base.id, DPU_HW_BLK_CDM, + &hw_cdm, 1); + dpu_enc->cur_master->hw_cdm = hw_cdm ? to_dpu_hw_cdm(hw_cdm) : NULL; + } + cstate = to_dpu_crtc_state(crtc_state); for (i = 0; i < num_lm; i++) { @@ -1204,6 +1202,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->dsc = dpu_encoder_get_dsc_config(drm_enc); + atomic_set(&dpu_enc->frame_done_timeout_cnt, 0); + if (disp_info->intf_type == INTF_DP) dpu_enc->wide_bus_en = msm_dp_wide_bus_available(priv->dp[index]); else if (disp_info->intf_type == INTF_DSI) @@ -2080,6 +2080,15 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) phys_enc->hw_pp->merge_3d->idx); } + if (phys_enc->hw_cdm) { + if (phys_enc->hw_cdm->ops.bind_pingpong_blk && phys_enc->hw_pp) + phys_enc->hw_cdm->ops.bind_pingpong_blk(phys_enc->hw_cdm, + PINGPONG_NONE); + if (phys_enc->hw_ctl->ops.update_pending_flush_cdm) + phys_enc->hw_ctl->ops.update_pending_flush_cdm(phys_enc->hw_ctl, + phys_enc->hw_cdm->idx); + } + if (dpu_enc->dsc) { dpu_encoder_unprep_dsc(dpu_enc); dpu_enc->dsc = NULL; @@ -2108,18 +2117,20 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) #ifdef CONFIG_DEBUG_FS static int _dpu_encoder_status_show(struct seq_file *s, void *data) { - struct dpu_encoder_virt *dpu_enc = s->private; + struct drm_encoder *drm_enc = s->private; + struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); int i; mutex_lock(&dpu_enc->enc_lock); for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - seq_printf(s, "intf:%d wb:%d vsync:%8d underrun:%8d ", + seq_printf(s, "intf:%d wb:%d vsync:%8d underrun:%8d frame_done_cnt:%d", phys->hw_intf ? phys->hw_intf->idx - INTF_0 : -1, phys->hw_wb ? 
phys->hw_wb->idx - WB_0 : -1, atomic_read(&phys->vsync_cnt), - atomic_read(&phys->underrun_cnt)); + atomic_read(&phys->underrun_cnt), + atomic_read(&dpu_enc->frame_done_timeout_cnt)); seq_printf(s, "mode: %s\n", dpu_encoder_helper_get_intf_type(phys->intf_mode)); } @@ -2130,49 +2141,18 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data) DEFINE_SHOW_ATTRIBUTE(_dpu_encoder_status); -static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) +static void dpu_encoder_debugfs_init(struct drm_encoder *drm_enc, struct dentry *root) { - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); - - char name[12]; - - if (!drm_enc->dev) { - DPU_ERROR("invalid encoder or kms\n"); - return -EINVAL; - } - - snprintf(name, sizeof(name), "encoder%u", drm_enc->base.id); - - /* create overall sub-directory for the encoder */ - dpu_enc->debugfs_root = debugfs_create_dir(name, - drm_enc->dev->primary->debugfs_root); - /* don't error check these */ debugfs_create_file("status", 0600, - dpu_enc->debugfs_root, dpu_enc, &_dpu_encoder_status_fops); - - return 0; + root, drm_enc, &_dpu_encoder_status_fops); } #else -static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) -{ - return 0; -} +#define dpu_encoder_debugfs_init NULL #endif -static int dpu_encoder_late_register(struct drm_encoder *encoder) -{ - return _dpu_encoder_init_debugfs(encoder); -} - -static void dpu_encoder_early_unregister(struct drm_encoder *encoder) -{ - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(encoder); - - debugfs_remove_recursive(dpu_enc->debugfs_root); -} - static int dpu_encoder_virt_add_phys_encs( + struct drm_device *dev, struct msm_display_info *disp_info, struct dpu_encoder_virt *dpu_enc, struct dpu_enc_phys_init_params *params) @@ -2194,7 +2174,7 @@ static int dpu_encoder_virt_add_phys_encs( if (disp_info->intf_type == INTF_WB) { - enc = dpu_encoder_phys_wb_init(params); + enc = dpu_encoder_phys_wb_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init wb enc: %ld\n", @@ -2205,7 +2185,7 @@ static int dpu_encoder_virt_add_phys_encs( dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc; ++dpu_enc->num_phys_encs; } else if (disp_info->is_cmd_mode) { - enc = dpu_encoder_phys_cmd_init(params); + enc = dpu_encoder_phys_cmd_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init cmd enc: %ld\n", @@ -2216,7 +2196,7 @@ static int dpu_encoder_virt_add_phys_encs( dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc; ++dpu_enc->num_phys_encs; } else { - enc = dpu_encoder_phys_vid_init(params); + enc = dpu_encoder_phys_vid_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init vid enc: %ld\n", @@ -2305,7 +2285,7 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, break; } - ret = dpu_encoder_virt_add_phys_encs(disp_info, + ret = dpu_encoder_virt_add_phys_encs(dpu_kms->dev, disp_info, dpu_enc, &phys_params); if (ret) { DPU_ERROR_ENC(dpu_enc, "failed to add phys encs\n"); @@ -2339,7 +2319,10 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) return; } - DPU_ERROR_ENC(dpu_enc, "frame done timeout\n"); + DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n"); + + if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1) + msm_disp_snapshot_state(drm_enc->dev); event = DPU_ENCODER_FRAME_EVENT_ERROR; trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event); @@ -2354,9 +2337,7 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = { }; static const struct drm_encoder_funcs 
dpu_encoder_funcs = { - .destroy = dpu_encoder_destroy, - .late_register = dpu_encoder_late_register, - .early_unregister = dpu_encoder_early_unregister, + .debugfs_init = dpu_encoder_debugfs_init, }; struct drm_encoder *dpu_encoder_init(struct drm_device *dev, @@ -2365,20 +2346,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, { struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); - struct drm_encoder *drm_enc = NULL; - struct dpu_encoder_virt *dpu_enc = NULL; - int ret = 0; - - dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL); - if (!dpu_enc) - return ERR_PTR(-ENOMEM); + struct dpu_encoder_virt *dpu_enc; + int ret; - ret = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs, - drm_enc_mode, NULL); - if (ret) { - devm_kfree(dev->dev, dpu_enc); - return ERR_PTR(ret); - } + dpu_enc = drmm_encoder_alloc(dev, struct dpu_encoder_virt, base, + &dpu_encoder_funcs, drm_enc_mode, NULL); + if (IS_ERR(dpu_enc)) + return ERR_CAST(dpu_enc); drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs); @@ -2388,10 +2362,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, mutex_init(&dpu_enc->rc_lock); ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info); - if (ret) - goto fail; + if (ret) { + DPU_ERROR("failed to setup encoder\n"); + return ERR_PTR(-ENOMEM); + } atomic_set(&dpu_enc->frame_done_timeout_ms, 0); + atomic_set(&dpu_enc->frame_done_timeout_cnt, 0); timer_setup(&dpu_enc->frame_done_timer, dpu_encoder_frame_done_timeout, 0); @@ -2404,13 +2381,6 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, DPU_DEBUG_ENC(dpu_enc, "created\n"); return &dpu_enc->base; - -fail: - DPU_ERROR("failed to create encoder\n"); - if (drm_enc) - dpu_encoder_destroy(drm_enc); - - return ERR_PTR(ret); } int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, @@ -2437,9 +2407,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, case MSM_ENC_TX_COMPLETE: fn_wait = phys->ops.wait_for_tx_complete; break; - case MSM_ENC_VBLANK: - fn_wait = phys->ops.wait_for_vblank; - break; default: DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n", event); @@ -2497,7 +2464,6 @@ void dpu_encoder_phys_init(struct dpu_encoder_phys *phys_enc, phys_enc->enc_spinlock = p->enc_spinlock; phys_enc->enable_state = DPU_ENC_DISABLED; - atomic_set(&phys_enc->vblank_refcount, 0); atomic_set(&phys_enc->pending_kickoff_cnt, 0); atomic_set(&phys_enc->pending_ctlstart_cnt, 0); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index 6f04c3d56e77..993f26343331 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -14,8 +14,10 @@ #include "dpu_hw_intf.h" #include "dpu_hw_wb.h" #include "dpu_hw_pingpong.h" +#include "dpu_hw_cdm.h" #include "dpu_hw_ctl.h" #include "dpu_hw_top.h" +#include "dpu_hw_util.h" #include "dpu_encoder.h" #include "dpu_crtc.h" @@ -72,7 +74,6 @@ struct dpu_encoder_phys; * @enable: DRM Call. Enable a DRM mode. * @disable: DRM Call. Disable mode. * @atomic_check: DRM Call. Atomic check new DRM state. - * @destroy: DRM Call. Destroy and release resources. 
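[Aside, not part of the patch] The hunk above swaps kzalloc() + drm_encoder_init() + a .destroy callback for a DRM-managed allocation: the memory now lives until the drm_device is released, so the error-path kfree(), dpu_encoder_destroy() and the late_register/early_unregister debugfs hooks all disappear (debugfs moves to .debugfs_init). A minimal sketch of the pattern, assuming only the upstream drmm_encoder_alloc() helper; "my_encoder" is hypothetical:

#include <linux/err.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_managed.h>

struct my_encoder {
	struct drm_encoder base;	/* embedded DRM object */
	int stream_sel;
};

static const struct drm_encoder_funcs my_encoder_funcs = {
	/* no .destroy: drmm-managed encoders must not set one */
};

static struct drm_encoder *my_encoder_create(struct drm_device *dev)
{
	struct my_encoder *enc;

	enc = drmm_encoder_alloc(dev, struct my_encoder, base,
				 &my_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL);
	if (IS_ERR(enc))
		return ERR_CAST(enc);	/* nothing to free by hand */

	return &enc->base;
}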
* @control_vblank_irq Register/Deregister for VBLANK IRQ * @wait_for_commit_done: Wait for hardware to have flushed the * current pending frames to hardware @@ -102,11 +103,9 @@ struct dpu_encoder_phys_ops { int (*atomic_check)(struct dpu_encoder_phys *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state); - void (*destroy)(struct dpu_encoder_phys *encoder); int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable); int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc); int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc); - int (*wait_for_vblank)(struct dpu_encoder_phys *phys_enc); void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc); void (*handle_post_kickoff)(struct dpu_encoder_phys *phys_enc); void (*trigger_start)(struct dpu_encoder_phys *phys_enc); @@ -153,8 +152,10 @@ enum dpu_intr_idx { * @hw_pp: Hardware interface to the ping pong registers * @hw_intf: Hardware interface to the intf registers * @hw_wb: Hardware interface to the wb registers + * @hw_cdm: Hardware interface to the CDM registers * @dpu_kms: Pointer to the dpu_kms top level * @cached_mode: DRM mode cached at mode_set time, acted on in enable + * @vblank_ctl_lock: Vblank ctl mutex lock to protect vblank_refcount * @enabled: Whether the encoder has enabled and running a mode * @split_role: Role to play in a split-panel configuration * @intf_mode: Interface mode @@ -181,13 +182,15 @@ struct dpu_encoder_phys { struct dpu_hw_pingpong *hw_pp; struct dpu_hw_intf *hw_intf; struct dpu_hw_wb *hw_wb; + struct dpu_hw_cdm *hw_cdm; struct dpu_kms *dpu_kms; struct drm_display_mode cached_mode; + struct mutex vblank_ctl_lock; enum dpu_enc_split_role split_role; enum dpu_intf_mode intf_mode; spinlock_t *enc_spinlock; enum dpu_enc_enable_state enable_state; - atomic_t vblank_refcount; + int vblank_refcount; atomic_t vsync_cnt; atomic_t underrun_cnt; atomic_t pending_ctlstart_cnt; @@ -210,6 +213,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys) * @wbirq_refcount: Reference count of writeback interrupt * @wb_done_timeout_cnt: number of wb done irq timeout errors * @wb_cfg: writeback block config to store fb related details + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration * @wb_conn: backpointer to writeback connector * @wb_job: backpointer to current writeback job * @dest: dpu buffer layout for current writeback output buffer @@ -219,6 +223,7 @@ struct dpu_encoder_phys_wb { atomic_t wbirq_refcount; int wb_done_timeout_cnt; struct dpu_hw_wb_cfg wb_cfg; + struct dpu_hw_cdm_cfg cdm_cfg; struct drm_writeback_connector *wb_conn; struct drm_writeback_job *wb_job; struct dpu_hw_fmt_layout dest; @@ -281,22 +286,24 @@ struct dpu_encoder_wait_info { * @p: Pointer to init params structure * Return: Error code or newly allocated encoder */ -struct dpu_encoder_phys *dpu_encoder_phys_vid_init( +struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** * dpu_encoder_phys_cmd_init - Construct a new command mode physical encoder + * @dev: Corresponding device for devres management * @p: Pointer to init params structure * Return: Error code or newly allocated encoder */ -struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( +struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** * dpu_encoder_phys_wb_init - initialize writeback encoder + * @dev: Corresponding device for devres management * @init: Pointer 
to init info structure with initialization params */ -struct dpu_encoder_phys *dpu_encoder_phys_wb_init( +struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index be185fe69793..a301e2833177 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -13,6 +13,8 @@ #include "dpu_trace.h" #include "disp/msm_disp_snapshot.h" +#include <drm/drm_managed.h> + #define DPU_DEBUG_CMDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \ (e) && (e)->base.parent ? \ (e)->base.parent->base.id : -1, \ @@ -244,7 +246,8 @@ static int dpu_encoder_phys_cmd_control_vblank_irq( return -EINVAL; } - refcount = atomic_read(&phys_enc->vblank_refcount); + mutex_lock(&phys_enc->vblank_ctl_lock); + refcount = phys_enc->vblank_refcount; /* Slave encoders don't report vblank */ if (!dpu_encoder_phys_cmd_is_master(phys_enc)) @@ -260,16 +263,24 @@ static int dpu_encoder_phys_cmd_control_vblank_irq( phys_enc->hw_pp->idx - PINGPONG_0, enable ? "true" : "false", refcount); - if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1) - ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_RDPTR], - dpu_encoder_phys_cmd_te_rd_ptr_irq, - phys_enc); - else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0) - ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_RDPTR]); + if (enable) { + if (phys_enc->vblank_refcount == 0) + ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_RDPTR], + dpu_encoder_phys_cmd_te_rd_ptr_irq, + phys_enc); + if (!ret) + phys_enc->vblank_refcount++; + } else if (!enable) { + if (phys_enc->vblank_refcount == 1) + ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_RDPTR]); + if (!ret) + phys_enc->vblank_refcount--; + } end: + mutex_unlock(&phys_enc->vblank_ctl_lock); if (ret) { DRM_ERROR("vblank irq err id:%u pp:%d ret:%d, enable %s/%d\n", DRMID(phys_enc->parent), @@ -285,7 +296,7 @@ static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, { trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, - enable, atomic_read(&phys_enc->vblank_refcount)); + enable, phys_enc->vblank_refcount); if (enable) { dpu_core_irq_register_callback(phys_enc->dpu_kms, @@ -558,14 +569,6 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc) phys_enc->enable_state = DPU_ENC_DISABLED; } -static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc) -{ - struct dpu_encoder_phys_cmd *cmd_enc = - to_dpu_encoder_phys_cmd(phys_enc); - - kfree(cmd_enc); -} - static void dpu_encoder_phys_cmd_prepare_for_kickoff( struct dpu_encoder_phys *phys_enc) { @@ -681,33 +684,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done( return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc); } -static int dpu_encoder_phys_cmd_wait_for_vblank( - struct dpu_encoder_phys *phys_enc) -{ - int rc = 0; - struct dpu_encoder_phys_cmd *cmd_enc; - struct dpu_encoder_wait_info wait_info; - - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - - /* only required for master controller */ - if (!dpu_encoder_phys_cmd_is_master(phys_enc)) - return rc; - - wait_info.wq = &cmd_enc->pending_vblank_wq; - wait_info.atomic_cnt = &cmd_enc->pending_vblank_cnt; - wait_info.timeout_ms = 
KICKOFF_TIMEOUT_MS; - - atomic_inc(&cmd_enc->pending_vblank_cnt); - - rc = dpu_encoder_helper_wait_for_irq(phys_enc, - phys_enc->irq[INTR_IDX_RDPTR], - dpu_encoder_phys_cmd_te_rd_ptr_irq, - &wait_info); - - return rc; -} - static void dpu_encoder_phys_cmd_handle_post_kickoff( struct dpu_encoder_phys *phys_enc) { @@ -731,12 +707,10 @@ static void dpu_encoder_phys_cmd_init_ops( ops->atomic_mode_set = dpu_encoder_phys_cmd_atomic_mode_set; ops->enable = dpu_encoder_phys_cmd_enable; ops->disable = dpu_encoder_phys_cmd_disable; - ops->destroy = dpu_encoder_phys_cmd_destroy; ops->control_vblank_irq = dpu_encoder_phys_cmd_control_vblank_irq; ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done; ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff; ops->wait_for_tx_complete = dpu_encoder_phys_cmd_wait_for_tx_complete; - ops->wait_for_vblank = dpu_encoder_phys_cmd_wait_for_vblank; ops->trigger_start = dpu_encoder_phys_cmd_trigger_start; ops->needs_single_flush = dpu_encoder_phys_cmd_needs_single_flush; ops->irq_control = dpu_encoder_phys_cmd_irq_control; @@ -746,7 +720,7 @@ static void dpu_encoder_phys_cmd_init_ops( ops->get_line_count = dpu_encoder_phys_cmd_get_line_count; } -struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( +struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -754,7 +728,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( DPU_DEBUG("intf\n"); - cmd_enc = kzalloc(sizeof(*cmd_enc), GFP_KERNEL); + cmd_enc = drmm_kzalloc(dev, sizeof(*cmd_enc), GFP_KERNEL); if (!cmd_enc) { DPU_ERROR("failed to allocate\n"); return ERR_PTR(-ENOMEM); @@ -763,6 +737,9 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( dpu_encoder_phys_init(phys_enc, p); + mutex_init(&phys_enc->vblank_ctl_lock); + phys_enc->vblank_refcount = 0; + dpu_encoder_phys_cmd_init_ops(&phys_enc->ops); phys_enc->intf_mode = INTF_MODE_CMD; cmd_enc->stream_sel = 0; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index a01fda711883..d0f56c5c4cce 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -11,6 +11,8 @@ #include "dpu_trace.h" #include "disp/msm_disp_snapshot.h" +#include <drm/drm_managed.h> + #define DPU_DEBUG_VIDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \ (e) && (e)->parent ? 
\ (e)->parent->base.id : -1, \ @@ -364,7 +366,8 @@ static int dpu_encoder_phys_vid_control_vblank_irq( int ret = 0; int refcount; - refcount = atomic_read(&phys_enc->vblank_refcount); + mutex_lock(&phys_enc->vblank_ctl_lock); + refcount = phys_enc->vblank_refcount; /* Slave encoders don't report vblank */ if (!dpu_encoder_phys_vid_is_master(phys_enc)) @@ -377,18 +380,26 @@ static int dpu_encoder_phys_vid_control_vblank_irq( } DRM_DEBUG_VBL("id:%u enable=%d/%d\n", DRMID(phys_enc->parent), enable, - atomic_read(&phys_enc->vblank_refcount)); + refcount); - if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1) - ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_VSYNC], - dpu_encoder_phys_vid_vblank_irq, - phys_enc); - else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0) - ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_VSYNC]); + if (enable) { + if (phys_enc->vblank_refcount == 0) + ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_VSYNC], + dpu_encoder_phys_vid_vblank_irq, + phys_enc); + if (!ret) + phys_enc->vblank_refcount++; + } else if (!enable) { + if (phys_enc->vblank_refcount == 1) + ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_VSYNC]); + if (!ret) + phys_enc->vblank_refcount--; + } end: + mutex_unlock(&phys_enc->vblank_ctl_lock); if (ret) { DRM_ERROR("failed: id:%u intf:%d ret:%d enable:%d refcnt:%d\n", DRMID(phys_enc->parent), @@ -438,13 +449,7 @@ skip_flush: phys_enc->enable_state = DPU_ENC_ENABLING; } -static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc) -{ - DPU_DEBUG_VIDENC(phys_enc, "\n"); - kfree(phys_enc); -} - -static int dpu_encoder_phys_vid_wait_for_vblank( +static int dpu_encoder_phys_vid_wait_for_tx_complete( struct dpu_encoder_phys *phys_enc) { struct dpu_encoder_wait_info wait_info; @@ -558,7 +563,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) * scanout buffer) don't latch properly.. 
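[Aside, not part of the patch] Rationale for the refcount hunks above: with an atomic_t, the 0->1/1->0 transition and the IRQ callback (un)registration were two separate steps, and a failed registration still left the count bumped. The rework serializes both under vblank_ctl_lock and only moves the count when the callback operation succeeded. A standalone model, with a pthread mutex standing in for the kernel mutex and the two callbacks assumed to be stubs returning 0 on success:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t vblank_ctl_lock = PTHREAD_MUTEX_INITIALIZER;
static int vblank_refcount;

int register_vblank_callback(void);	/* assumed: 0 on success */
int unregister_vblank_callback(void);	/* assumed: 0 on success */

int control_vblank_irq(bool enable)
{
	int ret = 0;

	pthread_mutex_lock(&vblank_ctl_lock);
	if (enable) {
		if (vblank_refcount == 0)
			ret = register_vblank_callback();	/* 0 -> 1 hooks the IRQ */
		if (!ret)
			vblank_refcount++;
	} else {
		if (vblank_refcount == 1)
			ret = unregister_vblank_callback();	/* 1 -> 0 unhooks it */
		if (!ret)
			vblank_refcount--;
	}
	pthread_mutex_unlock(&vblank_ctl_lock);

	return ret;
}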
*/ if (dpu_encoder_phys_vid_is_master(phys_enc)) { - ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc); + ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc); if (ret) { atomic_set(&phys_enc->pending_kickoff_cnt, 0); DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n", @@ -578,7 +583,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags); dpu_encoder_phys_inc_pending(phys_enc); spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags); - ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc); + ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc); if (ret) { atomic_set(&phys_enc->pending_kickoff_cnt, 0); DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n", @@ -618,7 +623,7 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc, trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_intf->idx - INTF_0, enable, - atomic_read(&phys_enc->vblank_refcount)); + phys_enc->vblank_refcount); if (enable) { ret = dpu_encoder_phys_vid_control_vblank_irq(phys_enc, true); @@ -681,11 +686,9 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops) ops->atomic_mode_set = dpu_encoder_phys_vid_atomic_mode_set; ops->enable = dpu_encoder_phys_vid_enable; ops->disable = dpu_encoder_phys_vid_disable; - ops->destroy = dpu_encoder_phys_vid_destroy; ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq; ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done; - ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank; - ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_vblank; + ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_tx_complete; ops->irq_control = dpu_encoder_phys_vid_irq_control; ops->prepare_for_kickoff = dpu_encoder_phys_vid_prepare_for_kickoff; ops->handle_post_kickoff = dpu_encoder_phys_vid_handle_post_kickoff; @@ -694,7 +697,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops) ops->get_frame_count = dpu_encoder_phys_vid_get_frame_count; } -struct dpu_encoder_phys *dpu_encoder_phys_vid_init( +struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -704,7 +707,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init( return ERR_PTR(-EINVAL); } - phys_enc = kzalloc(sizeof(*phys_enc), GFP_KERNEL); + phys_enc = drmm_kzalloc(dev, sizeof(*phys_enc), GFP_KERNEL); if (!phys_enc) { DPU_ERROR("failed to create encoder due to memory allocation error\n"); return ERR_PTR(-ENOMEM); @@ -713,6 +716,8 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init( DPU_DEBUG_VIDENC(phys_enc, "\n"); dpu_encoder_phys_init(phys_enc, p); + mutex_init(&phys_enc->vblank_ctl_lock); + phys_enc->vblank_refcount = 0; dpu_encoder_phys_vid_init_ops(&phys_enc->ops); phys_enc->intf_mode = INTF_MODE_VIDEO; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 0b6a761d68b7..4cd2d9e3131a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -8,6 +8,7 @@ #include <linux/debugfs.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_managed.h> #include "dpu_encoder_phys.h" #include "dpu_formats.h" @@ -206,13 +207,14 @@ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, } /** - * dpu_encoder_phys_wb_setup_cdp - setup chroma down prefetch block + * 
dpu_encoder_phys_wb_setup_ctl - setup wb pipeline for ctl path * @phys_enc:Pointer to physical encoder */ -static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc) +static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) { struct dpu_hw_wb *hw_wb; struct dpu_hw_ctl *ctl; + struct dpu_hw_cdm *hw_cdm; if (!phys_enc) { DPU_ERROR("invalid encoder\n"); @@ -221,6 +223,7 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc) hw_wb = phys_enc->hw_wb; ctl = phys_enc->hw_ctl; + hw_cdm = phys_enc->hw_cdm; if (test_bit(DPU_CTL_ACTIVE_CFG, &ctl->caps->features) && (phys_enc->hw_ctl && @@ -237,6 +240,9 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc) if (mode_3d && hw_pp && hw_pp->merge_3d) intf_cfg.merge_3d = hw_pp->merge_3d->idx; + if (hw_cdm) + intf_cfg.cdm = hw_cdm->idx; + if (phys_enc->hw_pp->merge_3d && phys_enc->hw_pp->merge_3d->ops.setup_3d_mode) phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, mode_3d); @@ -259,6 +265,96 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc) } /** + * dpu_encoder_helper_phys_setup_cdm - setup chroma down sampling block + * This API does not handle DPU_CHROMA_H1V2. + * @phys_enc:Pointer to physical encoder + */ +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc) +{ + struct dpu_hw_cdm *hw_cdm; + struct dpu_hw_cdm_cfg *cdm_cfg; + struct dpu_hw_pingpong *hw_pp; + struct dpu_encoder_phys_wb *wb_enc; + const struct msm_format *format; + const struct dpu_format *dpu_fmt; + struct drm_writeback_job *wb_job; + int ret; + + if (!phys_enc) + return; + + wb_enc = to_dpu_encoder_phys_wb(phys_enc); + cdm_cfg = &wb_enc->cdm_cfg; + hw_pp = phys_enc->hw_pp; + hw_cdm = phys_enc->hw_cdm; + wb_job = wb_enc->wb_job; + + format = msm_framebuffer_format(wb_enc->wb_job->fb); + dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier); + + if (!hw_cdm) + return; + + if (!DPU_FORMAT_IS_YUV(dpu_fmt)) { + DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent), + dpu_fmt->base.pixel_format); + if (hw_cdm->ops.bind_pingpong_blk) + hw_cdm->ops.bind_pingpong_blk(hw_cdm, PINGPONG_NONE); + + return; + } + + memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg)); + + cdm_cfg->output_width = wb_job->fb->width; + cdm_cfg->output_height = wb_job->fb->height; + cdm_cfg->output_fmt = dpu_fmt; + cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB; + cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ? 
+ CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT; + cdm_cfg->csc_cfg = &dpu_csc10_rgb2yuv_601l; + + /* enable 10 bit logic */ + switch (cdm_cfg->output_fmt->chroma_sample) { + case DPU_CHROMA_RGB: + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + case DPU_CHROMA_H2V1: + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + case DPU_CHROMA_420: + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE; + cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE; + break; + case DPU_CHROMA_H1V2: + default: + DPU_ERROR("[enc:%d] unsupported chroma sampling type\n", + DRMID(phys_enc->parent)); + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + } + + DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n", + DRMID(phys_enc->parent), cdm_cfg->output_width, + cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format, + cdm_cfg->output_type, cdm_cfg->output_bit_depth, + cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type); + + if (hw_cdm->ops.enable) { + cdm_cfg->pp_id = hw_pp->idx; + ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg); + if (ret < 0) { + DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n", + DRMID(phys_enc->parent), ret); + return; + } + } +} + +/** * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states * @phys_enc: Pointer to physical encoder * @crtc_state: Pointer to CRTC atomic state @@ -307,7 +403,7 @@ static int dpu_encoder_phys_wb_atomic_check( return -EINVAL; } - return 0; + return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state); } @@ -320,6 +416,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) struct dpu_hw_wb *hw_wb; struct dpu_hw_ctl *hw_ctl; struct dpu_hw_pingpong *hw_pp; + struct dpu_hw_cdm *hw_cdm; u32 pending_flush = 0; if (!phys_enc) @@ -328,6 +425,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) hw_wb = phys_enc->hw_wb; hw_pp = phys_enc->hw_pp; hw_ctl = phys_enc->hw_ctl; + hw_cdm = phys_enc->hw_cdm; DPU_DEBUG("[wb:%d]\n", hw_wb->idx - WB_0); @@ -343,6 +441,9 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) hw_ctl->ops.update_pending_flush_merge_3d(hw_ctl, hw_pp->merge_3d->idx); + if (hw_cdm && hw_ctl->ops.update_pending_flush_cdm) + hw_ctl->ops.update_pending_flush_cdm(hw_ctl, hw_cdm->idx); + if (hw_ctl->ops.get_pending_flush) pending_flush = hw_ctl->ops.get_pending_flush(hw_ctl); @@ -374,8 +475,9 @@ static void dpu_encoder_phys_wb_setup( dpu_encoder_phys_wb_setup_fb(phys_enc, fb); - dpu_encoder_phys_wb_setup_cdp(phys_enc); + dpu_encoder_helper_phys_setup_cdm(phys_enc); + dpu_encoder_phys_wb_setup_ctl(phys_enc); } /** @@ -580,20 +682,6 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc) phys_enc->enable_state = DPU_ENC_DISABLED; } -/** - * dpu_encoder_phys_wb_destroy - destroy writeback encoder - * @phys_enc: Pointer to physical encoder - */ -static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc) -{ - if (!phys_enc) - return; - - DPU_DEBUG("[wb:%d]\n", phys_enc->hw_wb->idx - WB_0); - - kfree(phys_enc); -} - static void dpu_encoder_phys_wb_prepare_wb_job(struct dpu_encoder_phys *phys_enc, struct drm_writeback_job *job) { @@ -689,7 +777,6 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) ops->atomic_mode_set = dpu_encoder_phys_wb_atomic_mode_set; ops->enable = dpu_encoder_phys_wb_enable; ops->disable = dpu_encoder_phys_wb_disable; - ops->destroy = 
dpu_encoder_phys_wb_destroy; ops->atomic_check = dpu_encoder_phys_wb_atomic_check; ops->wait_for_commit_done = dpu_encoder_phys_wb_wait_for_commit_done; ops->prepare_for_kickoff = dpu_encoder_phys_wb_prepare_for_kickoff; @@ -705,9 +792,10 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder + * @dev: Corresponding device for devres management * @p: Pointer to init info structure with initialization params */ -struct dpu_encoder_phys *dpu_encoder_phys_wb_init( +struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -720,7 +808,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( return ERR_PTR(-EINVAL); } - wb_enc = kzalloc(sizeof(*wb_enc), GFP_KERNEL); + wb_enc = drmm_kzalloc(dev, sizeof(*wb_enc), GFP_KERNEL); if (!wb_enc) { DPU_ERROR("failed to allocate wb phys_enc enc\n"); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index a1aada630780..54e8717403a0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -22,23 +22,14 @@ BIT(DPU_SSPP_CSC_10BIT)) #define VIG_MSM8998_MASK \ - (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3)) + (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SDM845_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3)) + (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SDM845_MASK_SDMA \ (VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) -#define VIG_SC7180_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED4)) - -#define VIG_SM6125_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3LITE)) - -#define VIG_SC7180_MASK_SDMA \ - (VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) - #define VIG_QCM2290_MASK (VIG_BASE_MASK | BIT(DPU_SSPP_QOS_8LVL)) #define DMA_MSM8998_MASK \ @@ -47,7 +38,7 @@ BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_EXCL_RECT)) #define VIG_SC7280_MASK \ - (VIG_SC7180_MASK | BIT(DPU_SSPP_INLINE_ROTATION)) + (VIG_SDM845_MASK | BIT(DPU_SSPP_INLINE_ROTATION)) #define VIG_SC7280_MASK_SDMA \ (VIG_SC7280_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) @@ -211,7 +202,7 @@ static const u32 rotation_v2_formats[] = { /* TODO add formats after validation */ }; -static const uint32_t wb2_formats[] = { +static const u32 wb2_formats_rgb[] = { DRM_FORMAT_RGB565, DRM_FORMAT_BGR565, DRM_FORMAT_RGB888, @@ -245,21 +236,56 @@ static const uint32_t wb2_formats[] = { DRM_FORMAT_XBGR4444, }; +static const u32 wb2_formats_rgb_yuv[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_RGB888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_RGBA4444, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGR888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_ABGR4444, + DRM_FORMAT_BGRA4444, + DRM_FORMAT_BGRX4444, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_NV12, +}; + /************************************************************* * SSPP sub blocks config 
*************************************************************/ +#define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min)) + /* SSPP common configuration */ -#define _VIG_SBLK(sdma_pri, qseed_ver) \ +#define _VIG_SBLK(scaler_ver) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ - .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ - .id = qseed_ver, \ + .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ - .id = DPU_SSPP_CSC_10BIT, \ .base = 0x1a00, .len = 0x100,}, \ .format_list = plane_formats_yuv, \ .num_formats = ARRAY_SIZE(plane_formats_yuv), \ @@ -268,16 +294,14 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = NULL, \ } -#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, rot_cfg) \ +#define _VIG_SBLK_ROT(scaler_ver, rot_cfg) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ - .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ - .id = qseed_ver, \ + .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ - .id = DPU_SSPP_CSC_10BIT, \ .base = 0x1a00, .len = 0x100,}, \ .format_list = plane_formats_yuv, \ .num_formats = ARRAY_SIZE(plane_formats_yuv), \ @@ -286,91 +310,64 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = rot_cfg, \ } -#define _DMA_SBLK(sdma_pri) \ +#define _VIG_SBLK_NOSCALE() \ + { \ + .maxdwnscale = SSPP_UNITY_SCALE, \ + .maxupscale = SSPP_UNITY_SCALE, \ + .format_list = plane_formats_yuv, \ + .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .virt_format_list = plane_formats, \ + .virt_num_formats = ARRAY_SIZE(plane_formats), \ + } + +#define _DMA_SBLK() \ { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .smart_dma_priority = sdma_pri, \ .format_list = plane_formats, \ .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); - static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { .rot_maxheight = 1088, .rot_num_formats = ARRAY_SIZE(rotation_v2_formats), .rot_format_list = rotation_v2_formats, }; -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3); - -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4); - -static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = - _VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4); - -static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 = - _VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, &dpu_rot_sc7280_cfg_v2); - -static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = - _VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4); - 
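[Aside, not part of the patch] The per-pipe sub-block instances removed around here differed only in the now-dropped smart_dma_priority, so they collapse into shared constants keyed by scaler revision. SSPP_SCALER_VER(), introduced above, packs major/minor the same way the dpu_hw_catalog.h doc comment describes the value read back at scaler_blk.base + QSEED3_HW_VERSION. A standalone check:

#include <assert.h>

#define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min))

int main(void)
{
	assert(SSPP_SCALER_VER(1, 2) == 0x00010002);	/* QSEED3 v1.2 */
	assert(SSPP_SCALER_VER(3, 0) == 0x00030000);	/* the sc7280/sc8280xp VIGs */
	assert((SSPP_SCALER_VER(3, 2) >> 16) == 3);	/* major recovers cleanly */
	return 0;
}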
-static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 = - _VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE); - -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4); - -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 = - _VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 = - _VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6); - -#define _VIG_SBLK_NOSCALE(sdma_pri) \ - { \ - .maxdwnscale = SSPP_UNITY_SCALE, \ - .maxupscale = SSPP_UNITY_SCALE, \ - .smart_dma_priority = sdma_pri, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ - .virt_format_list = plane_formats, \ - .virt_num_formats = ARRAY_SIZE(plane_formats), \ - } +static const struct dpu_sspp_sub_blks dpu_vig_sblk_noscale = + _VIG_SBLK_NOSCALE(); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_2 = + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_3 = + _VIG_SBLK(SSPP_SCALER_VER(1, 3)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_4 = + _VIG_SBLK(SSPP_SCALER_VER(1, 4)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_2_4 = + _VIG_SBLK(SSPP_SCALER_VER(2, 4)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0 = + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0_rot_v2 = + _VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0), + &dpu_rot_sc7280_cfg_v2); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 = + _VIG_SBLK(SSPP_SCALER_VER(3, 1)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 = + _VIG_SBLK(SSPP_SCALER_VER(3, 2)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_3 = + _VIG_SBLK(SSPP_SCALER_VER(3, 3)); -static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(2); -static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(1); +static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); /************************************************************* * MIXER sub blocks config @@ -422,12 +419,12 @@ static const struct dpu_lm_sub_blks qcm2290_lm_sblk = { * DSPP sub blocks config *************************************************************/ static const struct dpu_dspp_sub_blks msm8998_dspp_sblk = { - .pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700, + .pcc = {.name = "pcc", .base = 0x1700, .len = 0x90, .version = 0x10007}, }; static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = { - .pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700, + .pcc = {.name = "pcc", .base = 0x1700, .len = 0x90, .version = 0x40000}, }; @@ -435,19 +432,19 @@ static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = { * PINGPONG sub blocks config *************************************************************/ static const struct dpu_pingpong_sub_blks sdm845_pp_sblk_te = { - .te2 = 
{.name = "te2", .id = DPU_PINGPONG_TE2, .base = 0x2000, .len = 0x0, + .te2 = {.name = "te2", .base = 0x2000, .len = 0x0, .version = 0x1}, - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0, + .dither = {.name = "dither", .base = 0x30e0, .len = 0x20, .version = 0x10000}, }; static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = { - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0, + .dither = {.name = "dither", .base = 0x30e0, .len = 0x20, .version = 0x10000}, }; static const struct dpu_pingpong_sub_blks sc7280_pp_sblk = { - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0xe0, + .dither = {.name = "dither", .base = 0xe0, .len = 0x20, .version = 0x20000}, }; @@ -465,6 +462,16 @@ static const struct dpu_dsc_sub_blks dsc_sblk_1 = { }; /************************************************************* + * CDM block config + *************************************************************/ +static const struct dpu_cdm_cfg sc7280_cdm = { + .name = "cdm_0", + .id = CDM_0, + .len = 0x228, + .base = 0x79200, +}; + +/************************************************************* * VBIF sub blocks config *************************************************************/ /* VBIF QOS remap */ @@ -472,6 +479,7 @@ static const u32 msm8998_rt_pri_lvl[] = {1, 2, 2, 2}; static const u32 msm8998_nrt_pri_lvl[] = {1, 1, 1, 1}; static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6}; static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3}; +static const u32 sm8650_rt_pri_lvl[] = {4, 4, 5, 5, 5, 5, 5, 6}; static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = { { @@ -558,6 +566,26 @@ static const struct dpu_vbif_cfg sm8550_vbif[] = { }, }; +static const struct dpu_vbif_cfg sm8650_vbif[] = { + { + .name = "vbif_rt", .id = VBIF_RT, + .base = 0, .len = 0x1074, + .features = BIT(DPU_VBIF_QOS_REMAP), + .xin_halt_timeout = 0x4000, + .qos_rp_remap_size = 0x40, + .qos_rt_tbl = { + .npriority_lvl = ARRAY_SIZE(sm8650_rt_pri_lvl), + .priority_lvl = sm8650_rt_pri_lvl, + }, + .qos_nrt_tbl = { + .npriority_lvl = ARRAY_SIZE(sdm845_nrt_pri_lvl), + .priority_lvl = sdm845_nrt_pri_lvl, + }, + .memtype_count = 16, + .memtype = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, + }, +}; + /************************************************************* * PERF data config *************************************************************/ @@ -654,6 +682,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_3_0_msm8998.h" #include "catalog/dpu_4_0_sdm845.h" +#include "catalog/dpu_4_1_sdm670.h" #include "catalog/dpu_5_0_sm8150.h" #include "catalog/dpu_5_1_sc8180x.h" @@ -673,3 +702,5 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_8_1_sm8450.h" #include "catalog/dpu_9_0_sm8550.h" + +#include "catalog/dpu_10_0_sm8650.h" diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index df024e10d3a3..ba82ef4560a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -51,9 +51,7 @@ enum { /** * SSPP sub-blocks/features * @DPU_SSPP_SCALER_QSEED2, QSEED2 algorithm support - * @DPU_SSPP_SCALER_QSEED3, QSEED3 alogorithm support - * @DPU_SSPP_SCALER_QSEED3LITE, QSEED3 Lite alogorithm support - * @DPU_SSPP_SCALER_QSEED4, QSEED4 algorithm support + * @DPU_SSPP_SCALER_QSEED3_COMPATIBLE, QSEED3-compatible alogorithm support (includes QSEED3, QSEED3LITE and QSEED4) * @DPU_SSPP_SCALER_RGB, RGB Scaler, 
supported by RGB pipes * @DPU_SSPP_CSC, Support of Color space converion * @DPU_SSPP_CSC_10BIT, Support of 10-bit Color space conversion @@ -71,9 +69,7 @@ enum { */ enum { DPU_SSPP_SCALER_QSEED2 = 0x1, - DPU_SSPP_SCALER_QSEED3, - DPU_SSPP_SCALER_QSEED3LITE, - DPU_SSPP_SCALER_QSEED4, + DPU_SSPP_SCALER_QSEED3_COMPATIBLE, DPU_SSPP_SCALER_RGB, DPU_SSPP_CSC, DPU_SSPP_CSC_10BIT, @@ -249,49 +245,50 @@ enum { unsigned long features /** - * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU - * @name: string name for debug purposes - * @id: enum identifying this sub-block - * @base: offset of this sub-block relative to the block - * offset - * @len register block length of this sub-block - */ -#define DPU_HW_SUBBLK_INFO \ - char name[DPU_HW_BLK_NAME_LEN]; \ - u32 id; \ - u32 base; \ - u32 len - -/** * struct dpu_scaler_blk: Scaler information - * @info: HW register and features supported by this sub-blk - * @version: qseed block revision + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block + * @version: qseed block revision, on QSEED3+ platforms this is the value of + * scaler_blk.base + QSEED3_HW_VERSION registers. */ struct dpu_scaler_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; u32 version; }; struct dpu_csc_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; }; /** * struct dpu_pp_blk : Pixel processing sub-blk information - * @info: HW register and features supported by this sub-blk + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block * @version: HW Algorithm version */ struct dpu_pp_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; u32 version; }; /** * struct dpu_dsc_blk - DSC Encoder sub-blk information - * @info: HW register and features supported by this sub-blk + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block */ struct dpu_dsc_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; }; /** @@ -341,7 +338,6 @@ struct dpu_rotation_cfg { * @max_mixer_width max layer mixer line width support. * @max_mixer_blendstages max layer mixer blend stages or * supported z order - * @qseed_type qseed2 or qseed3 support. 
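The DPU_HW_SUBBLK_INFO removal trades a field-injecting macro for explicit members, dropping the .id field no sub-block consumer used. A generic before/after sketch of the pattern (struct names are illustrative):

    /* before: layout hidden behind a macro, unused id carried along */
    #define SUBBLK_INFO	char name[DPU_HW_BLK_NAME_LEN]; u32 id; u32 base; u32 len
    struct blk_old { SUBBLK_INFO; u32 version; };

    /* after: each struct spells out exactly the fields it needs */
    struct blk_new {
        char name[DPU_HW_BLK_NAME_LEN];
        u32 base;
        u32 len;
        u32 version;
    };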
* @has_src_split source split feature status * @has_dim_layer dim layer feature status * @has_idle_pc indicate if idle power collapse feature is supported @@ -354,7 +350,6 @@ struct dpu_rotation_cfg { struct dpu_caps { u32 max_mixer_width; u32 max_mixer_blendstages; - u32 qseed_type; bool has_src_split; bool has_dim_layer; bool has_idle_pc; @@ -371,7 +366,6 @@ struct dpu_caps { * common: Pointer to common configurations shared by sub blocks * @maxdwnscale: max downscale ratio supported(without DECIMATION) * @maxupscale: maxupscale ratio supported - * @smart_dma_priority: hw priority of rect1 of multirect pipe * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps * @qseed_ver: qseed version * @scaler_blk: @@ -385,7 +379,6 @@ struct dpu_caps { struct dpu_sspp_sub_blks { u32 maxdwnscale; u32 maxupscale; - u32 smart_dma_priority; u32 max_per_pipe_bw; u32 qseed_ver; struct dpu_scaler_blk scaler_blk; @@ -690,6 +683,17 @@ struct dpu_vbif_cfg { }; /** + * struct dpu_cdm_cfg - information of chroma down blocks + * @name string name for debug purposes + * @id enum identifying this block + * @base register offset of this block + * @features bit mask identifying sub-blocks/features + */ +struct dpu_cdm_cfg { + DPU_HW_BLK_INFO; +}; + +/** * Define CDP use cases * @DPU_PERF_CDP_UDAGE_RT: real-time use cases * @DPU_PERF_CDP_USAGE_NRT: non real-time use cases such as WFD @@ -812,6 +816,8 @@ struct dpu_mdss_cfg { u32 wb_count; const struct dpu_wb_cfg *wb; + const struct dpu_cdm_cfg *cdm; + u32 ad_count; u32 dspp_count; @@ -827,6 +833,7 @@ struct dpu_mdss_cfg { extern const struct dpu_mdss_cfg dpu_msm8998_cfg; extern const struct dpu_mdss_cfg dpu_sdm845_cfg; +extern const struct dpu_mdss_cfg dpu_sdm670_cfg; extern const struct dpu_mdss_cfg dpu_sm8150_cfg; extern const struct dpu_mdss_cfg dpu_sc8180x_cfg; extern const struct dpu_mdss_cfg dpu_sm8250_cfg; @@ -841,5 +848,6 @@ extern const struct dpu_mdss_cfg dpu_sc7280_cfg; extern const struct dpu_mdss_cfg dpu_sc8280xp_cfg; extern const struct dpu_mdss_cfg dpu_sm8450_cfg; extern const struct dpu_mdss_cfg dpu_sm8550_cfg; +extern const struct dpu_mdss_cfg dpu_sm8650_cfg; #endif /* _DPU_HW_CATALOG_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c new file mode 100644 index 000000000000..e9cdc7934a49 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, The Linux Foundation. All rights reserved. 
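A catalog that exposes the chroma-down block references the new entry from its dpu_mdss_cfg. Hedged sketch of the hook-up; which SoC headers actually gain this, and their remaining fields, are not shown here:

    static const struct dpu_mdss_cfg example_cfg = {
        /* ... counts and block tables ... */
        .cdm = &sc7280_cdm,	/* left NULL on SoCs without a CDM */
        /* ... */
    };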
+ */ + +#include <linux/bitfield.h> + +#include <drm/drm_managed.h> + +#include "dpu_hw_mdss.h" +#include "dpu_hw_util.h" +#include "dpu_hw_catalog.h" +#include "dpu_hw_cdm.h" +#include "dpu_kms.h" + +#define CDM_CSC_10_OPMODE 0x000 +#define CDM_CSC_10_BASE 0x004 + +#define CDM_CDWN2_OP_MODE 0x100 +#define CDM_CDWN2_CLAMP_OUT 0x104 +#define CDM_CDWN2_PARAMS_3D_0 0x108 +#define CDM_CDWN2_PARAMS_3D_1 0x10C +#define CDM_CDWN2_COEFF_COSITE_H_0 0x110 +#define CDM_CDWN2_COEFF_COSITE_H_1 0x114 +#define CDM_CDWN2_COEFF_COSITE_H_2 0x118 +#define CDM_CDWN2_COEFF_OFFSITE_H_0 0x11C +#define CDM_CDWN2_COEFF_OFFSITE_H_1 0x120 +#define CDM_CDWN2_COEFF_OFFSITE_H_2 0x124 +#define CDM_CDWN2_COEFF_COSITE_V 0x128 +#define CDM_CDWN2_COEFF_OFFSITE_V 0x12C +#define CDM_CDWN2_OUT_SIZE 0x130 + +#define CDM_HDMI_PACK_OP_MODE 0x200 +#define CDM_CSC_10_MATRIX_COEFF_0 0x004 + +#define CDM_MUX 0x224 + +/* CDM CDWN2 sub-block bit definitions */ +#define CDM_CDWN2_OP_MODE_EN BIT(0) +#define CDM_CDWN2_OP_MODE_ENABLE_H BIT(1) +#define CDM_CDWN2_OP_MODE_ENABLE_V BIT(2) +#define CDM_CDWN2_OP_MODE_BITS_OUT_8BIT BIT(7) +#define CDM_CDWN2_V_PIXEL_METHOD_MASK GENMASK(6, 5) +#define CDM_CDWN2_H_PIXEL_METHOD_MASK GENMASK(4, 3) + +/* CDM CSC10 sub-block bit definitions */ +#define CDM_CSC10_OP_MODE_EN BIT(0) +#define CDM_CSC10_OP_MODE_SRC_FMT_YUV BIT(1) +#define CDM_CSC10_OP_MODE_DST_FMT_YUV BIT(2) + +/* CDM HDMI pack sub-block bit definitions */ +#define CDM_HDMI_PACK_OP_MODE_EN BIT(0) + +/* + * Horizontal coefficients for cosite chroma downscale + * s13 representation of coefficients + */ +static u32 cosite_h_coeff[] = {0x00000016, 0x000001cc, 0x0100009e}; + +/* + * Horizontal coefficients for offsite chroma downscale + */ +static u32 offsite_h_coeff[] = {0x000b0005, 0x01db01eb, 0x00e40046}; + +/* + * Vertical coefficients for cosite chroma downscale + */ +static u32 cosite_v_coeff[] = {0x00080004}; +/* + * Vertical coefficients for offsite chroma downscale + */ +static u32 offsite_v_coeff[] = {0x00060002}; + +static int dpu_hw_cdm_setup_cdwn(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cfg) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + u32 opmode; + u32 out_size; + + switch (cfg->h_cdwn_type) { + case CDM_CDWN_DISABLE: + opmode = 0; + break; + case CDM_CDWN_PIXEL_DROP: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_PIXEL_DROP); + break; + case CDM_CDWN_AVG: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_AVG); + break; + case CDM_CDWN_COSITE: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_COSITE); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_0, + cosite_h_coeff[0]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_1, + cosite_h_coeff[1]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_2, + cosite_h_coeff[2]); + break; + case CDM_CDWN_OFFSITE: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, CDM_CDWN2_METHOD_OFFSITE); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_0, + offsite_h_coeff[0]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_1, + offsite_h_coeff[1]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_2, + offsite_h_coeff[2]); + break; + default: + DPU_ERROR("%s invalid horz down sampling type\n", __func__); + return -EINVAL; + } + + switch (cfg->v_cdwn_type) { + case CDM_CDWN_DISABLE: + /* if its only Horizontal downsample, we dont need to do anything here */ + break; + case CDM_CDWN_PIXEL_DROP: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | 
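A worked example of the opmode composition in the switch above: CDM_CDWN2_H_PIXEL_METHOD_MASK is GENMASK(4, 3) == 0x18, and CDM_CDWN2_METHOD_COSITE enumerates to 2, so:

    u32 opmode = CDM_CDWN2_OP_MODE_ENABLE_H |	/* BIT(1) == 0x02 */
                 FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
                            CDM_CDWN2_METHOD_COSITE); /* 2 << 3 == 0x10 */
    /* opmode == 0x12: horizontal cosite downscale enabled */

FIELD_PREP() shifts the method value to the mask's lowest set bit, keeping the register layout in one place (the GENMASK definitions) instead of scattering shift constants.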
+ FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_PIXEL_DROP); + break; + case CDM_CDWN_AVG: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_AVG); + break; + case CDM_CDWN_COSITE: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_COSITE); + DPU_REG_WRITE(c, + CDM_CDWN2_COEFF_COSITE_V, + cosite_v_coeff[0]); + break; + case CDM_CDWN_OFFSITE: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_OFFSITE); + DPU_REG_WRITE(c, + CDM_CDWN2_COEFF_OFFSITE_V, + offsite_v_coeff[0]); + break; + default: + return -EINVAL; + } + + if (cfg->output_bit_depth != CDM_CDWN_OUTPUT_10BIT) + opmode |= CDM_CDWN2_OP_MODE_BITS_OUT_8BIT; + + if (cfg->v_cdwn_type || cfg->h_cdwn_type) + opmode |= CDM_CDWN2_OP_MODE_EN; /* EN CDWN module */ + else + opmode &= ~CDM_CDWN2_OP_MODE_EN; + + out_size = (cfg->output_width & 0xFFFF) | ((cfg->output_height & 0xFFFF) << 16); + DPU_REG_WRITE(c, CDM_CDWN2_OUT_SIZE, out_size); + DPU_REG_WRITE(c, CDM_CDWN2_OP_MODE, opmode); + DPU_REG_WRITE(c, CDM_CDWN2_CLAMP_OUT, ((0x3FF << 16) | 0x0)); + + return 0; +} + +static int dpu_hw_cdm_enable(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cdm) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + const struct dpu_format *fmt; + u32 opmode = 0; + u32 csc = 0; + + if (!ctx || !cdm) + return -EINVAL; + + fmt = cdm->output_fmt; + + if (!DPU_FORMAT_IS_YUV(fmt)) + return -EINVAL; + + dpu_hw_csc_setup(&ctx->hw, CDM_CSC_10_MATRIX_COEFF_0, cdm->csc_cfg, true); + dpu_hw_cdm_setup_cdwn(ctx, cdm); + + if (cdm->output_type == CDM_CDWN_OUTPUT_HDMI) { + if (fmt->chroma_sample != DPU_CHROMA_H1V2) + return -EINVAL; /*unsupported format */ + opmode = CDM_HDMI_PACK_OP_MODE_EN; + opmode |= (fmt->chroma_sample << 1); + } + + csc |= CDM_CSC10_OP_MODE_DST_FMT_YUV; + csc &= ~CDM_CSC10_OP_MODE_SRC_FMT_YUV; + csc |= CDM_CSC10_OP_MODE_EN; + + if (ctx && ctx->ops.bind_pingpong_blk) + ctx->ops.bind_pingpong_blk(ctx, cdm->pp_id); + + DPU_REG_WRITE(c, CDM_CSC_10_OPMODE, csc); + DPU_REG_WRITE(c, CDM_HDMI_PACK_OP_MODE, opmode); + return 0; +} + +static void dpu_hw_cdm_bind_pingpong_blk(struct dpu_hw_cdm *ctx, const enum dpu_pingpong pp) +{ + struct dpu_hw_blk_reg_map *c; + int mux_cfg; + + c = &ctx->hw; + + mux_cfg = DPU_REG_READ(c, CDM_MUX); + mux_cfg &= ~0xf; + + if (pp) + mux_cfg |= (pp - PINGPONG_0) & 0x7; + else + mux_cfg |= 0xf; + + DPU_REG_WRITE(c, CDM_MUX, mux_cfg); +} + +struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev, + const struct dpu_cdm_cfg *cfg, void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) +{ + struct dpu_hw_cdm *c; + + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); + if (!c) + return ERR_PTR(-ENOMEM); + + c->hw.blk_addr = addr + cfg->base; + c->hw.log_mask = DPU_DBG_MASK_CDM; + + /* Assign ops */ + c->idx = cfg->id; + c->caps = cfg; + + c->ops.enable = dpu_hw_cdm_enable; + if (mdss_rev->core_major_ver >= 5) + c->ops.bind_pingpong_blk = dpu_hw_cdm_bind_pingpong_blk; + + return c; +} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h new file mode 100644 index 000000000000..348424df87c6 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, The Linux Foundation. All rights reserved. 
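Two packings from the code above, worked through. CDM_CDWN2_OUT_SIZE holds width in the low and height in the high half-word, so a 1920x1080 output packs to (1920 & 0xFFFF) | ((1080 & 0xFFFF) << 16) == 0x04380780. The CDM_MUX low nibble selects the feeding pingpong relative to PINGPONG_0:

    mux_cfg &= ~0xf;
    if (pp)		/* e.g. PINGPONG_1: (PINGPONG_1 - PINGPONG_0) & 0x7 == 1 */
        mux_cfg |= (pp - PINGPONG_0) & 0x7;
    else		/* PINGPONG_NONE: 0xf disconnects the CDM */
        mux_cfg |= 0xf;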
+ */ + +#ifndef _DPU_HW_CDM_H +#define _DPU_HW_CDM_H + +#include "dpu_hw_mdss.h" +#include "dpu_hw_top.h" + +struct dpu_hw_cdm; + +/** + * struct dpu_hw_cdm_cfg : current configuration of CDM block + * + * @output_width: output ROI width of CDM block + * @output_height: output ROI height of CDM block + * @output_bit_depth: output bit-depth of CDM block + * @h_cdwn_type: downsample type used for horizontal pixels + * @v_cdwn_type: downsample type used for vertical pixels + * @output_fmt: handle to dpu_format of CDM block + * @csc_cfg: handle to CSC matrix programmed for CDM block + * @output_type: interface to which CDM is paired (HDMI/WB) + * @pp_id: ping-pong block to which CDM is bound to + */ +struct dpu_hw_cdm_cfg { + u32 output_width; + u32 output_height; + u32 output_bit_depth; + u32 h_cdwn_type; + u32 v_cdwn_type; + const struct dpu_format *output_fmt; + const struct dpu_csc_cfg *csc_cfg; + u32 output_type; + int pp_id; +}; + +/* + * These values are used indicate which type of downsample is used + * in the horizontal/vertical direction for the CDM block. + */ +enum dpu_hw_cdwn_type { + CDM_CDWN_DISABLE, + CDM_CDWN_PIXEL_DROP, + CDM_CDWN_AVG, + CDM_CDWN_COSITE, + CDM_CDWN_OFFSITE, +}; + +/* + * CDM block can be paired with WB or HDMI block. These values match + * the input with which the CDM block is paired. + */ +enum dpu_hw_cdwn_output_type { + CDM_CDWN_OUTPUT_HDMI, + CDM_CDWN_OUTPUT_WB, +}; + +/* + * CDM block can give an 8-bit or 10-bit output. These values + * are used to indicate the output bit depth of CDM block + */ +enum dpu_hw_cdwn_output_bit_depth { + CDM_CDWN_OUTPUT_8BIT, + CDM_CDWN_OUTPUT_10BIT, +}; + +/* + * CDM block can downsample using different methods. These values + * are used to indicate the downsample method which can be used + * either in the horizontal or vertical direction. + */ +enum dpu_hw_cdwn_op_mode_method_h_v { + CDM_CDWN2_METHOD_PIXEL_DROP, + CDM_CDWN2_METHOD_AVG, + CDM_CDWN2_METHOD_COSITE, + CDM_CDWN2_METHOD_OFFSITE +}; + +/** + * struct dpu_hw_cdm_ops : Interface to the chroma down Hw driver functions + * Assumption is these functions will be called after + * clocks are enabled + * @enable: Enables the output to interface and programs the + * output packer + * @bind_pingpong_blk: enable/disable the connection with pingpong which + * will feed pixels to this cdm + */ +struct dpu_hw_cdm_ops { + /** + * Enable the CDM module + * @cdm Pointer to chroma down context + */ + int (*enable)(struct dpu_hw_cdm *cdm, struct dpu_hw_cdm_cfg *cfg); + + /** + * Enable/disable the connection with pingpong + * @cdm Pointer to chroma down context + * @pp pingpong block id. + */ + void (*bind_pingpong_blk)(struct dpu_hw_cdm *cdm, const enum dpu_pingpong pp); +}; + +/** + * struct dpu_hw_cdm - cdm description + * @base: Hardware block base structure + * @hw: Block hardware details + * @idx: CDM index + * @caps: Pointer to cdm_cfg + * @ops: handle to operations possible for this CDM + */ +struct dpu_hw_cdm { + struct dpu_hw_blk base; + struct dpu_hw_blk_reg_map hw; + + /* chroma down */ + const struct dpu_cdm_cfg *caps; + enum dpu_cdm idx; + + /* ops */ + struct dpu_hw_cdm_ops ops; +}; + +/** + * dpu_hw_cdm_init - initializes the cdm hw driver object. + * should be called once before accessing every cdm. 
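Hedged sketch of how an encoder might fill dpu_hw_cdm_cfg for a YUV writeback. The concrete values are illustrative; the real caller derives them from the framebuffer format and the reserved pingpong:

    struct dpu_hw_cdm_cfg cdm_cfg = {
        .output_width     = 1920,
        .output_height    = 1080,
        .output_bit_depth = CDM_CDWN_OUTPUT_8BIT,
        .h_cdwn_type      = CDM_CDWN_COSITE,
        .v_cdwn_type      = CDM_CDWN_OFFSITE,
        .output_type      = CDM_CDWN_OUTPUT_WB,
        .pp_id            = PINGPONG_1,
        /* .output_fmt and .csc_cfg come from the DRM format;
         * enable() rejects a non-YUV output_fmt */
    };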
+ * @dev: DRM device handle + * @cdm: CDM catalog entry for which driver object is required + * @addr : mapped register io address of MDSS + * @mdss_rev: mdss hw core revision + */ +struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev, + const struct dpu_cdm_cfg *cdm, void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); + +static inline struct dpu_hw_cdm *to_dpu_hw_cdm(struct dpu_hw_blk *hw) +{ + return container_of(hw, struct dpu_hw_cdm, base); +} + +#endif /*_DPU_HW_CDM_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index 86182c734606..e76565c3e6a4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -4,6 +4,9 @@ */ #include <linux/delay.h> + +#include <drm/drm_managed.h> + #include "dpu_hwio.h" #include "dpu_hw_ctl.h" #include "dpu_kms.h" @@ -29,11 +32,13 @@ #define CTL_DSC_ACTIVE 0x0E8 #define CTL_WB_ACTIVE 0x0EC #define CTL_INTF_ACTIVE 0x0F4 +#define CTL_CDM_ACTIVE 0x0F8 #define CTL_FETCH_PIPE_ACTIVE 0x0FC #define CTL_MERGE_3D_FLUSH 0x100 #define CTL_DSC_FLUSH 0x104 #define CTL_WB_FLUSH 0x108 #define CTL_INTF_FLUSH 0x110 +#define CTL_CDM_FLUSH 0x114 #define CTL_INTF_MASTER 0x134 #define CTL_DSPP_n_FLUSH(n) ((0x13C) + ((n) * 4)) @@ -43,6 +48,7 @@ #define DPU_REG_RESET_TIMEOUT_US 2000 #define MERGE_3D_IDX 23 #define DSC_IDX 22 +#define CDM_IDX 26 #define INTF_IDX 31 #define WB_IDX 16 #define DSPP_IDX 29 /* From DPU hw rev 7.x.x */ @@ -104,6 +110,7 @@ static inline void dpu_hw_ctl_clear_pending_flush(struct dpu_hw_ctl *ctx) ctx->pending_wb_flush_mask = 0; ctx->pending_merge_3d_flush_mask = 0; ctx->pending_dsc_flush_mask = 0; + ctx->pending_cdm_flush_mask = 0; memset(ctx->pending_dspp_flush_mask, 0, sizeof(ctx->pending_dspp_flush_mask)); @@ -148,6 +155,10 @@ static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH, ctx->pending_dsc_flush_mask); + if (ctx->pending_flush_mask & BIT(CDM_IDX)) + DPU_REG_WRITE(&ctx->hw, CTL_CDM_FLUSH, + ctx->pending_cdm_flush_mask); + DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask); } @@ -279,6 +290,13 @@ static void dpu_hw_ctl_update_pending_flush_wb(struct dpu_hw_ctl *ctx, } } +static void dpu_hw_ctl_update_pending_flush_cdm(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num) +{ + /* update pending flush only if CDM_0 is flushed */ + if (cdm_num == CDM_0) + ctx->pending_flush_mask |= BIT(CDM_IDX); +} + static void dpu_hw_ctl_update_pending_flush_wb_v1(struct dpu_hw_ctl *ctx, enum dpu_wb wb) { @@ -307,6 +325,12 @@ static void dpu_hw_ctl_update_pending_flush_dsc_v1(struct dpu_hw_ctl *ctx, ctx->pending_flush_mask |= BIT(DSC_IDX); } +static void dpu_hw_ctl_update_pending_flush_cdm_v1(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num) +{ + ctx->pending_cdm_flush_mask |= BIT(cdm_num - CDM_0); + ctx->pending_flush_mask |= BIT(CDM_IDX); +} + static void dpu_hw_ctl_update_pending_flush_dspp(struct dpu_hw_ctl *ctx, enum dpu_dspp dspp, u32 dspp_sub_blk) { @@ -540,6 +564,9 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx, if (cfg->dsc) DPU_REG_WRITE(c, CTL_DSC_ACTIVE, cfg->dsc); + + if (cfg->cdm) + DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cfg->cdm); } static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx, @@ -583,6 +610,7 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx, u32 wb_active = 0; u32 merge3d_active = 0; u32 dsc_active; + u32 cdm_active; /* * This API resets each portion of the CTL path namely, @@ -618,6 +646,12 @@ static void 
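The CDM flush follows the same two-level scheme as DSC and INTF on CTL v1 hardware: a per-block mask plus one summary bit (CDM_IDX == 26) in the main flush word. Sketch of the resulting call sequence, using the ops wired up in _setup_ctl_ops:

    ctl->ops.update_pending_flush_cdm(ctl, CDM_0);
        /* pending_cdm_flush_mask |= BIT(0);
         * pending_flush_mask     |= BIT(26)  */
    ctl->ops.trigger_flush(ctl);
        /* v1: writes CTL_CDM_FLUSH, then CTL_FLUSH */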
dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx, dsc_active &= ~cfg->dsc; DPU_REG_WRITE(c, CTL_DSC_ACTIVE, dsc_active); } + + if (cfg->cdm) { + cdm_active = DPU_REG_READ(c, CTL_CDM_ACTIVE); + cdm_active &= ~cfg->cdm; + DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cdm_active); + } } static void dpu_hw_ctl_set_fetch_pipe_active(struct dpu_hw_ctl *ctx, @@ -651,12 +685,14 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops, ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb_v1; ops->update_pending_flush_dsc = dpu_hw_ctl_update_pending_flush_dsc_v1; + ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm_v1; } else { ops->trigger_flush = dpu_hw_ctl_trigger_flush; ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg; ops->update_pending_flush_intf = dpu_hw_ctl_update_pending_flush_intf; ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb; + ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm; } ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush; ops->update_pending_flush = dpu_hw_ctl_update_pending_flush; @@ -680,14 +716,15 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops, ops->set_active_pipes = dpu_hw_ctl_set_fetch_pipe_active; }; -struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, - void __iomem *addr, - u32 mixer_count, - const struct dpu_lm_cfg *mixer) +struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev, + const struct dpu_ctl_cfg *cfg, + void __iomem *addr, + u32 mixer_count, + const struct dpu_lm_cfg *mixer) { struct dpu_hw_ctl *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -702,8 +739,3 @@ struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, return c; } - -void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx) -{ - kfree(ctx); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h index 1c242298ff2e..ff85b5ee0acf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h @@ -39,6 +39,7 @@ struct dpu_hw_stage_cfg { * @mode_3d: 3d mux configuration * @merge_3d: 3d merge block used * @intf_mode_sel: Interface mode, cmd / vid + * @cdm: CDM block used * @stream_sel: Stream selection for multi-stream interfaces * @dsc: DSC BIT masks used */ @@ -48,6 +49,7 @@ struct dpu_hw_intf_cfg { enum dpu_3d_blend_mode mode_3d; enum dpu_merge_3d merge_3d; enum dpu_ctl_mode_sel intf_mode_sel; + enum dpu_cdm cdm; int stream_sel; unsigned int dsc; }; @@ -167,6 +169,14 @@ struct dpu_hw_ctl_ops { enum dpu_dsc blk); /** + * OR in the given flushbits to the cached pending_(cdm_)flush_mask + * No effect on hardware + * @ctx: ctl path ctx pointer + * @cdm_num: idx of cdm to be flushed + */ + void (*update_pending_flush_cdm)(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num); + + /** * Write the value of the pending_flush_mask to hardware * @ctx : ctl path ctx pointer */ @@ -239,6 +249,7 @@ struct dpu_hw_ctl_ops { * @pending_intf_flush_mask: pending INTF flush * @pending_wb_flush_mask: pending WB flush * @pending_dsc_flush_mask: pending DSC flush + * @pending_cdm_flush_mask: pending CDM flush * @ops: operation list */ struct dpu_hw_ctl { @@ -256,6 +267,7 @@ struct dpu_hw_ctl { u32 pending_merge_3d_flush_mask; u32 pending_dspp_flush_mask[DSPP_MAX - DSPP_0]; u32 pending_dsc_flush_mask; + u32 pending_cdm_flush_mask; /* ops */ struct dpu_hw_ctl_ops ops; @@ -274,20 +286,16 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw) /** * dpu_hw_ctl_init() 
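The dpu_hw_ctl_init() change is one instance of the conversion running through the rest of this series: allocate with drmm_kzalloc() so the memory is released together with the drm_device, and delete the matching *_destroy() helper. Generic shape of the pattern:

    struct foo *foo_init(struct drm_device *dev, ...)
    {
        struct foo *f = drmm_kzalloc(dev, sizeof(*f), GFP_KERNEL);

        if (!f)
            return ERR_PTR(-ENOMEM);
        /* ... set up register map and ops ... */
        return f;	/* freed automatically at device release */
    }

Caller error paths simplify too, since a failed init no longer leaks a partially built object.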
- Initializes the ctl_path hw driver object. * Should be called before accessing any ctl_path register. + * @dev: Corresponding device for devres management * @cfg: ctl_path catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mixer_count: Number of mixers in @mixer * @mixer: Pointer to an array of Layer Mixers defined in the catalog */ -struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, - void __iomem *addr, - u32 mixer_count, - const struct dpu_lm_cfg *mixer); - -/** - * dpu_hw_ctl_destroy(): Destroys ctl driver context - * should be called to free the context - */ -void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx); +struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev, + const struct dpu_ctl_cfg *cfg, + void __iomem *addr, + u32 mixer_count, + const struct dpu_lm_cfg *mixer); #endif /*_DPU_HW_CTL_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index 509dbaa51d87..5e9aad1b2aa2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -3,6 +3,8 @@ * Copyright (c) 2020-2022, Linaro Limited */ +#include <drm/drm_managed.h> + #include <drm/display/drm_dsc_helper.h> #include "dpu_kms.h" @@ -188,12 +190,13 @@ static void _setup_dsc_ops(struct dpu_hw_dsc_ops *ops, ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk; }; -struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr) { struct dpu_hw_dsc *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -206,8 +209,3 @@ struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, return c; } - -void dpu_hw_dsc_destroy(struct dpu_hw_dsc *dsc) -{ - kfree(dsc); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h index d5b597ab8c5c..989c88d2449b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h @@ -64,20 +64,24 @@ struct dpu_hw_dsc { /** * dpu_hw_dsc_init() - Initializes the DSC hw driver object. + * @dev: Corresponding device for devres management * @cfg: DSC catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: Error code or allocated dpu_hw_dsc context */ -struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, - void __iomem *addr); +struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, + void __iomem *addr); /** * dpu_hw_dsc_init_1_2() - initializes the v1.2 DSC hw driver object + * @dev: Corresponding device for devres management * @cfg: DSC catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Returns: Error code or allocated dpu_hw_dsc context */ -struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr); /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c index 24fe1d98eb86..ba193b0376fe 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c @@ -4,6 +4,8 @@ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. 
All rights reserved */ +#include <drm/drm_managed.h> + #include <drm/display/drm_dsc_helper.h> #include "dpu_kms.h" @@ -367,12 +369,13 @@ static void _setup_dcs_ops_1_2(struct dpu_hw_dsc_ops *ops, ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk_1_2; } -struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr) { struct dpu_hw_dsc *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 9419b2209af8..b1da88e2935f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ +#include <drm/drm_managed.h> + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_lm.h" @@ -68,15 +70,16 @@ static void _setup_dspp_ops(struct dpu_hw_dspp *c, c->ops.setup_pcc = dpu_setup_dspp_pcc; } -struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, - void __iomem *addr) +struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev, + const struct dpu_dspp_cfg *cfg, + void __iomem *addr) { struct dpu_hw_dspp *c; if (!addr) return ERR_PTR(-EINVAL); - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -90,10 +93,3 @@ struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, return c; } - -void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp) -{ - kfree(dspp); -} - - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h index bea965681330..3b435690b6cc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h @@ -81,18 +81,14 @@ static inline struct dpu_hw_dspp *to_dpu_hw_dspp(struct dpu_hw_blk *hw) /** * dpu_hw_dspp_init() - Initializes the DSPP hw driver object. * should be called once before accessing every DSPP. 
+ * @dev: Corresponding device for devres management * @cfg: DSPP catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: pointer to structure or ERR_PTR */ -struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_dspp_destroy(): Destroys DSPP driver context - * @dspp: Pointer to DSPP driver context - */ -void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp); +struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev, + const struct dpu_dspp_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_DSPP_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index 088807db2c83..946dd0135dff 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -6,6 +6,8 @@ #include <linux/debugfs.h> #include <linux/slab.h> +#include <drm/drm_managed.h> + #include "dpu_core_irq.h" #include "dpu_kms.h" #include "dpu_hw_interrupts.h" @@ -472,8 +474,9 @@ u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, return intr_status; } -struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, - const struct dpu_mdss_cfg *m) +struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev, + void __iomem *addr, + const struct dpu_mdss_cfg *m) { struct dpu_hw_intr *intr; unsigned int i; @@ -481,7 +484,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, if (!addr || !m) return ERR_PTR(-EINVAL); - intr = kzalloc(sizeof(*intr), GFP_KERNEL); + intr = drmm_kzalloc(dev, sizeof(*intr), GFP_KERNEL); if (!intr) return ERR_PTR(-ENOMEM); @@ -512,11 +515,6 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, return intr; } -void dpu_hw_intr_destroy(struct dpu_hw_intr *intr) -{ - kfree(intr); -} - int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, unsigned int irq_idx, void (*irq_cb)(void *arg), diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h index 53a21ebc57e8..564b750a28fe 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h @@ -70,15 +70,12 @@ struct dpu_hw_intr { /** * dpu_hw_intr_init(): Initializes the interrupts hw object + * @dev: Corresponding device for devres management * @addr: mapped register io address of MDP * @m: pointer to MDSS catalog data */ -struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, - const struct dpu_mdss_cfg *m); +struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev, + void __iomem *addr, + const struct dpu_mdss_cfg *m); -/** - * dpu_hw_intr_destroy(): Cleanup interrutps hw object - * @intr: pointer to interrupts hw object - */ -void dpu_hw_intr_destroy(struct dpu_hw_intr *intr); #endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index e8b8908d3e12..6bba531d6dc4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. 
*/ @@ -12,6 +12,8 @@ #include <linux/iopoll.h> +#include <drm/drm_managed.h> + #define INTF_TIMING_ENGINE_EN 0x000 #define INTF_CONFIG 0x004 #define INTF_HSYNC_CTL 0x008 @@ -318,9 +320,9 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf) return DPU_REG_READ(c, INTF_LINE_COUNT); } -static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count) +static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf) { - dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count); + dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, 0x1); } static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value) @@ -527,8 +529,10 @@ static void dpu_hw_intf_program_intf_cmd_cfg(struct dpu_hw_intf *ctx, DPU_REG_WRITE(&ctx->hw, INTF_CONFIG2, intf_cfg2); } -struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev, + const struct dpu_intf_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_intf *c; @@ -537,7 +541,7 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, return NULL; } - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -581,9 +585,3 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, return c; } - -void dpu_hw_intf_destroy(struct dpu_hw_intf *intf) -{ - kfree(intf); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index c539025c418b..0bd57a32144a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ @@ -95,7 +95,7 @@ struct dpu_hw_intf_ops { void (*bind_pingpong_blk)(struct dpu_hw_intf *intf, const enum dpu_pingpong pp); - void (*setup_misr)(struct dpu_hw_intf *intf, bool enable, u32 frame_count); + void (*setup_misr)(struct dpu_hw_intf *intf); int (*collect_misr)(struct dpu_hw_intf *intf, u32 *misr_value); // Tearcheck on INTF since DPU 5.0.0 @@ -131,17 +131,14 @@ struct dpu_hw_intf { /** * dpu_hw_intf_init() - Initializes the INTF driver for the passed * interface catalog entry. 
+ * @dev: Corresponding device for devres management * @cfg: interface catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions */ -struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_intf_destroy(): Destroys INTF driver context - * @intf: Pointer to INTF driver context - */ -void dpu_hw_intf_destroy(struct dpu_hw_intf *intf); +struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev, + const struct dpu_intf_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_INTF_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index d1c3bd8379ea..1d3ccf3228c6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ +#include <drm/drm_managed.h> + #include "dpu_kms.h" #include "dpu_hw_catalog.h" #include "dpu_hwio.h" @@ -81,9 +83,9 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx, } } -static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count) +static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx) { - dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count); + dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, 0x0); } static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value) @@ -156,8 +158,9 @@ static void _setup_mixer_ops(struct dpu_hw_lm_ops *ops, ops->collect_misr = dpu_hw_lm_collect_misr; } -struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, - void __iomem *addr) +struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev, + const struct dpu_lm_cfg *cfg, + void __iomem *addr) { struct dpu_hw_mixer *c; @@ -166,7 +169,7 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, return NULL; } - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -180,8 +183,3 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, return c; } - -void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm) -{ - kfree(lm); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h index 36992d046a53..0a3381755249 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ @@ -57,7 +58,7 @@ struct dpu_hw_lm_ops { /** * setup_misr: Enable/disable MISR */ - void (*setup_misr)(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count); + void (*setup_misr)(struct dpu_hw_mixer *ctx); /** * collect_misr: Read MISR signature @@ -95,16 +96,12 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw) /** * dpu_hw_lm_init() - Initializes the mixer hw driver object. * should be called once before accessing every mixer. 
+ * @dev: Corresponding device for devres management * @cfg: mixer catalog entry for which driver object is required * @addr: mapped register io address of MDP */ -struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_lm_destroy(): Destroys layer mixer driver context - * @lm: Pointer to LM driver context - */ -void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm); +struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev, + const struct dpu_lm_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_LM_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index d85157acfbf8..5df545904057 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -98,6 +98,7 @@ enum dpu_hw_blk_type { DPU_HW_BLK_DSPP, DPU_HW_BLK_MERGE_3D, DPU_HW_BLK_DSC, + DPU_HW_BLK_CDM, DPU_HW_BLK_MAX, }; @@ -185,6 +186,11 @@ enum dpu_dsc { DSC_MAX }; +enum dpu_cdm { + CDM_0 = 1, + CDM_MAX +}; + enum dpu_pingpong { PINGPONG_NONE, PINGPONG_0, @@ -195,6 +201,8 @@ enum dpu_pingpong { PINGPONG_5, PINGPONG_6, PINGPONG_7, + PINGPONG_8, + PINGPONG_9, PINGPONG_S0, PINGPONG_MAX }; @@ -204,6 +212,7 @@ enum dpu_merge_3d { MERGE_3D_1, MERGE_3D_2, MERGE_3D_3, + MERGE_3D_4, MERGE_3D_MAX }; @@ -458,6 +467,7 @@ struct dpu_mdss_color { #define DPU_DBG_MASK_ROT (1 << 9) #define DPU_DBG_MASK_DSPP (1 << 10) #define DPU_DBG_MASK_DSC (1 << 11) +#define DPU_DBG_MASK_CDM (1 << 12) /** * struct dpu_hw_tear_check - Struct contains parameters to configure diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c index 90e0e05eff8d..ddfa40a959cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c @@ -4,6 +4,8 @@ #include <linux/iopoll.h> +#include <drm/drm_managed.h> + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -37,12 +39,13 @@ static void _setup_merge_3d_ops(struct dpu_hw_merge_3d *c, c->ops.setup_3d_mode = dpu_hw_merge_3d_setup_3d_mode; }; -struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, - void __iomem *addr) +struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev, + const struct dpu_merge_3d_cfg *cfg, + void __iomem *addr) { struct dpu_hw_merge_3d *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -55,8 +58,3 @@ struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, return c; } - -void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *hw) -{ - kfree(hw); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h index 19cec5e88722..c192f02ec1ab 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h @@ -48,18 +48,13 @@ static inline struct dpu_hw_merge_3d *to_dpu_hw_merge_3d(struct dpu_hw_blk *hw) /** * dpu_hw_merge_3d_init() - Initializes the merge_3d driver for the passed * merge3d catalog entry. 
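Note the CDM enum starts at 1 (CDM_0 = 1), matching the other block enums: a zero-initialised field then naturally means "no block" (see the if (cfg->cdm) checks in the CTL code), while flush bits are computed relative to the enum base. Illustrative one-liner:

    u32 flushbit = BIT(cdm_num - CDM_0);	/* CDM_0 maps to bit 0 */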
+ * @dev: Corresponding device for devres management * @cfg: Pingpong catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: Error code or allocated dpu_hw_merge_3d context */ -struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_merge_3d_destroy - destroys merge_3d driver context - * should be called to free the context - * @pp: Pointer to PP driver context returned by dpu_hw_merge_3d_init - */ -void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *pp); +struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev, + const struct dpu_merge_3d_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_MERGE3D_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c index 057cac7f5d93..2db4c6fba37a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c @@ -4,6 +4,8 @@ #include <linux/iopoll.h> +#include <drm/drm_managed.h> + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -281,12 +283,14 @@ static int dpu_hw_pp_setup_dsc(struct dpu_hw_pingpong *pp) return 0; } -struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev, + const struct dpu_pingpong_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_pingpong *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -317,8 +321,3 @@ struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, return c; } - -void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp) -{ - kfree(pp); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h index 0d541ca5b056..a48b69fd79a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h @@ -121,19 +121,15 @@ static inline struct dpu_hw_pingpong *to_dpu_hw_pingpong(struct dpu_hw_blk *hw) /** * dpu_hw_pingpong_init() - initializes the pingpong driver for the passed * pingpong catalog entry. 
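With _dpu_hw_sspp_get_scaler3_ver() gone, nothing reads the QSEED version register at runtime; the revision is trusted from the catalog instead. Roughly what the remaining scaler path consumes (a sketch mirroring the setup_scaler3 call):

    u32 scaler_ver = ctx->cap->sblk->scaler_blk.version;
        /* e.g. SSPP_SCALER_VER(3, 0) on a QSEED3.0 pipe */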
+ * @dev: Corresponding device for devres management * @cfg: Pingpong catalog entry for which driver object is required * @addr: Mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions * Return: Error code or allocated dpu_hw_pingpong context */ -struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_pingpong_destroy - destroys pingpong driver context - * should be called to free the context - * @pp: Pointer to PP driver context returned by dpu_hw_pingpong_init - */ -void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp); +struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev, + const struct dpu_pingpong_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_PINGPONG_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 8e3c65989c49..0bf8a83e8df3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -11,6 +11,7 @@ #include "msm_mdss.h" #include <drm/drm_file.h> +#include <drm/drm_managed.h> #define DPU_FETCH_CONFIG_RESET_VALUE 0x00000087 @@ -395,15 +396,6 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_sspp *ctx, format); } -static u32 _dpu_hw_sspp_get_scaler3_ver(struct dpu_hw_sspp *ctx) -{ - if (!ctx) - return 0; - - return dpu_hw_get_scaler3_ver(&ctx->hw, - ctx->cap->sblk->scaler_blk.base); -} - /* * dpu_hw_sspp_setup_rects() */ @@ -614,12 +606,8 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c, test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features)) c->ops.setup_multirect = dpu_hw_sspp_setup_multirect; - if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) || - test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) || - test_bit(DPU_SSPP_SCALER_QSEED4, &features)) { + if (test_bit(DPU_SSPP_SCALER_QSEED3_COMPATIBLE, &features)) c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3; - c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver; - } if (test_bit(DPU_SSPP_CDP, &features)) c->ops.setup_cdp = dpu_hw_sspp_setup_cdp; @@ -654,10 +642,7 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, cfg->len, kms); - if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED3LITE) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED4)) + if (sblk->scaler_blk.len) dpu_debugfs_create_regset32("scaler_blk", 0400, debugfs_root, sblk->scaler_blk.base + cfg->base, @@ -685,16 +670,18 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, } #endif -struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, - void __iomem *addr, const struct msm_mdss_data *mdss_data, - const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev, + const struct dpu_sspp_cfg *cfg, + void __iomem *addr, + const struct msm_mdss_data *mdss_data, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_sspp *hw_pipe; if (!addr) return ERR_PTR(-EINVAL); - hw_pipe = kzalloc(sizeof(*hw_pipe), GFP_KERNEL); + hw_pipe = drmm_kzalloc(dev, sizeof(*hw_pipe), GFP_KERNEL); if (!hw_pipe) return ERR_PTR(-ENOMEM); @@ -709,9 +696,3 @@ struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, return hw_pipe; } - -void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx) -{ - kfree(ctx); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h index f93969fddb22..b7dc52312c39 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h @@ -22,21 +22,6 @@ struct dpu_hw_sspp; #define DPU_SSPP_SOLID_FILL BIT(4) /** - * Define all scaler feature bits in catalog - */ -#define DPU_SSPP_SCALER (BIT(DPU_SSPP_SCALER_RGB) | \ - BIT(DPU_SSPP_SCALER_QSEED2) | \ - BIT(DPU_SSPP_SCALER_QSEED3) | \ - BIT(DPU_SSPP_SCALER_QSEED3LITE) | \ - BIT(DPU_SSPP_SCALER_QSEED4)) - -/* - * Define all CSC feature bits in catalog - */ -#define DPU_SSPP_CSC_ANY (BIT(DPU_SSPP_CSC) | \ - BIT(DPU_SSPP_CSC_10BIT)) - -/** * Component indices */ enum { @@ -297,12 +282,6 @@ struct dpu_hw_sspp_ops { const struct dpu_format *format); /** - * get_scaler_ver - get scaler h/w version - * @ctx: Pointer to pipe context - */ - u32 (*get_scaler_ver)(struct dpu_hw_sspp *ctx); - - /** * setup_cdp - setup client driven prefetch * @pipe: Pointer to software pipe context * @fmt: format used by the sw pipe @@ -339,21 +318,17 @@ struct dpu_kms; /** * dpu_hw_sspp_init() - Initializes the sspp hw driver object. * Should be called once before accessing every pipe. + * @dev: Corresponding device for devres management * @cfg: Pipe catalog entry for which driver object is required * @addr: Mapped register io address of MDP * @mdss_data: UBWC / MDSS configuration data * @mdss_rev: dpu core's major and minor versions */ -struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, - void __iomem *addr, const struct msm_mdss_data *mdss_data, - const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_sspp_destroy(): Destroys SSPP driver context - * should be called during Hw pipe cleanup. - * @ctx: Pointer to SSPP driver context returned by dpu_hw_sspp_init - */ -void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx); +struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev, + const struct dpu_sspp_cfg *cfg, + void __iomem *addr, + const struct msm_mdss_data *mdss_data, + const struct dpu_mdss_version *mdss_rev); int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, struct dentry *entry); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c index 24e734768a72..05e48cf4ec1d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. 
*/ +#include <drm/drm_managed.h> + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_top.h" @@ -247,16 +249,17 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops, ops->intf_audio_select = dpu_hw_intf_audio_select; } -struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, - void __iomem *addr, - const struct dpu_mdss_cfg *m) +struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev, + const struct dpu_mdp_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_cfg *m) { struct dpu_hw_mdp *mdp; if (!addr) return ERR_PTR(-EINVAL); - mdp = kzalloc(sizeof(*mdp), GFP_KERNEL); + mdp = drmm_kzalloc(dev, sizeof(*mdp), GFP_KERNEL); if (!mdp) return ERR_PTR(-ENOMEM); @@ -271,9 +274,3 @@ struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, return mdp; } - -void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp) -{ - kfree(mdp); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h index 8b1463d2b2f0..6f3dc98087df 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h @@ -145,13 +145,15 @@ struct dpu_hw_mdp { /** * dpu_hw_mdptop_init - initializes the top driver for the passed config + * @dev: Corresponding device for devres management * @cfg: MDP TOP configuration from catalog * @addr: Mapped register io address of MDP * @m: Pointer to mdss catalog data */ -struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, - void __iomem *addr, - const struct dpu_mdss_cfg *m); +struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev, + const struct dpu_mdp_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_cfg *m); void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c index 18b16b2d2bf5..dd475827314e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ #define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__ @@ -381,12 +381,6 @@ end: DPU_REG_WRITE(c, QSEED3_OP_MODE + scaler_offset, op_mode); } -u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c, - u32 scaler_offset) -{ - return DPU_REG_READ(c, QSEED3_HW_VERSION + scaler_offset); -} - void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c, u32 csc_reg_off, const struct dpu_csc_cfg *data, bool csc10) @@ -481,9 +475,11 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, cfg->danger_safe_en ? 
QOS_QOS_CTRL_DANGER_SAFE_EN : 0); } +/* + * note: Aside from encoders, input_sel should be set to 0x0 by default + */ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, - u32 misr_ctrl_offset, - bool enable, u32 frame_count) + u32 misr_ctrl_offset, u8 input_sel) { u32 config = 0; @@ -492,15 +488,9 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, /* Clear old MISR value (in case it's read before a new value is calculated)*/ wmb(); - if (enable) { - config = (frame_count & MISR_FRAME_COUNT_MASK) | - MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK; - - DPU_REG_WRITE(c, misr_ctrl_offset, config); - } else { - DPU_REG_WRITE(c, misr_ctrl_offset, 0); - } - + config = MISR_FRAME_COUNT | MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK | + ((input_sel & 0xF) << 24); + DPU_REG_WRITE(c, misr_ctrl_offset, config); } int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c, @@ -567,3 +557,47 @@ bool dpu_hw_clk_force_ctrl(struct dpu_hw_blk_reg_map *c, return clk_forced_on; } + +#define TO_S15D16(_x_)((_x_) << 7) + +const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xfff0, 0xff80, 0xff80,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, + { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, +}; + +const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xffc0, 0xfe00, 0xfe00,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, + { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, +}; + +const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = { + { + TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032), + TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1), + TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc) + }, + { 0x00, 0x00, 0x00 }, + { 0x0040, 0x0200, 0x0200 }, + { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff }, + { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 }, +}; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index 4bea139081bc..64ded69fa903 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. 
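A worked value for the rewritten dpu_hw_setup_misr(): MISR capture is now always enabled with a one-frame window, and input_sel lands in bits [27:24]. For an INTF, which passes input_sel == 0x1:

    /* MISR_FRAME_COUNT (0x1) | MISR_CTRL_ENABLE (BIT(8))
     * | MISR_CTRL_FREE_RUN_MASK (BIT(31)) | (0x1 << 24)
     * == 0x81000101 */
    dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, 0x1);

Layer mixers pass input_sel == 0x0, as the dpu_hw_lm.c hunk above shows.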
*/ @@ -13,12 +13,18 @@ #include "dpu_hw_catalog.h" #define REG_MASK(n) ((BIT(n)) - 1) -#define MISR_FRAME_COUNT_MASK 0xFF +#define MISR_FRAME_COUNT 0x1 #define MISR_CTRL_ENABLE BIT(8) #define MISR_CTRL_STATUS BIT(9) #define MISR_CTRL_STATUS_CLEAR BIT(10) #define MISR_CTRL_FREE_RUN_MASK BIT(31) +#define TO_S15D16(_x_)((_x_) << 7) + +extern const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L; +extern const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L; +extern const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l; + /* * This is the common struct maintained by each sub block * for mapping the register offsets in this block to the @@ -340,9 +346,6 @@ void dpu_hw_setup_scaler3(struct dpu_hw_blk_reg_map *c, u32 scaler_offset, u32 scaler_version, const struct dpu_format *format); -u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c, - u32 scaler_offset); - void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c, u32 csc_reg_off, const struct dpu_csc_cfg *data, bool csc10); @@ -358,9 +361,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, const struct dpu_hw_qos_cfg *cfg); void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, - u32 misr_ctrl_offset, - bool enable, - u32 frame_count); + u32 misr_ctrl_offset, u8 input_sel); int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c, u32 misr_ctrl_offset, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c index a5121a50b2bb..98e34afde2d2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ +#include <drm/drm_managed.h> + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_vbif.h" @@ -211,12 +213,13 @@ static void _setup_vbif_ops(struct dpu_hw_vbif_ops *ops, ops->set_write_gather_en = dpu_hw_set_write_gather_en; } -struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, - void __iomem *addr) +struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev, + const struct dpu_vbif_cfg *cfg, + void __iomem *addr) { struct dpu_hw_vbif *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -234,8 +237,3 @@ struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, return c; } - -void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif) -{ - kfree(vbif); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h index 7e10d2a172b4..e2b4307500e4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h @@ -108,12 +108,12 @@ struct dpu_hw_vbif { /** * dpu_hw_vbif_init() - Initializes the VBIF driver for the passed * VBIF catalog entry. 
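The TO_S15D16() helper appears to rescale coefficients written with 9 fractional bits (1.0 == 512) into the S15.16 register format (1.0 == 65536) by shifting left 7. Worked example from dpu_csc10_rgb2yuv_601l:

    u32 coeff = TO_S15D16(0x0083);	/* 0x83 << 7 == 0x4180 */
    /* 0x4180 / 65536.0 == 131.0 / 512.0 ~= 0.256,
     * the BT.601 limited-range Y <- R term */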
+ * @dev: Corresponding device for devres management * @cfg: VBIF catalog entry for which driver object is required * @addr: Mapped register io address of MDSS */ -struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, - void __iomem *addr); - -void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif); +struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev, + const struct dpu_vbif_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_VBIF_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index 9668fb97c047..e75995f7fcea 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -3,6 +3,8 @@ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved */ +#include <drm/drm_managed.h> + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -87,6 +89,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx, dst_format |= BIT(14); /* DST_ALPHA_X */ } + if (DPU_FORMAT_IS_YUV(fmt)) + dst_format |= BIT(15); + pattern = (fmt->element[3] << 24) | (fmt->element[2] << 16) | (fmt->element[1] << 8) | @@ -208,15 +213,17 @@ static void _setup_wb_ops(struct dpu_hw_wb_ops *ops, ops->setup_clk_force_ctrl = dpu_hw_wb_setup_clk_force_ctrl; } -struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev, + const struct dpu_wb_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_wb *c; if (!addr) return ERR_PTR(-EINVAL); - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -230,8 +237,3 @@ struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, return c; } - -void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb) -{ - kfree(hw_wb); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h index 88792f450a92..e671796ea379 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h @@ -76,18 +76,15 @@ struct dpu_hw_wb { /** * dpu_hw_wb_init() - Initializes the writeback hw driver object. + * @dev: Corresponding device for devres management * @cfg: wb_path catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions * Return: Error code or allocated dpu_hw_wb context */ -struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_wb_destroy(): Destroy writeback hw driver object. 
- * @hw_wb: Pointer to writeback hw driver object - */ -void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb); +struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev, + const struct dpu_wb_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_WB_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index fe7267b3bff5..723cc1d82143 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -274,9 +274,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) struct dpu_kms *dpu_kms = to_dpu_kms(kms); void *p = dpu_hw_util_get_log_mask_ptr(); struct dentry *entry; - struct drm_device *dev; - struct msm_drm_private *priv; - int i; if (!p) return -EINVAL; @@ -285,9 +282,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) if (minor->type != DRM_MINOR_PRIMARY) return 0; - dev = dpu_kms->dev; - priv = dev->dev_private; - entry = debugfs_create_dir("debug", minor->debugfs_root); debugfs_create_x32(DPU_DEBUGFS_HWMASKNAME, 0600, entry, p); @@ -297,11 +291,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) dpu_debugfs_core_irq_init(dpu_kms, entry); dpu_debugfs_sspp_init(dpu_kms, entry); - for (i = 0; i < ARRAY_SIZE(priv->dp); i++) { - if (priv->dp[i]) - msm_dp_debugfs_init(priv->dp[i], minor); - } - return dpu_core_perf_debugfs_init(dpu_kms, entry); } #endif @@ -597,7 +586,6 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, rc = msm_dp_modeset_init(priv->dp[i], dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } } @@ -630,7 +618,6 @@ static int _dpu_kms_initialize_hdmi(struct drm_device *dev, rc = msm_hdmi_modeset_init(priv->hdmi, dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } @@ -662,7 +649,6 @@ static int _dpu_kms_initialize_writeback(struct drm_device *dev, n_formats); if (rc) { DPU_ERROR("dpu_writeback_init, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } @@ -806,30 +792,17 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) { int i; - if (dpu_kms->hw_intr) - dpu_hw_intr_destroy(dpu_kms->hw_intr); dpu_kms->hw_intr = NULL; /* safe to call these more than once during shutdown */ _dpu_kms_mmu_destroy(dpu_kms); - if (dpu_kms->catalog) { - for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { - if (dpu_kms->hw_vbif[i]) { - dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]); - dpu_kms->hw_vbif[i] = NULL; - } - } + for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { + dpu_kms->hw_vbif[i] = NULL; } - if (dpu_kms->rm_init) - dpu_rm_destroy(&dpu_kms->rm); - dpu_kms->rm_init = false; - dpu_kms->catalog = NULL; - if (dpu_kms->hw_mdp) - dpu_hw_mdp_destroy(dpu_kms->hw_mdp); dpu_kms->hw_mdp = NULL; } @@ -856,7 +829,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms) { struct msm_drm_private *priv; struct dpu_kms *dpu_kms = to_dpu_kms(kms); - int i; if (!dpu_kms || !dpu_kms->dev) return -EINVAL; @@ -865,9 +837,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms) if (!priv) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(priv->dp); i++) - msm_dp_irq_postinstall(priv->dp[i]); - return 0; } @@ -975,6 +944,10 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k } } + if (cat->cdm) + msm_disp_snapshot_add_block(disp_state, cat->cdm->len, + dpu_kms->mmio + cat->cdm->base, cat->cdm->name); + 
pm_runtime_put_sync(&dpu_kms->pdev->dev); } @@ -1078,7 +1051,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) if (!dpu_kms->catalog) { DPU_ERROR("device config not known!\n"); rc = -EINVAL; - goto power_error; + goto err_pm_put; } /* @@ -1088,49 +1061,48 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = _dpu_kms_mmu_init(dpu_kms); if (rc) { DPU_ERROR("dpu_kms_mmu_init failed: %d\n", rc); - goto power_error; + goto err_pm_put; } dpu_kms->mdss = msm_mdss_get_mdss_data(dpu_kms->pdev->dev.parent); if (IS_ERR(dpu_kms->mdss)) { rc = PTR_ERR(dpu_kms->mdss); DPU_ERROR("failed to get MDSS data: %d\n", rc); - goto power_error; + goto err_pm_put; } if (!dpu_kms->mdss) { rc = -EINVAL; DPU_ERROR("NULL MDSS data\n"); - goto power_error; + goto err_pm_put; } - rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio); + rc = dpu_rm_init(dev, &dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio); if (rc) { DPU_ERROR("rm init failed: %d\n", rc); - goto power_error; + goto err_pm_put; } - dpu_kms->rm_init = true; - - dpu_kms->hw_mdp = dpu_hw_mdptop_init(dpu_kms->catalog->mdp, + dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev, + dpu_kms->catalog->mdp, dpu_kms->mmio, dpu_kms->catalog); if (IS_ERR(dpu_kms->hw_mdp)) { rc = PTR_ERR(dpu_kms->hw_mdp); DPU_ERROR("failed to get hw_mdp: %d\n", rc); dpu_kms->hw_mdp = NULL; - goto power_error; + goto err_pm_put; } for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { struct dpu_hw_vbif *hw; const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; - hw = dpu_hw_vbif_init(vbif, dpu_kms->vbif[vbif->id]); + hw = dpu_hw_vbif_init(dev, vbif, dpu_kms->vbif[vbif->id]); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc); - goto power_error; + goto err_pm_put; } dpu_kms->hw_vbif[vbif->id] = hw; @@ -1146,15 +1118,15 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = dpu_core_perf_init(&dpu_kms->perf, dpu_kms->catalog->perf, max_core_clk_rate); if (rc) { DPU_ERROR("failed to init perf %d\n", rc); - goto perf_err; + goto err_pm_put; } - dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog); - if (IS_ERR_OR_NULL(dpu_kms->hw_intr)) { + dpu_kms->hw_intr = dpu_hw_intr_init(dev, dpu_kms->mmio, dpu_kms->catalog); + if (IS_ERR(dpu_kms->hw_intr)) { rc = PTR_ERR(dpu_kms->hw_intr); DPU_ERROR("hw_intr init failed: %d\n", rc); dpu_kms->hw_intr = NULL; - goto hw_intr_init_err; + goto err_pm_put; } dev->mode_config.min_width = 0; @@ -1179,7 +1151,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = _dpu_kms_drm_obj_init(dpu_kms); if (rc) { DPU_ERROR("modeset init failed: %d\n", rc); - goto drm_obj_init_err; + goto err_pm_put; } dpu_vbif_init_memtypes(dpu_kms); @@ -1188,10 +1160,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) return 0; -drm_obj_init_err: -hw_intr_init_err: -perf_err: -power_error: +err_pm_put: pm_runtime_put_sync(&dpu_kms->pdev->dev); error: _dpu_kms_hw_destroy(dpu_kms); @@ -1349,6 +1318,7 @@ static const struct dev_pm_ops dpu_pm_ops = { static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,msm8998-dpu", .data = &dpu_msm8998_cfg, }, { .compatible = "qcom,qcm2290-dpu", .data = &dpu_qcm2290_cfg, }, + { .compatible = "qcom,sdm670-dpu", .data = &dpu_sdm670_cfg, }, { .compatible = "qcom,sdm845-dpu", .data = &dpu_sdm845_cfg, }, { .compatible = "qcom,sc7180-dpu", .data = &dpu_sc7180_cfg, }, { .compatible = "qcom,sc7280-dpu", .data = &dpu_sc7280_cfg, }, @@ -1363,6 +1333,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sm8350-dpu", 
.data = &dpu_sm8350_cfg, }, { .compatible = "qcom,sm8450-dpu", .data = &dpu_sm8450_cfg, }, { .compatible = "qcom,sm8550-dpu", .data = &dpu_sm8550_cfg, }, + { .compatible = "qcom,sm8650-dpu", .data = &dpu_sm8650_cfg, }, {} }; MODULE_DEVICE_TABLE(of, dpu_dt_match); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index b6f53ca6e962..d1207f4ec3ae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -51,6 +51,7 @@ } while (0) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) +#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) /** * ktime_compare_safe - compare two ktime structures @@ -88,7 +89,6 @@ struct dpu_kms { struct drm_private_obj global_state; struct dpu_rm rm; - bool rm_init; struct dpu_hw_vbif *hw_vbif[VBIF_MAX]; struct dpu_hw_mdp *hw_mdp; @@ -136,6 +136,7 @@ struct dpu_global_state { uint32_t ctl_to_enc_id[CTL_MAX - CTL_0]; uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0]; uint32_t dsc_to_enc_id[DSC_MAX - DSC_0]; + uint32_t cdm_to_enc_id; }; struct dpu_global_state diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 3eef5e025e12..ff975ad51145 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -21,6 +21,7 @@ #include "dpu_kms.h" #include "dpu_formats.h" #include "dpu_hw_sspp.h" +#include "dpu_hw_util.h" #include "dpu_trace.h" #include "dpu_crtc.h" #include "dpu_vbif.h" @@ -78,8 +79,6 @@ static const uint32_t qcom_compressed_supported_formats[] = { struct dpu_plane { struct drm_plane base; - struct mutex lock; - enum dpu_sspp pipe; uint32_t color_fill; @@ -470,8 +469,7 @@ static void _dpu_plane_setup_scaler3(struct dpu_hw_sspp *pipe_hw, scale_cfg->src_height[i] /= chroma_subsmpl_v; } - if (pipe_hw->cap->features & - BIT(DPU_SSPP_SCALER_QSEED4)) { + if (pipe_hw->cap->sblk->scaler_blk.version >= 0x3000) { scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H; scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V; } else { @@ -511,36 +509,6 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg, } } -static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xfff0, 0xff80, 0xff80,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, - { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, -}; - -static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xffc0, 0xfe00, 0xfe00,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, - { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, -}; - static const struct dpu_csc_cfg *_dpu_plane_get_csc(struct dpu_sw_pipe *pipe, const struct dpu_format *fmt) { @@ -774,8 +742,8 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 
2 : 1; if (DPU_FORMAT_IS_YUV(fmt) && - (!(pipe->sspp->cap->features & DPU_SSPP_SCALER) || - !(pipe->sspp->cap->features & DPU_SSPP_CSC_ANY))) { + (!pipe->sspp->cap->sblk->scaler_blk.len || + !pipe->sspp->cap->sblk->csc_blk.len)) { DPU_DEBUG_PLANE(pdpu, "plane doesn't have scaler/csc for yuv\n"); return -EINVAL; @@ -824,6 +792,8 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, plane); int ret = 0, min_scale; struct dpu_plane *pdpu = to_dpu_plane(plane); + struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); + u64 max_mdp_clk_rate = kms->perf.max_core_clk_rate; struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; @@ -892,14 +862,16 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; - if (drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || + _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* * In parallel multirect case only the half of the usual width * is supported for tiled formats. If we are here, we know that * full width is more than max_linewidth, thus each rect is * wider than allowed. */ - if (DPU_FORMAT_IS_UBWC(fmt)) { + if (DPU_FORMAT_IS_UBWC(fmt) && + drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n", DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); return -E2BIG; @@ -1213,29 +1185,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane, } } -static void dpu_plane_destroy(struct drm_plane *plane) -{ - struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL; - struct dpu_plane_state *pstate; - - DPU_DEBUG_PLANE(pdpu, "\n"); - - if (pdpu) { - pstate = to_dpu_plane_state(plane->state); - _dpu_plane_set_qos_ctrl(plane, &pstate->pipe, false); - - if (pstate->r_pipe.sspp) - _dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false); - - mutex_destroy(&pdpu->lock); - - /* this will destroy the states as well */ - drm_plane_cleanup(plane); - - kfree(pdpu); - } -} - static void dpu_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { @@ -1405,7 +1354,6 @@ static bool dpu_plane_format_mod_supported(struct drm_plane *plane, static const struct drm_plane_funcs dpu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, - .destroy = dpu_plane_destroy, .reset = dpu_plane_reset, .atomic_duplicate_state = dpu_plane_duplicate_state, .atomic_destroy_state = dpu_plane_destroy_state, @@ -1433,35 +1381,28 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, struct dpu_hw_sspp *pipe_hw; uint32_t num_formats; uint32_t supported_rotations; - int ret = -EINVAL; - - /* create and zero local structure */ - pdpu = kzalloc(sizeof(*pdpu), GFP_KERNEL); - if (!pdpu) { - DPU_ERROR("[%u]failed to allocate local plane struct\n", pipe); - ret = -ENOMEM; - return ERR_PTR(ret); - } - - /* cache local stuff for later */ - plane = &pdpu->base; - pdpu->pipe = pipe; + int ret; /* initialize underlying h/w driver */ pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { DPU_ERROR("[%u]SSPP is invalid\n", pipe); - goto clean_plane; + return ERR_PTR(-EINVAL); } format_list = pipe_hw->cap->sblk->format_list; num_formats = pipe_hw->cap->sblk->num_formats; - ret = drm_universal_plane_init(dev, plane, 0xff, &dpu_plane_funcs, + 
pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base, + 0xff, &dpu_plane_funcs, format_list, num_formats, supported_format_modifiers, type, NULL); - if (ret) - goto clean_plane; + if (IS_ERR(pdpu)) + return ERR_CAST(pdpu); + + /* cache local stuff for later */ + plane = &pdpu->base; + pdpu->pipe = pipe; pdpu->catalog = kms->catalog; @@ -1488,13 +1429,7 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, /* success! finalize initialization */ drm_plane_helper_add(plane, &dpu_plane_helper_funcs); - mutex_init(&pdpu->lock); - DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, pipe, plane->base.id); return plane; - -clean_plane: - kfree(pdpu); - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 8759466e2f37..b58a9c2ae326 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -8,6 +8,7 @@ #include "dpu_kms.h" #include "dpu_hw_lm.h" #include "dpu_hw_ctl.h" +#include "dpu_hw_cdm.h" #include "dpu_hw_pingpong.h" #include "dpu_hw_sspp.h" #include "dpu_hw_intf.h" @@ -34,72 +35,8 @@ struct dpu_rm_requirements { struct msm_display_topology topology; }; -int dpu_rm_destroy(struct dpu_rm *rm) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) { - struct dpu_hw_dspp *hw; - - if (rm->dspp_blks[i]) { - hw = to_dpu_hw_dspp(rm->dspp_blks[i]); - dpu_hw_dspp_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) { - struct dpu_hw_pingpong *hw; - - if (rm->pingpong_blks[i]) { - hw = to_dpu_hw_pingpong(rm->pingpong_blks[i]); - dpu_hw_pingpong_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->merge_3d_blks); i++) { - struct dpu_hw_merge_3d *hw; - - if (rm->merge_3d_blks[i]) { - hw = to_dpu_hw_merge_3d(rm->merge_3d_blks[i]); - dpu_hw_merge_3d_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->mixer_blks); i++) { - struct dpu_hw_mixer *hw; - - if (rm->mixer_blks[i]) { - hw = to_dpu_hw_mixer(rm->mixer_blks[i]); - dpu_hw_lm_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->ctl_blks); i++) { - struct dpu_hw_ctl *hw; - - if (rm->ctl_blks[i]) { - hw = to_dpu_hw_ctl(rm->ctl_blks[i]); - dpu_hw_ctl_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->hw_intf); i++) - dpu_hw_intf_destroy(rm->hw_intf[i]); - - for (i = 0; i < ARRAY_SIZE(rm->dsc_blks); i++) { - struct dpu_hw_dsc *hw; - - if (rm->dsc_blks[i]) { - hw = to_dpu_hw_dsc(rm->dsc_blks[i]); - dpu_hw_dsc_destroy(hw); - } - } - - for (i = 0; i < ARRAY_SIZE(rm->hw_wb); i++) - dpu_hw_wb_destroy(rm->hw_wb[i]); - - for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++) - dpu_hw_sspp_destroy(rm->hw_sspp[i]); - - return 0; -} - -int dpu_rm_init(struct dpu_rm *rm, +int dpu_rm_init(struct drm_device *dev, + struct dpu_rm *rm, const struct dpu_mdss_cfg *cat, const struct msm_mdss_data *mdss_data, void __iomem *mmio) @@ -119,7 +56,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_mixer *hw; const struct dpu_lm_cfg *lm = &cat->mixer[i]; - hw = dpu_hw_lm_init(lm, mmio); + hw = dpu_hw_lm_init(dev, lm, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed lm object creation: err %d\n", rc); @@ -132,7 +69,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_merge_3d *hw; const struct dpu_merge_3d_cfg *merge_3d = &cat->merge_3d[i]; - hw = dpu_hw_merge_3d_init(merge_3d, mmio); + hw = dpu_hw_merge_3d_init(dev, merge_3d, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed merge_3d object creation: err %d\n", @@ -146,7 +83,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_pingpong *hw; const 
struct dpu_pingpong_cfg *pp = &cat->pingpong[i]; - hw = dpu_hw_pingpong_init(pp, mmio, cat->mdss_ver); + hw = dpu_hw_pingpong_init(dev, pp, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed pingpong object creation: err %d\n", @@ -162,7 +99,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_intf *hw; const struct dpu_intf_cfg *intf = &cat->intf[i]; - hw = dpu_hw_intf_init(intf, mmio, cat->mdss_ver); + hw = dpu_hw_intf_init(dev, intf, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed intf object creation: err %d\n", rc); @@ -175,7 +112,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_wb *hw; const struct dpu_wb_cfg *wb = &cat->wb[i]; - hw = dpu_hw_wb_init(wb, mmio, cat->mdss_ver); + hw = dpu_hw_wb_init(dev, wb, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed wb object creation: err %d\n", rc); @@ -188,7 +125,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_ctl *hw; const struct dpu_ctl_cfg *ctl = &cat->ctl[i]; - hw = dpu_hw_ctl_init(ctl, mmio, cat->mixer_count, cat->mixer); + hw = dpu_hw_ctl_init(dev, ctl, mmio, cat->mixer_count, cat->mixer); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed ctl object creation: err %d\n", rc); @@ -201,7 +138,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_dspp *hw; const struct dpu_dspp_cfg *dspp = &cat->dspp[i]; - hw = dpu_hw_dspp_init(dspp, mmio); + hw = dpu_hw_dspp_init(dev, dspp, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed dspp object creation: err %d\n", rc); @@ -215,9 +152,9 @@ int dpu_rm_init(struct dpu_rm *rm, const struct dpu_dsc_cfg *dsc = &cat->dsc[i]; if (test_bit(DPU_DSC_HW_REV_1_2, &dsc->features)) - hw = dpu_hw_dsc_init_1_2(dsc, mmio); + hw = dpu_hw_dsc_init_1_2(dev, dsc, mmio); else - hw = dpu_hw_dsc_init(dsc, mmio); + hw = dpu_hw_dsc_init(dev, dsc, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); @@ -231,7 +168,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_sspp *hw; const struct dpu_sspp_cfg *sspp = &cat->sspp[i]; - hw = dpu_hw_sspp_init(sspp, mmio, mdss_data, cat->mdss_ver); + hw = dpu_hw_sspp_init(dev, sspp, mmio, mdss_data, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed sspp object creation: err %d\n", rc); @@ -240,11 +177,21 @@ int dpu_rm_init(struct dpu_rm *rm, rm->hw_sspp[sspp->id - SSPP_NONE] = hw; } + if (cat->cdm) { + struct dpu_hw_cdm *hw; + + hw = dpu_hw_cdm_init(dev, cat->cdm, mmio, cat->mdss_ver); + if (IS_ERR(hw)) { + rc = PTR_ERR(hw); + DPU_ERROR("failed cdm object creation: err %d\n", rc); + goto fail; + } + rm->cdm_blk = &hw->base; + } + return 0; fail: - dpu_rm_destroy(rm); - return rc ? 
rc : -EFAULT; } @@ -488,6 +435,26 @@ static int _dpu_rm_reserve_dsc(struct dpu_rm *rm, return 0; } +static int _dpu_rm_reserve_cdm(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_encoder *enc) +{ + /* try allocating only one CDM block */ + if (!rm->cdm_blk) { + DPU_ERROR("CDM block does not exist\n"); + return -EIO; + } + + if (global_state->cdm_to_enc_id) { + DPU_ERROR("CDM_0 is already allocated\n"); + return -EIO; + } + + global_state->cdm_to_enc_id = enc->base.id; + + return 0; +} + static int _dpu_rm_make_reservation( struct dpu_rm *rm, struct dpu_global_state *global_state, @@ -513,6 +480,14 @@ static int _dpu_rm_make_reservation( if (ret) return ret; + if (reqs->topology.needs_cdm) { + ret = _dpu_rm_reserve_cdm(rm, global_state, enc); + if (ret) { + DPU_ERROR("unable to find CDM blk\n"); + return ret; + } + } + return ret; } @@ -523,9 +498,9 @@ static int _dpu_rm_populate_requirements( { reqs->topology = req_topology; - DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n", + DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n", reqs->topology.num_lm, reqs->topology.num_dsc, - reqs->topology.num_intf); + reqs->topology.num_intf, reqs->topology.needs_cdm); return 0; } @@ -554,6 +529,7 @@ void dpu_rm_release(struct dpu_global_state *global_state, ARRAY_SIZE(global_state->dsc_to_enc_id), enc->base.id); _dpu_rm_clear_mapping(global_state->dspp_to_enc_id, ARRAY_SIZE(global_state->dspp_to_enc_id), enc->base.id); + _dpu_rm_clear_mapping(&global_state->cdm_to_enc_id, 1, enc->base.id); } int dpu_rm_reserve( @@ -627,6 +603,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, hw_to_enc_id = global_state->dsc_to_enc_id; max_blks = ARRAY_SIZE(rm->dsc_blks); break; + case DPU_HW_BLK_CDM: + hw_blks = &rm->cdm_blk; + hw_to_enc_id = &global_state->cdm_to_enc_id; + max_blks = 1; + break; default: DPU_ERROR("blk type %d not managed by rm\n", type); return 0; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index 2b551566cbf4..e3f83ebc656b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -22,6 +22,7 @@ struct dpu_global_state; * @hw_wb: array of wb hardware resources * @dspp_blks: array of dspp hardware resources * @hw_sspp: array of sspp hardware resources + * @cdm_blk: cdm hardware resource */ struct dpu_rm { struct dpu_hw_blk *pingpong_blks[PINGPONG_MAX - PINGPONG_0]; @@ -33,30 +34,26 @@ struct dpu_rm { struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0]; struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0]; struct dpu_hw_sspp *hw_sspp[SSPP_MAX - SSPP_NONE]; + struct dpu_hw_blk *cdm_blk; }; /** * dpu_rm_init - Read hardware catalog and create reservation tracking objects * for all HW blocks. 
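
As the resource-manager hunks above show, there is at most one CDM block, so its reservation state collapses to the single cdm_to_enc_id slot in dpu_global_state. A sketch of how an encoder could fetch its reserved CDM once dpu_rm_reserve() has succeeded; get_reserved_cdm() is an illustrative helper, not part of the patch, and the container_of() step mirrors the "rm->cdm_blk = &hw->base" assignment above:

	static struct dpu_hw_cdm *get_reserved_cdm(struct dpu_kms *dpu_kms,
						   struct dpu_global_state *global_state,
						   struct drm_encoder *drm_enc)
	{
		struct dpu_hw_blk *hw_blk = NULL;
		int num_blks;

		num_blks = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
							 drm_enc->base.id,
							 DPU_HW_BLK_CDM,
							 &hw_blk, 1);
		if (!num_blks)
			return NULL;

		/* rm->cdm_blk stores &dpu_hw_cdm->base, so container_of() recovers it */
		return container_of(hw_blk, struct dpu_hw_cdm, base);
	}
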
+ * @dev: Corresponding device for devres management * @rm: DPU Resource Manager handle * @cat: Pointer to hardware catalog * @mdss_data: Pointer to MDSS / UBWC configuration * @mmio: mapped register io address of MDP * @Return: 0 on Success otherwise -ERROR */ -int dpu_rm_init(struct dpu_rm *rm, +int dpu_rm_init(struct drm_device *dev, + struct dpu_rm *rm, const struct dpu_mdss_cfg *cat, const struct msm_mdss_data *mdss_data, void __iomem *mmio); /** - * dpu_rm_destroy - Free all memory allocated by dpu_rm_init - * @rm: DPU Resource Manager handle - * @Return: 0 on Success otherwise -ERROR - */ -int dpu_rm_destroy(struct dpu_rm *rm); - -/** * dpu_rm_reserve - Given a CRTC->Encoder->Connector display chain, analyze * the use connections and user requirements, specified through related * topology control properties, and reserve hardware blocks to that diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index 169f9de4a12a..75f93e346282 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -6,6 +6,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_flip_work.h> +#include <drm/drm_managed.h> #include <drm/drm_mode.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -123,16 +124,6 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) drm_gem_object_put(val); } -static void mdp4_crtc_destroy(struct drm_crtc *crtc) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - - drm_crtc_cleanup(crtc); - drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work); - - kfree(mdp4_crtc); -} - /* statically (for now) map planes to mixer stage (z-order): */ static const int idxs[] = { [VG1] = 1, @@ -269,6 +260,7 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); + unsigned long flags; DBG("%s", mdp4_crtc->name); @@ -281,6 +273,14 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err); mdp4_disable(mdp4_kms); + if (crtc->state->event && !crtc->state->active) { + WARN_ON(mdp4_crtc->event); + spin_lock_irqsave(&mdp4_kms->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irqrestore(&mdp4_kms->dev->event_lock, flags); + } + mdp4_crtc->enabled = false; } @@ -475,7 +475,6 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) static const struct drm_crtc_funcs mdp4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp4_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .cursor_set = mdp4_crtc_cursor_set, .cursor_move = mdp4_crtc_cursor_move, @@ -616,6 +615,13 @@ static const char *dma_names[] = { "DMA_P", "DMA_S", "DMA_E", }; +static void mdp4_crtc_flip_cleanup(struct drm_device *dev, void *ptr) +{ + struct mdp4_crtc *mdp4_crtc = ptr; + + drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work); +} + /* initialize crtc */ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, struct drm_plane *plane, int id, int ovlp_id, @@ -623,10 +629,13 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, { struct drm_crtc *crtc = NULL; struct mdp4_crtc *mdp4_crtc; + int ret; - mdp4_crtc = kzalloc(sizeof(*mdp4_crtc), GFP_KERNEL); - if (!mdp4_crtc) - return ERR_PTR(-ENOMEM); + mdp4_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp4_crtc, base, + plane, NULL, + &mdp4_crtc_funcs, NULL); + if (IS_ERR(mdp4_crtc)) + return 
ERR_CAST(mdp4_crtc); crtc = &mdp4_crtc->base; @@ -648,9 +657,10 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, drm_flip_work_init(&mdp4_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); + ret = drmm_add_action_or_reset(dev, mdp4_crtc_flip_cleanup, mdp4_crtc); + if (ret) + return ERR_PTR(ret); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs, - NULL); drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); return crtc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c index 39b8fe53c29d..74dafe7106be 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c @@ -26,18 +26,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder); - - drm_encoder_cleanup(encoder); - kfree(mdp4_dsi_encoder); -} - -static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = { - .destroy = mdp4_dsi_encoder_destroy, -}; - static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -148,28 +136,18 @@ static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = { /* initialize encoder */ struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_dsi_encoder *mdp4_dsi_encoder; - int ret; - mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL); - if (!mdp4_dsi_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp4_dsi_encoder = drmm_encoder_alloc(dev, struct mdp4_dsi_encoder, base, + NULL, DRM_MODE_ENCODER_DSI, NULL); + if (IS_ERR(mdp4_dsi_encoder)) + return ERR_CAST(mdp4_dsi_encoder); encoder = &mdp4_dsi_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs, - DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs); return encoder; - -fail: - if (encoder) - mdp4_dsi_encoder_destroy(encoder); - - return ERR_PTR(ret); } #endif /* CONFIG_DRM_MSM_DSI */ diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c index 88645dbc3785..3b70764b48c4 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c @@ -25,17 +25,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_dtv_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp4_dtv_encoder); -} - -static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = { - .destroy = mdp4_dtv_encoder_destroy, -}; - static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -173,41 +162,29 @@ long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate) /* initialize encoder */ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_dtv_encoder *mdp4_dtv_encoder; - int ret; - mdp4_dtv_encoder = kzalloc(sizeof(*mdp4_dtv_encoder), GFP_KERNEL); - if (!mdp4_dtv_encoder) { - ret = -ENOMEM; - goto fail; - 
} + mdp4_dtv_encoder = drmm_encoder_alloc(dev, struct mdp4_dtv_encoder, base, + NULL, DRM_MODE_ENCODER_TMDS, NULL); + if (IS_ERR(mdp4_dtv_encoder)) + return ERR_CAST(mdp4_dtv_encoder); encoder = &mdp4_dtv_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs); mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk"); if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get hdmi_clk\n"); - ret = PTR_ERR(mdp4_dtv_encoder->hdmi_clk); - goto fail; + return ERR_CAST(mdp4_dtv_encoder->hdmi_clk); } mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk"); if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get tv_clk\n"); - ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk); - goto fail; + return ERR_CAST(mdp4_dtv_encoder->mdp_clk); } return encoder; - -fail: - if (encoder) - mdp4_dtv_encoder_destroy(encoder); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 10eb3e5b218e..576995ddce37 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -18,7 +18,7 @@ struct mdp4_lcdc_encoder { struct drm_panel *panel; struct clk *lcdc_clk; unsigned long int pixclock; - struct regulator *regs[3]; + struct regulator_bulk_data regs[3]; bool enabled; uint32_t bsc; }; @@ -30,18 +30,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_lcdc_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_lcdc_encoder *mdp4_lcdc_encoder = - to_mdp4_lcdc_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp4_lcdc_encoder); -} - -static const struct drm_encoder_funcs mdp4_lcdc_encoder_funcs = { - .destroy = mdp4_lcdc_encoder_destroy, -}; - /* this should probably be a helper: */ static struct drm_connector *get_connector(struct drm_encoder *encoder) { @@ -271,12 +259,10 @@ static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder, static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder = to_mdp4_lcdc_encoder(encoder); struct mdp4_kms *mdp4_kms = get_kms(encoder); struct drm_panel *panel; - int i, ret; if (WARN_ON(!mdp4_lcdc_encoder->enabled)) return; @@ -301,11 +287,8 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) clk_disable_unprepare(mdp4_lcdc_encoder->lcdc_clk); - for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) { - ret = regulator_disable(mdp4_lcdc_encoder->regs[i]); - if (ret) - DRM_DEV_ERROR(dev->dev, "failed to disable regulator: %d\n", ret); - } + regulator_bulk_disable(ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); mdp4_lcdc_encoder->enabled = false; } @@ -319,7 +302,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) struct mdp4_kms *mdp4_kms = get_kms(encoder); struct drm_panel *panel; uint32_t config; - int i, ret; + int ret; if (WARN_ON(mdp4_lcdc_encoder->enabled)) return; @@ -339,11 +322,10 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) mdp4_crtc_set_config(encoder->crtc, config); mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 0); - for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) { - ret = regulator_enable(mdp4_lcdc_encoder->regs[i]); - if (ret) - DRM_DEV_ERROR(dev->dev, "failed to enable 
regulator: %d\n", ret); - } + ret = regulator_bulk_enable(ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); + if (ret) + DRM_DEV_ERROR(dev->dev, "failed to enable regulators: %d\n", ret); DBG("setting lcdc_clk=%lu", pc); ret = clk_set_rate(mdp4_lcdc_encoder->lcdc_clk, pc); @@ -383,63 +365,38 @@ long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate) struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, struct device_node *panel_node) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder; - struct regulator *reg; int ret; - mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL); - if (!mdp4_lcdc_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp4_lcdc_encoder = drmm_encoder_alloc(dev, struct mdp4_lcdc_encoder, base, + NULL, DRM_MODE_ENCODER_LVDS, NULL); + if (IS_ERR(mdp4_lcdc_encoder)) + return ERR_CAST(mdp4_lcdc_encoder); mdp4_lcdc_encoder->panel_node = panel_node; encoder = &mdp4_lcdc_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); /* TODO: do we need different pll in other cases? */ mdp4_lcdc_encoder->lcdc_clk = mpd4_lvds_pll_init(dev); if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n"); - ret = PTR_ERR(mdp4_lcdc_encoder->lcdc_clk); - goto fail; + return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk); } /* TODO: different regulators in other cases? */ - reg = devm_regulator_get(dev->dev, "lvds-vccs-3p3v"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-vccs-3p3v: %d\n", ret); - goto fail; - } - mdp4_lcdc_encoder->regs[0] = reg; - - reg = devm_regulator_get(dev->dev, "lvds-pll-vdda"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-pll-vdda: %d\n", ret); - goto fail; - } - mdp4_lcdc_encoder->regs[1] = reg; + mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda"; - reg = devm_regulator_get(dev->dev, "lvds-vdda"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-vdda: %d\n", ret); - goto fail; - } - mdp4_lcdc_encoder->regs[2] = reg; + ret = devm_regulator_bulk_get(dev->dev, + ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); + if (ret) + return ERR_PTR(ret); return encoder; - -fail: - if (encoder) - mdp4_lcdc_encoder_destroy(encoder); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index 694d54341337..c5179e4c393c 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -1350,23 +1350,17 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_handler) return cfg_handler->revision; } -void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_handler) -{ - kfree(cfg_handler); -} - struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, uint32_t major, uint32_t minor) { struct drm_device *dev = mdp5_kms->dev; struct mdp5_cfg_handler *cfg_handler; const struct mdp5_cfg_handler *cfg_handlers; - int i, ret = 0, num_handlers; + int i, num_handlers; - cfg_handler = kzalloc(sizeof(*cfg_handler), GFP_KERNEL); + cfg_handler = devm_kzalloc(dev->dev, sizeof(*cfg_handler), GFP_KERNEL); if (unlikely(!cfg_handler)) { - ret = -ENOMEM; - goto fail; + return 
ERR_PTR(-ENOMEM); } switch (major) { @@ -1381,8 +1375,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, default: DRM_DEV_ERROR(dev->dev, "unexpected MDP major version: v%d.%d\n", major, minor); - ret = -ENXIO; - goto fail; + return ERR_PTR(-ENXIO); } /* only after mdp5_cfg global pointer's init can we access the hw */ @@ -1396,8 +1389,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, if (unlikely(!mdp5_cfg)) { DRM_DEV_ERROR(dev->dev, "unexpected MDP minor revision: v%d.%d\n", major, minor); - ret = -ENXIO; - goto fail; + return ERR_PTR(-ENXIO); } cfg_handler->revision = minor; @@ -1406,10 +1398,4 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, DBG("MDP5: %s hw config selected", mdp5_cfg->name); return cfg_handler; - -fail: - if (cfg_handler) - mdp5_cfg_destroy(cfg_handler); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h index c2502cc33864..26c5d8b4ab46 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h @@ -121,6 +121,5 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_hnd); struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, uint32_t major, uint32_t minor); -void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_hnd); #endif /* __MDP5_CFG_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 86036dd4e1e8..4a3db2ea1689 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -13,6 +13,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_flip_work.h> #include <drm/drm_fourcc.h> +#include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -172,14 +173,11 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) drm_gem_object_put(val); } -static void mdp5_crtc_destroy(struct drm_crtc *crtc) +static void mdp5_crtc_flip_cleanup(struct drm_device *dev, void *ptr) { - struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + struct mdp5_crtc *mdp5_crtc = ptr; - drm_crtc_cleanup(crtc); drm_flip_work_cleanup(&mdp5_crtc->unref_cursor_work); - - kfree(mdp5_crtc); } static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage) @@ -1147,7 +1145,6 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc) static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = mdp5_crtc_reset, .atomic_duplicate_state = mdp5_crtc_duplicate_state, @@ -1161,7 +1158,6 @@ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { static const struct drm_crtc_funcs mdp5_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = mdp5_crtc_reset, .atomic_duplicate_state = mdp5_crtc_duplicate_state, @@ -1327,10 +1323,16 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, { struct drm_crtc *crtc = NULL; struct mdp5_crtc *mdp5_crtc; + int ret; - mdp5_crtc = kzalloc(sizeof(*mdp5_crtc), GFP_KERNEL); - if (!mdp5_crtc) - return ERR_PTR(-ENOMEM); + mdp5_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp5_crtc, base, + plane, cursor_plane, + cursor_plane ? 
+ &mdp5_crtc_no_lm_cursor_funcs : + &mdp5_crtc_funcs, + NULL); + if (IS_ERR(mdp5_crtc)) + return ERR_CAST(mdp5_crtc); crtc = &mdp5_crtc->base; @@ -1346,13 +1348,11 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; - drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, - cursor_plane ? - &mdp5_crtc_no_lm_cursor_funcs : - &mdp5_crtc_funcs, NULL); - drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); + ret = drmm_add_action_or_reset(dev, mdp5_crtc_flip_cleanup, mdp5_crtc); + if (ret) + return ERR_PTR(ret); drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c index 1220f2b20e05..666de99a46a5 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c @@ -681,11 +681,6 @@ void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctl_mgr) } } -void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctl_mgr) -{ - kfree(ctl_mgr); -} - struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd) { @@ -697,18 +692,16 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, unsigned long flags; int c, ret; - ctl_mgr = kzalloc(sizeof(*ctl_mgr), GFP_KERNEL); + ctl_mgr = devm_kzalloc(dev->dev, sizeof(*ctl_mgr), GFP_KERNEL); if (!ctl_mgr) { DRM_DEV_ERROR(dev->dev, "failed to allocate CTL manager\n"); - ret = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } if (WARN_ON(ctl_cfg->count > MAX_CTL)) { DRM_DEV_ERROR(dev->dev, "Increase static pool size to at least %d\n", ctl_cfg->count); - ret = -ENOSPC; - goto fail; + return ERR_PTR(-ENOSPC); } /* initialize the CTL manager: */ @@ -727,7 +720,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, DRM_DEV_ERROR(dev->dev, "CTL_%d: base is null!\n", c); ret = -EINVAL; spin_unlock_irqrestore(&ctl_mgr->pool_lock, flags); - goto fail; + return ERR_PTR(ret); } ctl->ctlm = ctl_mgr; ctl->id = c; @@ -755,10 +748,4 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, DBG("Pool of %d CTLs created.", ctl_mgr->nctl); return ctl_mgr; - -fail: - if (ctl_mgr) - mdp5_ctlm_destroy(ctl_mgr); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h index c2af68aa77ae..9020e8efc4e4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h @@ -17,7 +17,6 @@ struct mdp5_ctl_manager; struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd); void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctlm); -void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctlm); /* * CTL prototypes: diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c index 79d67c495780..8db97083e14d 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c @@ -16,17 +16,6 @@ static struct mdp5_kms *get_kms(struct drm_encoder *encoder) return to_mdp5_kms(to_mdp_kms(priv->kms)); } -static void mdp5_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp5_encoder); -} - -static const struct drm_encoder_funcs mdp5_encoder_funcs = { - .destroy = mdp5_encoder_destroy, -}; - static void mdp5_vid_encoder_mode_set(struct drm_encoder 
*encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -342,13 +331,11 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, struct mdp5_encoder *mdp5_encoder; int enc_type = (intf->type == INTF_DSI) ? DRM_MODE_ENCODER_DSI : DRM_MODE_ENCODER_TMDS; - int ret; - mdp5_encoder = kzalloc(sizeof(*mdp5_encoder), GFP_KERNEL); - if (!mdp5_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp5_encoder = drmm_encoder_alloc(dev, struct mdp5_encoder, base, + NULL, enc_type, NULL); + if (IS_ERR(mdp5_encoder)) + return ERR_CAST(mdp5_encoder); encoder = &mdp5_encoder->base; mdp5_encoder->ctl = ctl; @@ -356,15 +343,7 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, spin_lock_init(&mdp5_encoder->intf_lock); - drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL); - drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs); return encoder; - -fail: - if (encoder) - mdp5_encoder_destroy(encoder); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index ec933d597e20..0827634664ae 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -209,13 +209,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); struct msm_gem_address_space *aspace = kms->aspace; - int i; - - for (i = 0; i < mdp5_kms->num_hwmixers; i++) - mdp5_mixer_destroy(mdp5_kms->hwmixers[i]); - - for (i = 0; i < mdp5_kms->num_hwpipes; i++) - mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); if (aspace) { aspace->mmu->funcs->detach(aspace->mmu); @@ -623,18 +616,6 @@ fail: static void mdp5_destroy(struct mdp5_kms *mdp5_kms) { - int i; - - if (mdp5_kms->ctlm) - mdp5_ctlm_destroy(mdp5_kms->ctlm); - if (mdp5_kms->smp) - mdp5_smp_destroy(mdp5_kms->smp); - if (mdp5_kms->cfg) - mdp5_cfg_destroy(mdp5_kms->cfg); - - for (i = 0; i < mdp5_kms->num_intfs; i++) - kfree(mdp5_kms->intfs[i]); - if (mdp5_kms->rpm_enabled) pm_runtime_disable(&mdp5_kms->pdev->dev); @@ -652,7 +633,7 @@ static int construct_pipes(struct mdp5_kms *mdp5_kms, int cnt, for (i = 0; i < cnt; i++) { struct mdp5_hw_pipe *hwpipe; - hwpipe = mdp5_pipe_init(pipes[i], offsets[i], caps); + hwpipe = mdp5_pipe_init(dev, pipes[i], offsets[i], caps); if (IS_ERR(hwpipe)) { ret = PTR_ERR(hwpipe); DRM_DEV_ERROR(dev->dev, "failed to construct pipe for %s (%d)\n", @@ -724,7 +705,7 @@ static int hwmixer_init(struct mdp5_kms *mdp5_kms) for (i = 0; i < hw_cfg->lm.count; i++) { struct mdp5_hw_mixer *mixer; - mixer = mdp5_mixer_init(&hw_cfg->lm.instances[i]); + mixer = mdp5_mixer_init(dev, &hw_cfg->lm.instances[i]); if (IS_ERR(mixer)) { ret = PTR_ERR(mixer); DRM_DEV_ERROR(dev->dev, "failed to construct LM%d (%d)\n", @@ -755,7 +736,7 @@ static int interface_init(struct mdp5_kms *mdp5_kms) if (intf_types[i] == INTF_DISABLED) continue; - intf = kzalloc(sizeof(*intf), GFP_KERNEL); + intf = devm_kzalloc(dev->dev, sizeof(*intf), GFP_KERNEL); if (!intf) { DRM_DEV_ERROR(dev->dev, "failed to construct INTF%d\n", i); return -ENOMEM; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c index 2536def2a000..2822b533f807 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c @@ -140,20 +140,16 @@ int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) return 0; } -void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer) -{ - kfree(mixer); -} - static const char * const 
mixer_names[] = { "LM0", "LM1", "LM2", "LM3", "LM4", "LM5", }; -struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm) +struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev, + const struct mdp5_lm_instance *lm) { struct mdp5_hw_mixer *mixer; - mixer = kzalloc(sizeof(*mixer), GFP_KERNEL); + mixer = devm_kzalloc(dev->dev, sizeof(*mixer), GFP_KERNEL); if (!mixer) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h index 545ee223b9d7..2bedd75835bc 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h @@ -25,8 +25,8 @@ struct mdp5_hw_mixer_state { struct drm_crtc *hwmixer_to_crtc[8]; }; -struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm); -void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm); +struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev, + const struct mdp5_lm_instance *lm); int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, uint32_t caps, struct mdp5_hw_mixer **mixer, struct mdp5_hw_mixer **r_mixer); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index e4b8a789835a..99b2c30b1d48 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -151,17 +151,13 @@ int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) return 0; } -void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe) -{ - kfree(hwpipe); -} - -struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, +struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev, + enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps) { struct mdp5_hw_pipe *hwpipe; - hwpipe = kzalloc(sizeof(*hwpipe), GFP_KERNEL); + hwpipe = devm_kzalloc(dev->dev, sizeof(*hwpipe), GFP_KERNEL); if (!hwpipe) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h index cca67938cab2..452138821f60 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h @@ -39,8 +39,8 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, struct mdp5_hw_pipe **r_hwpipe); int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); -struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, +struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev, + enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps); -void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe); #endif /* __MDP5_PIPE_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index b68682c1b5bc..8b59562e29e2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -370,23 +370,17 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p) drm_modeset_unlock(&mdp5_kms->glob_state_lock); } -void mdp5_smp_destroy(struct mdp5_smp *smp) -{ - kfree(smp); -} struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg) { + struct drm_device *dev = mdp5_kms->dev; struct mdp5_smp_state *state; struct mdp5_global_state *global_state; struct mdp5_smp *smp; - int ret; - smp = kzalloc(sizeof(*smp), GFP_KERNEL); - if (unlikely(!smp)) { - ret = -ENOMEM; - goto fail; - } + smp = devm_kzalloc(dev->dev, sizeof(*smp), GFP_KERNEL); + if (unlikely(!smp)) + return ERR_PTR(-ENOMEM); smp->dev = mdp5_kms->dev; smp->blk_cnt = cfg->mmb_count; @@ -400,9 
+394,4 @@ struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_ memcpy(smp->reserved, cfg->reserved, sizeof(smp->reserved)); return smp; -fail: - if (smp) - mdp5_smp_destroy(smp); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h index ba5618e136c3..d8b6a11413d9 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h @@ -68,7 +68,6 @@ struct mdp5_smp; struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg); -void mdp5_smp_destroy(struct mdp5_smp *smp); void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p); diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 8e3b677f35e6..03f4951c49f4 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -291,6 +291,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, return -EINVAL; } + ret = pm_runtime_resume_and_get(dp_aux->dev); + if (ret) + return ret; + mutex_lock(&aux->mutex); if (!aux->initted) { ret = -EIO; @@ -364,6 +368,7 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, exit: mutex_unlock(&aux->mutex); + pm_runtime_put_sync(dp_aux->dev); return ret; } @@ -474,7 +479,6 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) int dp_aux_register(struct drm_dp_aux *dp_aux) { - struct dp_aux_private *aux; int ret; if (!dp_aux) { @@ -482,12 +486,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux) return -EINVAL; } - aux = container_of(dp_aux, struct dp_aux_private, dp_aux); - - aux->dp_aux.name = "dpu_dp_aux"; - aux->dp_aux.dev = aux->dev; - aux->dp_aux.transfer = dp_aux_transfer; - ret = drm_dp_aux_register(&aux->dp_aux); + ret = drm_dp_aux_register(dp_aux); if (ret) { DRM_ERROR("%s: failed to register drm aux: %d\n", __func__, ret); @@ -502,6 +501,21 @@ void dp_aux_unregister(struct drm_dp_aux *dp_aux) drm_dp_aux_unregister(dp_aux); } +static int dp_wait_hpd_asserted(struct drm_dp_aux *dp_aux, + unsigned long wait_us) +{ + int ret; + struct dp_aux_private *aux; + + aux = container_of(dp_aux, struct dp_aux_private, dp_aux); + + pm_runtime_get_sync(aux->dev); + ret = dp_catalog_aux_wait_for_hpd_connect_state(aux->catalog); + pm_runtime_put_sync(aux->dev); + + return ret; +} + struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog, bool is_edp) { @@ -525,6 +539,17 @@ struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog, aux->catalog = catalog; aux->retry_cnt = 0; + /* + * Use the drm_dp_aux_init() to use the aux adapter + * before registering AUX with the DRM device so that + * msm eDP panel can be detected by generic_dep_panel_probe(). 
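
The dp_aux changes in this range bracket both the AUX transfer path and the new wait_hpd_asserted callback with runtime-PM calls, so the controller is only powered while a transfer or HPD wait is actually in flight. Reduced to its essentials, the bracketing looks like this; do_hw_transfer() stands in for the real register-level transfer and is illustrative only:

	#include <linux/pm_runtime.h>
	#include <drm/display/drm_dp_helper.h>

	static ssize_t foo_aux_transfer(struct drm_dp_aux *dp_aux,
					struct drm_dp_aux_msg *msg)
	{
		ssize_t ret;

		ret = pm_runtime_resume_and_get(dp_aux->dev);
		if (ret)
			return ret;

		ret = do_hw_transfer(dp_aux, msg);	/* illustrative hardware access */

		pm_runtime_put_sync(dp_aux->dev);

		return ret;
	}
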
+ */ + aux->dp_aux.name = "dpu_dp_aux"; + aux->dp_aux.dev = dev; + aux->dp_aux.transfer = dp_aux_transfer; + aux->dp_aux.wait_hpd_asserted = dp_wait_hpd_asserted; + drm_dp_aux_init(&aux->dp_aux); + return &aux->dp_aux; } diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c index 3bba901afe33..6c281dc095b9 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.c +++ b/drivers/gpu/drm/msm/dp/dp_debug.c @@ -19,13 +19,9 @@ #define DEBUG_NAME "msm_dp" struct dp_debug_private { - struct dentry *root; - struct dp_link *link; struct dp_panel *panel; struct drm_connector *connector; - struct device *dev; - struct drm_device *drm_dev; struct dp_debug dp_debug; }; @@ -204,35 +200,33 @@ static const struct file_operations test_active_fops = { .write = dp_test_active_write }; -static void dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor) +static void dp_debug_init(struct dp_debug *dp_debug, struct dentry *root, bool is_edp) { - char path[64]; struct dp_debug_private *debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - snprintf(path, sizeof(path), "msm_dp-%s", debug->connector->name); - - debug->root = debugfs_create_dir(path, minor->debugfs_root); - - debugfs_create_file("dp_debug", 0444, debug->root, + debugfs_create_file("dp_debug", 0444, root, debug, &dp_debug_fops); - debugfs_create_file("msm_dp_test_active", 0444, - debug->root, - debug, &test_active_fops); + if (!is_edp) { + debugfs_create_file("msm_dp_test_active", 0444, + root, + debug, &test_active_fops); - debugfs_create_file("msm_dp_test_data", 0444, - debug->root, - debug, &dp_test_data_fops); + debugfs_create_file("msm_dp_test_data", 0444, + root, + debug, &dp_test_data_fops); - debugfs_create_file("msm_dp_test_type", 0444, - debug->root, - debug, &dp_test_type_fops); + debugfs_create_file("msm_dp_test_type", 0444, + root, + debug, &dp_test_type_fops); + } } struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, - struct drm_connector *connector, struct drm_minor *minor) + struct drm_connector *connector, + struct dentry *root, bool is_edp) { struct dp_debug_private *debug; struct dp_debug *dp_debug; @@ -253,46 +247,15 @@ struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, debug->dp_debug.debug_en = false; debug->link = link; debug->panel = panel; - debug->dev = dev; - debug->drm_dev = minor->dev; - debug->connector = connector; dp_debug = &debug->dp_debug; dp_debug->vdisplay = 0; dp_debug->hdisplay = 0; dp_debug->vrefresh = 0; - dp_debug_init(dp_debug, minor); + dp_debug_init(dp_debug, root, is_edp); return dp_debug; error: return ERR_PTR(rc); } - -static int dp_debug_deinit(struct dp_debug *dp_debug) -{ - struct dp_debug_private *debug; - - if (!dp_debug) - return -EINVAL; - - debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - - debugfs_remove_recursive(debug->root); - - return 0; -} - -void dp_debug_put(struct dp_debug *dp_debug) -{ - struct dp_debug_private *debug; - - if (!dp_debug) - return; - - debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - - dp_debug_deinit(dp_debug); - - devm_kfree(debug->dev, debug); -} diff --git a/drivers/gpu/drm/msm/dp/dp_debug.h b/drivers/gpu/drm/msm/dp/dp_debug.h index 124227873d58..9b3b2e702f65 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.h +++ b/drivers/gpu/drm/msm/dp/dp_debug.h @@ -34,7 +34,8 @@ struct dp_debug { * @panel: instance of panel module * @link: instance of link module * @connector: double pointer to display connector - * @minor: pointer to 
drm minor number after device registration + * @root: connector's debugfs root + * @is_edp: set for eDP connectors / panels * return: pointer to allocated debug module data * * This function sets up the debug module and provides a way @@ -43,31 +44,21 @@ struct dp_debug { struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, struct drm_connector *connector, - struct drm_minor *minor); - -/** - * dp_debug_put() - * - * Cleans up dp_debug instance - * - * @dp_debug: instance of dp_debug - */ -void dp_debug_put(struct dp_debug *dp_debug); + struct dentry *root, + bool is_edp); #else static inline struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, - struct drm_connector *connector, struct drm_minor *minor) + struct drm_connector *connector, + struct dentry *root, + bool is_edp) { return ERR_PTR(-EINVAL); } -static inline void dp_debug_put(struct dp_debug *dp_debug) -{ -} - #endif /* defined(CONFIG_DEBUG_FS) */ #endif /* _DP_DEBUG_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 1b88fb52726f..d37d599aec27 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -49,13 +49,11 @@ enum { ST_CONNECTED, ST_DISCONNECT_PENDING, ST_DISPLAY_OFF, - ST_SUSPENDED, }; enum { EV_NO_EVENT, /* hpd events */ - EV_HPD_INIT_SETUP, EV_HPD_PLUG_INT, EV_IRQ_HPD_INT, EV_HPD_UNPLUG_INT, @@ -170,6 +168,11 @@ static const struct msm_dp_desc sm8350_dp_descs[] = { {} }; +static const struct msm_dp_desc sm8650_dp_descs[] = { + { .io_start = 0x0af54000, .id = MSM_DP_CONTROLLER_0, .connector_type = DRM_MODE_CONNECTOR_DisplayPort }, + {} +}; + static const struct of_device_id dp_dt_match[] = { { .compatible = "qcom,sc7180-dp", .data = &sc7180_dp_descs }, { .compatible = "qcom,sc7280-dp", .data = &sc7280_dp_descs }, @@ -180,6 +183,7 @@ static const struct of_device_id dp_dt_match[] = { { .compatible = "qcom,sc8280xp-edp", .data = &sc8280xp_edp_descs }, { .compatible = "qcom,sdm845-dp", .data = &sc7180_dp_descs }, { .compatible = "qcom,sm8350-dp", .data = &sm8350_dp_descs }, + { .compatible = "qcom,sm8650-dp", .data = &sm8650_dp_descs }, {} }; @@ -275,11 +279,6 @@ static int dp_display_bind(struct device *dev, struct device *master, dp->dp_display.drm_dev = drm; priv->dp[dp->id] = &dp->dp_display; - rc = dp->parser->parse(dp->parser); - if (rc) { - DRM_ERROR("device tree parsing failed\n"); - goto end; - } dp->drm_dev = drm; @@ -290,11 +289,6 @@ static int dp_display_bind(struct device *dev, struct device *master, goto end; } - rc = dp_power_client_init(dp->power); - if (rc) { - DRM_ERROR("Power client create failed\n"); - goto end; - } rc = dp_register_audio_driver(dev, dp->audio); if (rc) { @@ -319,15 +313,10 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct dp_display_private *dp = dev_get_dp_display_private(dev); struct msm_drm_private *priv = dev_get_drvdata(master); - /* disable all HPD interrupts */ - if (dp->core_initialized) - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); - kthread_stop(dp->ev_tsk); of_dp_aux_depopulate_bus(dp->aux); - dp_power_client_deinit(dp->power); dp_unregister_audio_driver(dev, dp->audio); dp_aux_unregister(dp->aux); dp->drm_dev = NULL; @@ -340,27 +329,10 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; -static void dp_display_send_hpd_event(struct msm_dp *dp_display) -{ - struct dp_display_private *dp; - struct drm_connector 
*connector; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - connector = dp->dp_display.connector; - drm_helper_hpd_irq_event(connector->dev); -} - - static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - if ((hpd && dp->dp_display.is_connected) || - (!hpd && !dp->dp_display.is_connected)) { - drm_dbg_dp(dp->drm_dev, "HPD already %s\n", - (hpd ? "on" : "off")); - return 0; - } + struct drm_bridge *bridge = dp->dp_display.bridge; /* reset video pattern flag on disconnect */ if (!hpd) { @@ -372,11 +344,11 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, dp->panel->downstream_ports); } - dp->dp_display.is_connected = hpd; + dp->dp_display.link_ready = hpd; drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - dp_display_send_hpd_event(&dp->dp_display); + drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); return 0; } @@ -575,6 +547,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) { u32 state; int ret; + struct platform_device *pdev = dp->dp_display.pdev; mutex_lock(&dp->event_mutex); @@ -582,7 +555,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); - if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { + if (state == ST_DISPLAY_OFF) { mutex_unlock(&dp->event_mutex); return 0; } @@ -599,7 +572,14 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) return 0; } - ret = dp_display_usbpd_configure_cb(&dp->dp_display.pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) { + DRM_ERROR("failed to pm_runtime_resume\n"); + mutex_unlock(&dp->event_mutex); + return ret; + } + + ret = dp_display_usbpd_configure_cb(&pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = ST_DISCONNECTED; } else { @@ -631,6 +611,7 @@ static void dp_display_handle_plugged_change(struct msm_dp *dp_display, static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) { u32 state; + struct platform_device *pdev = dp->dp_display.pdev; mutex_lock(&dp->event_mutex); @@ -681,6 +662,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); /* uevent will complete disconnection part */ + pm_runtime_put_sync(&pdev->dev); mutex_unlock(&dp->event_mutex); return 0; } @@ -696,7 +678,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); - if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { + if (state == ST_DISPLAY_OFF) { mutex_unlock(&dp->event_mutex); return 0; } @@ -720,7 +702,6 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) static void dp_display_deinit_sub_modules(struct dp_display_private *dp) { - dp_debug_put(dp->debug); dp_audio_put(dp->audio); dp_panel_put(dp->panel); dp_aux_put(dp->aux); @@ -918,7 +899,7 @@ int dp_display_set_plugged_cb(struct msm_dp *dp_display, dp_display->plugged_cb = fn; dp_display->codec_dev = codec_dev; - plugged = dp_display->is_connected; + plugged = dp_display->link_ready; dp_display_handle_plugged_change(dp_display, plugged); return 0; @@ -1108,9 +1089,6 @@ static int hpd_event_thread(void *data) spin_unlock_irqrestore(&dp_priv->event_lock, flag); switch (todo->event_id) { - case EV_HPD_INIT_SETUP: - dp_display_host_init(dp_priv); - break; case EV_HPD_PLUG_INT: 
dp_hpd_plug_handle(dp_priv, todo->data); break; @@ -1189,27 +1167,21 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) return ret; } -int dp_display_request_irq(struct msm_dp *dp_display) +static int dp_display_request_irq(struct dp_display_private *dp) { int rc = 0; - struct dp_display_private *dp; - - if (!dp_display) { - DRM_ERROR("invalid input\n"); - return -EINVAL; - } + struct platform_device *pdev = dp->dp_display.pdev; - dp = container_of(dp_display, struct dp_display_private, dp_display); - - dp->irq = irq_of_parse_and_map(dp->dp_display.pdev->dev.of_node, 0); - if (!dp->irq) { + dp->irq = platform_get_irq(pdev, 0); + if (dp->irq < 0) { DRM_ERROR("failed to get irq\n"); - return -EINVAL; + return dp->irq; } - rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq, - dp_display_irq_handler, - IRQF_TRIGGER_HIGH, "dp_display_isr", dp); + rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler, + IRQF_TRIGGER_HIGH|IRQF_NO_AUTOEN, + "dp_display_isr", dp); + if (rc < 0) { DRM_ERROR("failed to request IRQ%u: %d\n", dp->irq, rc); @@ -1238,6 +1210,29 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde return NULL; } +static int dp_display_get_next_bridge(struct msm_dp *dp); + +static int dp_display_probe_tail(struct device *dev) +{ + struct msm_dp *dp = dev_get_drvdata(dev); + int ret; + + ret = dp_display_get_next_bridge(dp); + if (ret) + return ret; + + ret = component_add(dev, &dp_display_comp_ops); + if (ret) + DRM_ERROR("component add failed, rc=%d\n", ret); + + return ret; +} + +static int dp_auxbus_done_probe(struct drm_dp_aux *aux) +{ + return dp_display_probe_tail(aux->dev); +} + static int dp_display_probe(struct platform_device *pdev) { int rc = 0; @@ -1271,6 +1266,18 @@ static int dp_display_probe(struct platform_device *pdev) return -EPROBE_DEFER; } + rc = dp->parser->parse(dp->parser); + if (rc) { + DRM_ERROR("device tree parsing failed\n"); + goto err; + } + + rc = dp_power_client_init(dp->power); + if (rc) { + DRM_ERROR("Power client create failed\n"); + goto err; + } + /* setup event q */ mutex_init(&dp->event_mutex); init_waitqueue_head(&dp->event_q); @@ -1283,13 +1290,31 @@ static int dp_display_probe(struct platform_device *pdev) platform_set_drvdata(pdev, &dp->dp_display); - rc = component_add(&pdev->dev, &dp_display_comp_ops); - if (rc) { - DRM_ERROR("component add failed, rc=%d\n", rc); - dp_display_deinit_sub_modules(dp); + rc = devm_pm_runtime_enable(&pdev->dev); + if (rc) + goto err; + + rc = dp_display_request_irq(dp); + if (rc) + goto err; + + if (dp->dp_display.is_edp) { + rc = devm_of_dp_aux_populate_bus(dp->aux, dp_auxbus_done_probe); + if (rc) { + DRM_ERROR("eDP auxbus population failed, rc=%d\n", rc); + goto err; + } + } else { + rc = dp_display_probe_tail(&pdev->dev); + if (rc) + goto err; } return rc; + +err: + dp_display_deinit_sub_modules(dp); + return rc; } static void dp_display_remove(struct platform_device *pdev) @@ -1298,113 +1323,50 @@ static void dp_display_remove(struct platform_device *pdev) component_del(&pdev->dev, &dp_display_comp_ops); dp_display_deinit_sub_modules(dp); - platform_set_drvdata(pdev, NULL); } -static int dp_pm_resume(struct device *dev) +static int dp_pm_runtime_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_dp *dp_display = platform_get_drvdata(pdev); - struct dp_display_private *dp; - int sink_count = 0; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - mutex_lock(&dp->event_mutex); 
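As background for the IRQ hunk above: platform_get_irq() returns either the Linux IRQ number or a negative errno (so the result is checked with < 0, unlike irq_of_parse_and_map()), and IRQF_NO_AUTOEN registers the handler while leaving the line masked until enable_irq() is called. A minimal sketch of that idiom, with hypothetical foo_* names standing in for the driver specifics (assumes <linux/interrupt.h> and <linux/platform_device.h>):

static irqreturn_t foo_isr(int irq, void *ctx)
{
	/* acknowledge and dispatch hardware events here */
	return IRQ_HANDLED;
}

static int foo_request_irq(struct platform_device *pdev, void *ctx)
{
	int irq, ret;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	/* handler is registered, but the line stays disabled until resume: */
	ret = devm_request_irq(&pdev->dev, irq, foo_isr,
			       IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, "foo_isr", ctx);
	if (ret < 0)
		return ret;

	return irq;
}

The runtime-PM callbacks then bracket the active window with enable_irq()/disable_irq(), as the dp_pm_runtime_resume()/dp_pm_runtime_suspend() pair introduced below does.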
- - drm_dbg_dp(dp->drm_dev, - "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - /* start from disconnected state */ - dp->hpd_state = ST_DISCONNECTED; - - /* turn on dp ctrl/phy */ - dp_display_host_init(dp); - - if (dp_display->is_edp) - dp_catalog_ctrl_hpd_enable(dp->catalog); + struct dp_display_private *dp = dev_get_dp_display_private(dev); - if (dp_catalog_link_is_connected(dp->catalog)) { - /* - * set sink to normal operation mode -- D0 - * before dpcd read - */ - dp_display_host_phy_init(dp); - dp_link_psm_config(dp->link, &dp->panel->link_info, false); - sink_count = drm_dp_read_sink_count(dp->aux); - if (sink_count < 0) - sink_count = 0; + disable_irq(dp->irq); + if (dp->dp_display.is_edp) { dp_display_host_phy_exit(dp); + dp_catalog_ctrl_hpd_disable(dp->catalog); } - - dp->link->sink_count = sink_count; - /* - * can not declared display is connected unless - * HDMI cable is plugged in and sink_count of - * dongle become 1 - * also only signal audio when disconnected - */ - if (dp->link->sink_count) { - dp->dp_display.is_connected = true; - } else { - dp->dp_display.is_connected = false; - dp_display_handle_plugged_change(dp_display, false); - } - - drm_dbg_dp(dp->drm_dev, - "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n", - dp->dp_display.connector_type, dp->link->sink_count, - dp->dp_display.is_connected, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - mutex_unlock(&dp->event_mutex); + dp_display_host_deinit(dp); return 0; } -static int dp_pm_suspend(struct device *dev) +static int dp_pm_runtime_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_dp *dp_display = platform_get_drvdata(pdev); - struct dp_display_private *dp; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - mutex_lock(&dp->event_mutex); - - drm_dbg_dp(dp->drm_dev, - "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - /* mainlink enabled */ - if (dp_power_clk_status(dp->power, DP_CTRL_PM)) - dp_ctrl_off_link_stream(dp->ctrl); - - dp_display_host_phy_exit(dp); - - /* host_init will be called at pm_resume */ - dp_display_host_deinit(dp); - - dp->hpd_state = ST_SUSPENDED; - - drm_dbg_dp(dp->drm_dev, - "After, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); + struct dp_display_private *dp = dev_get_dp_display_private(dev); - mutex_unlock(&dp->event_mutex); + /* + * for eDP, host controller, HPD block and PHY are enabled here + * but with HPD irq disabled + * + * for DP, only host controller is enabled here. 
+ * HPD block is enabled at dp_bridge_hpd_enable() + * PHY will be enabled at plugin handler later + */ + dp_display_host_init(dp); + if (dp->dp_display.is_edp) { + dp_catalog_ctrl_hpd_enable(dp->catalog); + dp_display_host_phy_init(dp); + } + enable_irq(dp->irq); return 0; } static const struct dev_pm_ops dp_pm_ops = { - .suspend = dp_pm_suspend, - .resume = dp_pm_resume, + SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver dp_display_driver = { @@ -1434,19 +1396,6 @@ void __exit msm_dp_unregister(void) platform_driver_unregister(&dp_display_driver); } -void msm_dp_irq_postinstall(struct msm_dp *dp_display) -{ - struct dp_display_private *dp; - - if (!dp_display) - return; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - if (!dp_display->is_edp) - dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 0); -} - bool msm_dp_wide_bus_available(const struct msm_dp *dp_display) { struct dp_display_private *dp; @@ -1456,7 +1405,7 @@ bool msm_dp_wide_bus_available(const struct msm_dp *dp_display) return dp->wide_bus_en; } -void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) +void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *root, bool is_edp) { struct dp_display_private *dp; struct device *dev; @@ -1467,7 +1416,7 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) dp->debug = dp_debug_get(dev, dp->panel, dp->link, dp->dp_display.connector, - minor); + root, is_edp); if (IS_ERR(dp->debug)) { rc = PTR_ERR(dp->debug); DRM_ERROR("failed to initialize debug, rc = %d\n", rc); @@ -1479,33 +1428,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) { int rc; struct dp_display_private *dp_priv; - struct device_node *aux_bus; - struct device *dev; dp_priv = container_of(dp, struct dp_display_private, dp_display); - dev = &dp_priv->dp_display.pdev->dev; - aux_bus = of_get_child_by_name(dev->of_node, "aux-bus"); - - if (aux_bus && dp->is_edp) { - dp_display_host_init(dp_priv); - dp_catalog_ctrl_hpd_enable(dp_priv->catalog); - dp_display_host_phy_init(dp_priv); - - /* - * The code below assumes that the panel will finish probing - * by the time devm_of_dp_aux_populate_ep_devices() returns. - * This isn't a great assumption since it will fail if the - * panel driver is probed asynchronously but is the best we - * can do without a bigger driver reorganization. - */ - rc = of_dp_aux_populate_bus(dp_priv->aux, NULL); - of_node_put(aux_bus); - if (rc) - goto error; - } else if (dp->is_edp) { - DRM_ERROR("eDP aux_bus not found\n"); - return -ENODEV; - } /* * External bridges are mandatory for eDP interfaces: one has to @@ -1514,21 +1438,13 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) * For DisplayPort interfaces external bridges are optional, so * silently ignore an error if one is not present (-ENODEV). 
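*/

The dp_pm_ops table above shows a common conversion pattern: once a driver has working runtime-PM callbacks, system sleep can be expressed through them with pm_runtime_force_suspend()/pm_runtime_force_resume(). A minimal sketch under that assumption, with hypothetical foo_* callbacks (assumes <linux/pm.h> and <linux/pm_runtime.h>):

static int foo_runtime_suspend(struct device *dev)
{
	/* quiesce the hardware, gate clocks, drop bus votes */
	return 0;
}

static int foo_runtime_resume(struct device *dev)
{
	/* restore bus votes, ungate clocks, reinitialize the block */
	return 0;
}

static const struct dev_pm_ops foo_pm_ops = {
	SET_RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
	/* system sleep simply forces the runtime-PM transitions: */
	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
				pm_runtime_force_resume)
};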
- rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser); + rc = devm_dp_parser_find_next_bridge(&dp->pdev->dev, dp_priv->parser); if (!dp->is_edp && rc == -ENODEV) return 0; - if (!rc) { + if (!rc) dp->next_bridge = dp_priv->parser->next_bridge; - return 0; - } -error: - if (dp->is_edp) { - of_dp_aux_depopulate_bus(dp_priv->aux); - dp_display_host_phy_exit(dp_priv); - dp_display_host_deinit(dp_priv); - } return rc; } @@ -1542,16 +1458,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, dp_priv = container_of(dp_display, struct dp_display_private, dp_display); - ret = dp_display_request_irq(dp_display); - if (ret) { - DRM_ERROR("request_irq failed, ret=%d\n", ret); - return ret; - } - - ret = dp_display_get_next_bridge(dp_display); - if (ret) - return ret; - ret = dp_bridge_init(dp_display, dev, encoder); if (ret) { DRM_DEV_ERROR(dev->dev, @@ -1593,6 +1499,11 @@ void dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, dp_hpd_plug_handle(dp_display, 0); mutex_lock(&dp_display->event_mutex); + if (pm_runtime_resume_and_get(&dp->pdev->dev)) { + DRM_ERROR("failed to pm_runtime_resume\n"); + mutex_unlock(&dp_display->event_mutex); + return; + } state = dp_display->hpd_state; if (state != ST_DISPLAY_OFF && state != ST_MAINLINK_READY) { @@ -1657,10 +1568,9 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge, mutex_lock(&dp_display->event_mutex); state = dp_display->hpd_state; - if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) { - mutex_unlock(&dp_display->event_mutex); - return; - } + if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) + drm_dbg_dp(dp->drm_dev, "type=%d wrong hpd_state=%d\n", + dp->connector_type, state); dp_display_disable(dp_display); @@ -1673,6 +1583,8 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge, } drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type); + + pm_runtime_put_sync(&dp->pdev->dev); mutex_unlock(&dp_display->event_mutex); } @@ -1711,7 +1623,21 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge) struct msm_dp *dp_display = dp_bridge->dp_display; struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display); + /* + * this is for the external DP case with HPD irq enabled: + * step-1: dp_pm_runtime_resume() enables the DP host only + * step-2: the HPD block is enabled and the HPD irq is armed here + * step-3: wait for the plugin irq while the PHY is not initialized + * step-4: the DP PHY is initialized in the plugin handler before link training + * + */ mutex_lock(&dp->event_mutex); + if (pm_runtime_resume_and_get(&dp_display->pdev->dev)) { + DRM_ERROR("failed to resume power\n"); + mutex_unlock(&dp->event_mutex); + return; + } + dp_catalog_ctrl_hpd_enable(dp->catalog); /* enable HPD interrupts */ @@ -1733,6 +1659,8 @@ void dp_bridge_hpd_disable(struct drm_bridge *bridge) dp_catalog_ctrl_hpd_disable(dp->catalog); dp_display->internal_hpd = false; + + pm_runtime_put_sync(&dp_display->pdev->dev); mutex_unlock(&dp->event_mutex); } @@ -1747,13 +1675,8 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge, if (dp_display->internal_hpd) return; - if (!dp->core_initialized) { - drm_dbg_dp(dp->drm_dev, "not initialized\n"); - return; - } - - if (!dp_display->is_connected && status == connector_status_connected) + if (!dp_display->link_ready && status == connector_status_connected) dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0); - else if (dp_display->is_connected && status == 
connector_status_disconnected) dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); } diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index f66cdbc35785..102f3507d824 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -17,7 +17,7 @@ struct msm_dp { struct drm_bridge *bridge; struct drm_connector *connector; struct drm_bridge *next_bridge; - bool is_connected; + bool link_ready; bool audio_enabled; bool power_on; unsigned int connector_type; @@ -36,11 +36,11 @@ struct msm_dp { int dp_display_set_plugged_cb(struct msm_dp *dp_display, hdmi_codec_plugged_cb fn, struct device *codec_dev); int dp_display_get_modes(struct msm_dp *dp_display); -int dp_display_request_irq(struct msm_dp *dp_display); bool dp_display_check_video_test(struct msm_dp *dp_display); int dp_display_get_test_bpp(struct msm_dp *dp_display); void dp_display_signal_audio_start(struct msm_dp *dp_display); void dp_display_signal_audio_complete(struct msm_dp *dp_display); void dp_display_set_psr(struct msm_dp *dp, bool enter); +void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *dentry, bool is_edp); #endif /* _DP_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index e3bdd7dd4cdc..46e6889037e8 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -24,10 +24,10 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge) dp = to_dp_bridge(bridge)->dp_display; - drm_dbg_dp(dp->drm_dev, "is_connected = %s\n", - (dp->is_connected) ? "true" : "false"); + drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", + (dp->link_ready) ? "true" : "false"); - return (dp->is_connected) ? connector_status_connected : + return (dp->link_ready) ? connector_status_connected : connector_status_disconnected; } @@ -40,8 +40,8 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge, dp = to_dp_bridge(bridge)->dp_display; - drm_dbg_dp(dp->drm_dev, "is_connected = %s\n", - (dp->is_connected) ? "true" : "false"); + drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", + (dp->link_ready) ? "true" : "false"); /* * There is no protection in the DRM framework to check if the display @@ -55,7 +55,7 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge, * After that this piece of code can be removed. */ if (bridge->ops & DRM_BRIDGE_OP_HPD) - return (dp->is_connected) ? 0 : -ENOTCONN; + return (dp->link_ready) ? 
0 : -ENOTCONN; return 0; } @@ -78,7 +78,7 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector * dp = to_dp_bridge(bridge)->dp_display; /* pluggable case assumes EDID is read when HPD */ - if (dp->is_connected) { + if (dp->link_ready) { rc = dp_display_get_modes(dp); if (rc <= 0) { DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc); @@ -90,6 +90,13 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector * return rc; } +static void dp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct msm_dp *dp = to_dp_bridge(bridge)->dp_display; + + dp_display_debugfs_init(dp, root, false); +} + static const struct drm_bridge_funcs dp_bridge_ops = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, @@ -105,6 +112,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = { .hpd_enable = dp_bridge_hpd_enable, .hpd_disable = dp_bridge_hpd_disable, .hpd_notify = dp_bridge_hpd_notify, + .debugfs_init = dp_bridge_debugfs_init, }; static int edp_bridge_atomic_check(struct drm_bridge *drm_bridge, @@ -260,6 +268,13 @@ static enum drm_mode_status edp_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } +static void edp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct msm_dp *dp = to_dp_bridge(bridge)->dp_display; + + dp_display_debugfs_init(dp, root, true); +} + static const struct drm_bridge_funcs edp_bridge_ops = { .atomic_enable = edp_bridge_atomic_enable, .atomic_disable = edp_bridge_atomic_disable, @@ -270,6 +285,7 @@ static const struct drm_bridge_funcs edp_bridge_ops = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_check = edp_bridge_atomic_check, + .debugfs_init = edp_bridge_debugfs_init, }; int dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/dp/dp_power.c b/drivers/gpu/drm/msm/dp/dp_power.c index 5cb84ca40e9e..c4843dd69f47 100644 --- a/drivers/gpu/drm/msm/dp/dp_power.c +++ b/drivers/gpu/drm/msm/dp/dp_power.c @@ -152,45 +152,17 @@ int dp_power_client_init(struct dp_power *dp_power) power = container_of(dp_power, struct dp_power_private, dp_power); - pm_runtime_enable(power->dev); - return dp_power_clk_init(power); } -void dp_power_client_deinit(struct dp_power *dp_power) -{ - struct dp_power_private *power; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - pm_runtime_disable(power->dev); -} - int dp_power_init(struct dp_power *dp_power) { - int rc = 0; - struct dp_power_private *power = NULL; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - pm_runtime_get_sync(power->dev); - - rc = dp_power_clk_enable(dp_power, DP_CORE_PM, true); - if (rc) - pm_runtime_put_sync(power->dev); - - return rc; + return dp_power_clk_enable(dp_power, DP_CORE_PM, true); } int dp_power_deinit(struct dp_power *dp_power) { - struct dp_power_private *power; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - dp_power_clk_enable(dp_power, DP_CORE_PM, false); - pm_runtime_put_sync(power->dev); - return 0; + return dp_power_clk_enable(dp_power, DP_CORE_PM, false); } struct dp_power *dp_power_get(struct device *dev, struct dp_parser *parser) diff --git a/drivers/gpu/drm/msm/dp/dp_power.h b/drivers/gpu/drm/msm/dp/dp_power.h index a3dec200785e..55ada51edb57 100644 --- a/drivers/gpu/drm/msm/dp/dp_power.h +++ 
b/drivers/gpu/drm/msm/dp/dp_power.h @@ -81,17 +81,6 @@ int dp_power_clk_enable(struct dp_power *power, enum dp_pm_type pm_type, int dp_power_client_init(struct dp_power *power); /** - * dp_power_clinet_deinit() - de-initialize clock and regulator modules - * - * @power: instance of power module - * return: 0 for success, error for failure. - * - * This API will de-initialize the DisplayPort's clocks and regulator - * modules. - */ -void dp_power_client_deinit(struct dp_power *power); - -/** * dp_power_get() - configure and get the DisplayPort power module data * * @parser: instance of parser module diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 1f98ff74ceb0..10ba7d153d1c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -190,6 +190,21 @@ static const struct msm_dsi_config sm8550_dsi_cfg = { }, }; +static const struct regulator_bulk_data sm8650_dsi_regulators[] = { + { .supply = "vdda", .init_load_uA = 16600 }, /* 1.2 V */ +}; + +static const struct msm_dsi_config sm8650_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .regulator_data = sm8650_dsi_regulators, + .num_regulators = ARRAY_SIZE(sm8650_dsi_regulators), + .bus_clk_names = dsi_v2_4_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_v2_4_clk_names), + .io_start = { + { 0xae94000, 0xae96000 }, + }, +}; + static const struct regulator_bulk_data sc7280_dsi_regulators[] = { { .supply = "vdda", .init_load_uA = 8350 }, /* 1.2 V */ { .supply = "refgen" }, @@ -281,6 +296,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_7_0, &sm8550_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_8_0, + &sm8650_dsi_cfg, &msm_dsi_6g_v2_host_ops}, }; const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 43f0dd74edb6..4c9b4b37681b 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -28,6 +28,7 @@ #define MSM_DSI_6G_VER_MINOR_V2_5_0 0x20050000 #define MSM_DSI_6G_VER_MINOR_V2_6_0 0x20060000 #define MSM_DSI_6G_VER_MINOR_V2_7_0 0x20070000 +#define MSM_DSI_6G_VER_MINOR_V2_8_0 0x20080000 #define MSM_DSI_V2_VER_MINOR_8064 0x0 diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 05621e5e7d63..24a347fe2998 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -516,7 +516,9 @@ static int dsi_phy_enable_resource(struct msm_dsi_phy *phy) struct device *dev = &phy->pdev->dev; int ret; - pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret) + return ret; ret = clk_prepare_enable(phy->ahb_clk); if (ret) { @@ -585,6 +587,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_5nm_8450_cfgs }, { .compatible = "qcom,sm8550-dsi-phy-4nm", .data = &dsi_phy_4nm_8550_cfgs }, + { .compatible = "qcom,sm8650-dsi-phy-4nm", + .data = &dsi_phy_4nm_8650_cfgs }, #endif {} }; @@ -689,6 +693,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), "Unable to get ahb clk\n"); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + /* PLL init will call into clk_register which requires * register access, so we need to enable power and ahb clock. 
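Two idioms recur in the dsi_phy hunks nearby and are worth spelling out: pm_runtime_resume_and_get() drops the usage count by itself on failure, so the error path needs no pm_runtime_put(), and devm_pm_runtime_enable() ties the matching pm_runtime_disable() to driver unbind so no explicit cleanup is required. A sketch under those assumptions, with hypothetical foo_* names:

static int foo_probe(struct platform_device *pdev)
{
	int ret;

	/* the paired pm_runtime_disable() happens automatically on unbind */
	ret = devm_pm_runtime_enable(&pdev->dev);
	if (ret)
		return ret;

	return 0;
}

static int foo_enable_resource(struct device *dev)
{
	int ret;

	ret = pm_runtime_resume_and_get(dev);
	if (ret)
		return ret;	/* usage count already dropped for us */

	/* ... safe to touch registers and enable clocks here ... */
	return 0;
}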
*/ diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 8b640d174785..e4275d3ad581 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -62,6 +62,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8350_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8450_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs; struct msm_dsi_dphy_timing { u32 clk_zero; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 89a6344bc865..82d015aa2d63 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -1121,6 +1121,10 @@ static const struct regulator_bulk_data dsi_phy_7nm_37750uA_regulators[] = { { .supply = "vdds", .init_load_uA = 37550 }, }; +static const struct regulator_bulk_data dsi_phy_7nm_98000uA_regulators[] = { + { .supply = "vdds", .init_load_uA = 98000 }, +}; + static const struct regulator_bulk_data dsi_phy_7nm_97800uA_regulators[] = { { .supply = "vdds", .init_load_uA = 97800 }, }; @@ -1281,3 +1285,26 @@ const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs = { .num_dsi_phy = 2, .quirks = DSI_PHY_7NM_QUIRK_V5_2, }; + +const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_7nm_98000uA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_7nm_98000uA_regulators), + .ops = { + .enable = dsi_7nm_phy_enable, + .disable = dsi_7nm_phy_disable, + .pll_init = dsi_pll_7nm_init, + .save_pll_state = dsi_7nm_pll_save_state, + .restore_pll_state = dsi_7nm_pll_restore_state, + .set_continuous_clock = dsi_7nm_set_continuous_clock, + }, + .min_pll_rate = 600000000UL, +#ifdef CONFIG_64BIT + .max_pll_rate = 5000000000UL, +#else + .max_pll_rate = ULONG_MAX, +#endif + .io_start = { 0xae95000, 0xae97000 }, + .num_dsi_phy = 2, + .quirks = DSI_PHY_7NM_QUIRK_V5_2, +}; diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 04d304eed223..4494f6d1c7cb 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -304,36 +304,21 @@ int msm_debugfs_late_init(struct drm_device *dev) return ret; } -void msm_debugfs_init(struct drm_minor *minor) +static void msm_debugfs_gpu_init(struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct msm_drm_private *priv = dev->dev_private; struct dentry *gpu_devfreq; - drm_debugfs_create_files(msm_debugfs_list, - ARRAY_SIZE(msm_debugfs_list), - minor->debugfs_root, minor); - debugfs_create_file("gpu", S_IRUSR, minor->debugfs_root, dev, &msm_gpu_fops); - if (priv->kms) { - drm_debugfs_create_files(msm_kms_debugfs_list, - ARRAY_SIZE(msm_kms_debugfs_list), - minor->debugfs_root, minor); - debugfs_create_file("kms", S_IRUSR, minor->debugfs_root, - dev, &msm_kms_fops); - } - debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root, &priv->hangcheck_period); debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root, &priv->disable_err_irq); - debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root, - dev, &shrink_fops); - gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root); debugfs_create_bool("idle_clamp",0600, gpu_devfreq, @@ -344,6 +329,30 @@ void msm_debugfs_init(struct drm_minor *minor) debugfs_create_u32("downdifferential",0600, gpu_devfreq, &priv->gpu_devfreq_config.downdifferential); +} + +void 
msm_debugfs_init(struct drm_minor *minor) +{ + struct drm_device *dev = minor->dev; + struct msm_drm_private *priv = dev->dev_private; + + drm_debugfs_create_files(msm_debugfs_list, + ARRAY_SIZE(msm_debugfs_list), + minor->debugfs_root, minor); + + if (priv->gpu_pdev) + msm_debugfs_gpu_init(minor); + + if (priv->kms) { + drm_debugfs_create_files(msm_kms_debugfs_list, + ARRAY_SIZE(msm_kms_debugfs_list), + minor->debugfs_root, minor); + debugfs_create_file("kms", S_IRUSR, minor->debugfs_root, + dev, &msm_kms_fops); + } + + debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root, + dev, &shrink_fops); if (priv->kms && priv->kms->funcs->debugfs_init) priv->kms->funcs->debugfs_init(priv->kms, minor); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 3f217b578293..50b65ffc24b1 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -37,9 +37,10 @@ * - 1.9.0 - Add MSM_SUBMIT_FENCE_SN_IN * - 1.10.0 - Add MSM_SUBMIT_BO_NO_IMPLICIT * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST) + * - 1.12.0 - Add MSM_INFO_SET_METADATA and MSM_INFO_GET_METADATA */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 10 +#define MSM_VERSION_MINOR 12 #define MSM_VERSION_PATCHLEVEL 0 static void msm_deinit_vram(struct drm_device *ddev); @@ -544,6 +545,85 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev, return msm_gem_set_iova(obj, ctx->aspace, iova); } +static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj, + __user void *metadata, + u32 metadata_size) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + void *buf; + int ret; + + /* Impose a moderate upper bound on metadata size: */ + if (metadata_size > 128) { + return -EOVERFLOW; + } + + /* Use a temporary buf to keep copy_from_user() outside of gem obj lock: */ + buf = memdup_user(metadata, metadata_size); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = msm_gem_lock_interruptible(obj); + if (ret) + goto out; + + msm_obj->metadata = + krealloc(msm_obj->metadata, metadata_size, GFP_KERNEL); + if (msm_obj->metadata) { + msm_obj->metadata_size = metadata_size; + memcpy(msm_obj->metadata, buf, metadata_size); + } else { + msm_obj->metadata_size = 0; + ret = -ENOMEM; + } + + msm_gem_unlock(obj); + +out: + kfree(buf); + + return ret; +} + +static int msm_ioctl_gem_info_get_metadata(struct drm_gem_object *obj, + __user void *metadata, + u32 *metadata_size) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + void *buf; + int ret, len; + + if (!metadata) { + /* + * Querying the size is inherently racy, but + * EXT_external_objects expects the app to confirm + * via device and driver UUIDs that the exporter and + * importer versions match. 
All we can do from the + * kernel side is check the length under obj lock + * when userspace tries to retrieve the metadata + */ + *metadata_size = msm_obj->metadata_size; + return 0; + } + + ret = msm_gem_lock_interruptible(obj); + if (ret) + return ret; + + /* Avoid copy_to_user() under gem obj lock: */ + len = msm_obj->metadata_size; + buf = kmemdup(msm_obj->metadata, len, GFP_KERNEL); + + msm_gem_unlock(obj); + + if (*metadata_size < len) { + ret = -ETOOSMALL; + } else if (copy_to_user(metadata, buf, len)) { + ret = -EFAULT; + } else { + *metadata_size = len; + } + + kfree(buf); + + return ret; +} + static int msm_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file) { @@ -566,6 +646,8 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, break; case MSM_INFO_SET_NAME: case MSM_INFO_GET_NAME: + case MSM_INFO_SET_METADATA: + case MSM_INFO_GET_METADATA: break; default: return -EINVAL; @@ -618,7 +700,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, break; case MSM_INFO_GET_NAME: if (args->value && (args->len < strlen(msm_obj->name))) { - ret = -EINVAL; + ret = -ETOOSMALL; break; } args->len = strlen(msm_obj->name); @@ -628,6 +710,14 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, ret = -EFAULT; } break; + case MSM_INFO_SET_METADATA: + ret = msm_ioctl_gem_info_set_metadata( + obj, u64_to_user_ptr(args->value), args->len); + break; + case MSM_INFO_GET_METADATA: + ret = msm_ioctl_gem_info_get_metadata( + obj, u64_to_user_ptr(args->value), &args->len); + break; } drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index cd5bf658df66..16a7cbc0b7dd 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -78,12 +78,10 @@ enum msm_dsi_controller { * enum msm_event_wait - type of HW events to wait for * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel - * @MSM_ENC_VBLANK - wait for the HW VBLANK event (for driver-internal waiters) */ enum msm_event_wait { MSM_ENC_COMMIT_DONE = 0, MSM_ENC_TX_COMPLETE, - MSM_ENC_VBLANK, }; /** @@ -92,12 +90,14 @@ enum msm_event_wait { * @num_intf: number of interfaces the panel is mounted on * @num_dspp: number of dspp blocks used * @num_dsc: number of Display Stream Compression (DSC) blocks used + * @needs_cdm: indicates whether cdm block is needed for this display topology */ struct msm_display_topology { u32 num_lm; u32 num_intf; u32 num_dspp; u32 num_dsc; + bool needs_cdm; }; /* Commit/Event thread specific structure */ @@ -386,10 +386,8 @@ int __init msm_dp_register(void); void __exit msm_dp_unregister(void); int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder); -void msm_dp_irq_postinstall(struct msm_dp *dp_display); void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display); -void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor); bool msm_dp_wide_bus_available(const struct msm_dp *dp_display); #else @@ -407,19 +405,10 @@ static inline int msm_dp_modeset_init(struct msm_dp *dp_display, return -EINVAL; } -static inline void msm_dp_irq_postinstall(struct msm_dp *dp_display) -{ -} - static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display) { } -static inline void msm_dp_debugfs_init(struct msm_dp *dp_display, - struct drm_minor *minor) -{ -} - static inline bool 
msm_dp_wide_bus_available(const struct msm_dp *dp_display) { return false; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index db1e748daa75..175ee4ab8a6f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -226,9 +226,9 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj, msm_gem_assert_locked(obj); - if (GEM_WARN_ON(msm_obj->madv > madv)) { - DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n", - msm_obj->madv, madv); + if (msm_obj->madv > madv) { + DRM_DEV_DEBUG_DRIVER(obj->dev->dev, "Invalid madv state: %u vs %u\n", + msm_obj->madv, madv); return ERR_PTR(-EBUSY); } @@ -1058,6 +1058,7 @@ static void msm_gem_free_object(struct drm_gem_object *obj) drm_gem_object_release(obj); + kfree(msm_obj->metadata); kfree(msm_obj); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 8ddef5443140..8d414b072c29 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -9,6 +9,7 @@ #include <linux/kref.h> #include <linux/dma-resv.h> +#include "drm/drm_exec.h" #include "drm/gpu_scheduler.h" #include "msm_drv.h" @@ -108,6 +109,10 @@ struct msm_gem_object { char name[32]; /* Identifier to print for the debugfs files */ + /* userspace metadata backchannel */ + void *metadata; + u32 metadata_size; + /** * pin_count: Number of times the pages are pinned * @@ -254,7 +259,7 @@ struct msm_gem_submit { struct msm_gpu *gpu; struct msm_gem_address_space *aspace; struct list_head node; /* node in ring submit list */ - struct ww_acquire_ctx ticket; + struct drm_exec exec; uint32_t seqno; /* Sequence number of the submit on the ring */ /* Hw fence, which is created when the scheduler executes the job, and @@ -270,9 +275,9 @@ struct msm_gem_submit { int fence_id; /* key into queue->fence_idr */ struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ - bool fault_dumped; /* Limit devcoredump dumping to one per submit */ - bool valid; /* true if no cmdstream patching needed */ - bool in_rb; /* "sudo" mode, copy cmds into RB */ + bool bos_pinned : 1; + bool fault_dumped:1;/* Limit devcoredump dumping to one per submit */ + bool in_rb : 1; /* "sudo" mode, copy cmds into RB */ struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; @@ -287,10 +292,6 @@ struct msm_gem_submit { struct drm_msm_gem_submit_reloc *relocs; } *cmd; /* array of size nr_cmds */ struct { -/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ -#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? 
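For context on the metadata fields added to msm_gem_object above, here is a userspace-side sketch of the intended two-step MSM_INFO_GET_METADATA flow. The helper name is hypothetical; it assumes libdrm's drmIoctl() wrapper, <xf86drm.h>, <drm/msm_drm.h>, and a valid DRM fd:

static int get_bo_metadata(int fd, uint32_t bo_handle, void **out, uint32_t *out_len)
{
	struct drm_msm_gem_info req = {
		.handle = bo_handle,
		.info = MSM_INFO_GET_METADATA,
		.value = 0,			/* NULL pointer: query only the size */
	};

	if (drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req))
		return -errno;

	void *buf = malloc(req.len);		/* size reported by the first call */
	if (!buf)
		return -ENOMEM;

	req.value = (uintptr_t)buf;
	if (drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req)) {	/* fetch the payload */
		free(buf);
		return -errno;
	}

	*out = buf;
	*out_len = req.len;
	return 0;
}

As the kernel comment notes, the size query is inherently racy; EXT_external_objects relies on the device/driver UUID handshake, not the kernel, to guarantee that exporter and importer agree.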
*/ -#define BO_LOCKED 0x4000 /* obj lock is held */ -#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */ uint32_t flags; union { struct drm_gem_object *obj; diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 5a7d48c02c4b..07ca4ddfe4e3 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -75,7 +75,7 @@ static bool wait_for_idle(struct drm_gem_object *obj) { enum dma_resv_usage usage = dma_resv_usage_rw(true); - return dma_resv_wait_timeout(obj->resv, usage, false, 1000) > 0; + return dma_resv_wait_timeout(obj->resv, usage, false, 10) > 0; } static bool diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 99744de6c05a..fba78193127d 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -17,6 +17,12 @@ #include "msm_gem.h" #include "msm_gpu_trace.h" +/* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable + * error msgs for debugging, but we don't spam dmesg by default + */ +#define SUBMIT_ERROR(submit, fmt, ...) \ + DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__) + /* * Cmdstream submission: */ @@ -37,7 +43,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, if (sz > SIZE_MAX) return ERR_PTR(-ENOMEM); - submit = kzalloc(sz, GFP_KERNEL); + submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); if (!submit) return ERR_PTR(-ENOMEM); @@ -48,7 +54,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, return ERR_PTR(ret); } - ret = drm_sched_job_init(&submit->base, queue->entity, queue); + ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue); if (ret) { kfree(submit->hw_fence); kfree(submit); @@ -136,7 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || !(submit_bo.flags & MANDATORY_FLAGS)) { - DRM_ERROR("invalid flags: %x\n", submit_bo.flags); + SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags); ret = -EINVAL; i = 0; goto out; @@ -144,8 +150,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, submit->bos[i].handle = submit_bo.handle; submit->bos[i].flags = submit_bo.flags; - /* in validate_objects() we figure out if this is true: */ - submit->bos[i].iova = submit_bo.presumed; } spin_lock(&file->table_lock); @@ -158,7 +162,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ obj = idr_find(&file->object_idr, submit->bos[i].handle); if (!obj) { - DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i); + SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); ret = -EINVAL; goto out_unlock; } @@ -202,13 +206,13 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; default: - DRM_ERROR("invalid type: %08x\n", submit_cmd.type); + SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type); return -EINVAL; } if (submit_cmd.size % 4) { - DRM_ERROR("non-aligned cmdstream buffer size: %u\n", - submit_cmd.size); + SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n", + submit_cmd.size); ret = -EINVAL; goto out; } @@ -228,7 +232,7 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, ret = -ENOMEM; goto out; } - submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL); + submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); if (!submit->cmd[i].relocs) { ret = -ENOMEM; goto out; @@ -244,101 +248,30 @@ out: 
return ret; } -/* Unwind bo state, according to cleanup_flags. In the success case, only - * the lock is dropped at the end of the submit (and active/pin ref is dropped - * later when the submit is retired). - */ -static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, - unsigned cleanup_flags) -{ - struct drm_gem_object *obj = submit->bos[i].obj; - unsigned flags = submit->bos[i].flags & cleanup_flags; - - /* - * Clear flags bit before dropping lock, so that the msm_job_run() - * path isn't racing with submit_cleanup() (ie. the read/modify/ - * write is protected by the obj lock in all paths) - */ - submit->bos[i].flags &= ~cleanup_flags; - - if (flags & BO_PINNED) - msm_gem_unpin_locked(obj); - - if (flags & BO_LOCKED) - dma_resv_unlock(obj->resv); -} - -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) -{ - unsigned cleanup_flags = BO_PINNED | BO_LOCKED; - submit_cleanup_bo(submit, i, cleanup_flags); - - if (!(submit->bos[i].flags & BO_VALID)) - submit->bos[i].iova = 0; -} - /* This is where we make sure all the bo's are reserved and pin'd: */ static int submit_lock_objects(struct msm_gem_submit *submit) { - int contended, slow_locked = -1, i, ret = 0; - -retry: - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj; - - if (slow_locked == i) - slow_locked = -1; + int ret; - contended = i; + drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos); - if (!(submit->bos[i].flags & BO_LOCKED)) { - ret = dma_resv_lock_interruptible(obj->resv, - &submit->ticket); + drm_exec_until_all_locked (&submit->exec) { + for (unsigned i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = submit->bos[i].obj; + ret = drm_exec_prepare_obj(&submit->exec, obj, 1); + drm_exec_retry_on_contention(&submit->exec); if (ret) - goto fail; - submit->bos[i].flags |= BO_LOCKED; + goto error; } } - ww_acquire_done(&submit->ticket); - return 0; -fail: - if (ret == -EALREADY) { - DRM_ERROR("handle %u at index %u already on submit list\n", - submit->bos[i].handle, i); - ret = -EINVAL; - } - - for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i); - - if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked); - - if (ret == -EDEADLK) { - struct drm_gem_object *obj = submit->bos[contended].obj; - /* we lost out in a seqno race, lock and retry.. */ - ret = dma_resv_lock_slow_interruptible(obj->resv, - &submit->ticket); - if (!ret) { - submit->bos[contended].flags |= BO_LOCKED; - slow_locked = contended; - goto retry; - } - - /* Not expecting -EALREADY here, if the bo was already - * locked, we should have gotten -EALREADY already from - * the dma_resv_lock_interruptable() call. - */ - WARN_ON_ONCE(ret == -EALREADY); - } - +error: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) +static int submit_fence_sync(struct msm_gem_submit *submit) { int i, ret = 0; @@ -346,22 +279,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) struct drm_gem_object *obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; - /* NOTE: _reserve_shared() must happen before - * _add_shared_fence(), which makes this a slightly - * strange place to call it. OTOH this is a - * convenient can-fail point to hook it in. 
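The submit_lock_objects() rewrite above is the canonical drm_exec pattern: drm_exec_until_all_locked() replays its body whenever ww-mutex contention is detected, and drm_exec_retry_on_contention() performs the backoff and restart, which is what allows the hand-rolled slow-path/unwind code to be deleted. The generic shape, assuming objs[] and nr describe the objects to lock (requires drm/drm_exec.h):

static int lock_all_objects(struct drm_gem_object **objs, unsigned int nr)
{
	struct drm_exec exec;
	int ret = 0;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, nr);
	drm_exec_until_all_locked(&exec) {
		for (unsigned int i = 0; i < nr; i++) {
			/* lock the object and reserve one fence slot */
			ret = drm_exec_prepare_obj(&exec, objs[i], 1);
			drm_exec_retry_on_contention(&exec);
			if (ret)
				goto out;
		}
	}
	/* ... all objects locked: pin pages, attach fences, submit ... */
out:
	drm_exec_fini(&exec);	/* drops every lock taken above */
	return ret;
}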
- */ - ret = dma_resv_reserve_fences(obj->resv, 1); - if (ret) - return ret; - - /* If userspace has determined that explicit fencing is - * used, it can disable implicit sync on the entire - * submit: - */ - if (no_implicit) - continue; - /* Otherwise userspace can ask for implicit sync to be * disabled on specific buffers. This is useful for internal * usermode driver managed buffers, suballocation, etc. @@ -384,8 +301,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit) struct msm_drm_private *priv = submit->dev->dev_private; int i, ret = 0; - submit->valid = true; - for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; struct msm_gem_vma *vma; @@ -401,14 +316,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - if (vma->iova == submit->bos[i].iova) { - submit->bos[i].flags |= BO_VALID; - } else { - submit->bos[i].iova = vma->iova; - /* iova changed, so address in cmdstream is not valid: */ - submit->bos[i].flags &= ~BO_VALID; - submit->valid = false; - } + submit->bos[i].iova = vma->iova; } /* @@ -421,13 +329,28 @@ static int submit_pin_objects(struct msm_gem_submit *submit) mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { msm_gem_pin_obj_locked(submit->bos[i].obj); - submit->bos[i].flags |= BO_PINNED; } mutex_unlock(&priv->lru.lock); + submit->bos_pinned = true; + return ret; } +static void submit_unpin_objects(struct msm_gem_submit *submit) +{ + if (!submit->bos_pinned) + return; + + for (int i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = submit->bos[i].obj; + + msm_gem_unpin_locked(obj); + } + + submit->bos_pinned = false; +} + static void submit_attach_object_fences(struct msm_gem_submit *submit) { int i; @@ -445,11 +368,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) } static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, - struct drm_gem_object **obj, uint64_t *iova, bool *valid) + struct drm_gem_object **obj, uint64_t *iova) { if (idx >= submit->nr_bos) { - DRM_ERROR("invalid buffer index: %u (out of %u)\n", - idx, submit->nr_bos); + SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n", + idx, submit->nr_bos); return -EINVAL; } @@ -457,8 +380,6 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, *obj = submit->bos[idx].obj; if (iova) *iova = submit->bos[idx].iova; - if (valid) - *valid = !!(submit->bos[idx].flags & BO_VALID); return 0; } @@ -471,11 +392,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint32_t *ptr; int ret = 0; - if (!nr_relocs) - return 0; - if (offset % 4) { - DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset); + SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset); return -EINVAL; } @@ -494,11 +412,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob struct drm_msm_gem_submit_reloc submit_reloc = relocs[i]; uint32_t off; uint64_t iova; - bool valid; if (submit_reloc.submit_offset % 4) { - DRM_ERROR("non-aligned reloc offset: %u\n", - submit_reloc.submit_offset); + SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n", + submit_reloc.submit_offset); ret = -EINVAL; goto out; } @@ -508,18 +425,15 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob if ((off >= (obj->size / 4)) || (off < last_offset)) { - DRM_ERROR("invalid offset %u at reloc %u\n", off, i); + SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i); ret = -EINVAL; goto out; } - ret = 
submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid); + ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova); if (ret) goto out; - if (valid) - continue; - iova += submit_reloc.reloc_offset; if (submit_reloc.shift < 0) @@ -544,18 +458,14 @@ out: */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { - unsigned cleanup_flags = BO_LOCKED; - unsigned i; - - if (error) - cleanup_flags |= BO_PINNED; - - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj; - submit_cleanup_bo(submit, i, cleanup_flags); - if (error) - drm_gem_object_put(obj); + if (error) { + submit_unpin_objects(submit); + /* job wasn't enqueued to scheduler, so early retirement: */ + msm_submit_retire(submit); } + + if (submit->exec.objects) + drm_exec_fini(&submit->exec); } void msm_submit_retire(struct msm_gem_submit *submit) @@ -749,7 +659,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; int out_fence_fd = -1; - bool has_ww_ticket = false; unsigned i; int ret; @@ -855,15 +764,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; /* copy_*_user while holding a ww ticket upsets lockdep */ - ww_acquire_init(&submit->ticket, &reservation_ww_class); - has_ww_ticket = true; ret = submit_lock_objects(submit); if (ret) goto out; - ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); - if (ret) - goto out; + if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { + ret = submit_fence_sync(submit); + if (ret) + goto out; + } ret = submit_pin_objects(submit); if (ret) @@ -873,32 +782,27 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_gem_object *obj; uint64_t iova; - ret = submit_bo(submit, submit->cmd[i].idx, - &obj, &iova, NULL); + ret = submit_bo(submit, submit->cmd[i].idx, &obj, &iova); if (ret) goto out; if (!submit->cmd[i].size || ((submit->cmd[i].size + submit->cmd[i].offset) > obj->size / 4)) { - DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4); + SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4); ret = -EINVAL; goto out; } submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4); - if (submit->valid) + if (likely(!submit->cmd[i].nr_relocs)) continue; if (!gpu->allow_relocs) { - if (submit->cmd[i].nr_relocs) { - DRM_ERROR("relocs not allowed\n"); - ret = -EINVAL; - goto out; - } - - continue; + SUBMIT_ERROR(submit, "relocs not allowed\n"); + ret = -EINVAL; + goto out; } ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4, @@ -974,6 +878,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } } + if (ret) + goto out; + submit_attach_object_fences(submit); /* The scheduler owns a ref now: */ @@ -993,8 +900,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out: submit_cleanup(submit, !!ret); - if (has_ww_ticket) - ww_acquire_fini(&submit->ticket); out_unlock: mutex_unlock(&queue->lock); out_post_unlock: diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 7f64c6667300..095390774f22 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -292,8 +292,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Set the active crash state to be dumped on failure */ gpu->crashstate = state; - /* FIXME: Release the crashstate if this errors out? 
*/ - dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, + dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, msm_gpu_devcoredump_read, msm_gpu_devcoredump_free); } #else @@ -366,29 +365,31 @@ static void recover_worker(struct kthread_work *work) DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name); submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); - if (submit) { - /* Increment the fault counts */ - submit->queue->faults++; - if (submit->aspace) - submit->aspace->faults++; - get_comm_cmdline(submit, &comm, &cmd); + /* + * If the submit retired while we were waiting for the worker to run, + * or waiting to acquire the gpu lock, then nothing more to do. + */ + if (!submit) + goto out_unlock; - if (comm && cmd) { - DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", - gpu->name, comm, cmd); + /* Increment the fault counts */ + submit->queue->faults++; + if (submit->aspace) + submit->aspace->faults++; - msm_rd_dump_submit(priv->hangrd, submit, - "offending task: %s (%s)", comm, cmd); - } else { - msm_rd_dump_submit(priv->hangrd, submit, NULL); - } + get_comm_cmdline(submit, &comm, &cmd); + + if (comm && cmd) { + DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", comm, cmd); } else { - /* - * We couldn't attribute this fault to any particular context, - * so increment the global fault count instead. - */ - gpu->global_faults++; + DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name); + + msm_rd_dump_submit(priv->hangrd, submit, NULL); } /* Record the crash state */ @@ -441,6 +442,7 @@ static void recover_worker(struct kthread_work *work) pm_runtime_put(&gpu->pdev->dev); +out_unlock: mutex_unlock(&gpu->lock); msm_gpu_retire(gpu); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 4252e3839fbc..2bfcb222e353 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -347,7 +347,7 @@ struct msm_gpu_perfcntr { * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some * cases, so we don't use it (no need for kernel generated jobs). 
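A note for the NR_SCHED_PRIORITIES hunk that follows: drm_sched renumbered its priorities so that lower values now mean higher priority (in this kernel, DRM_SCHED_PRIORITY_KERNEL = 0, _HIGH = 1, _NORMAL = 2, _LOW = 3) and DRM_SCHED_PRIORITY_MIN no longer exists, so the span msm exposes is computed from the new endpoints. A quick check, assuming those enum values:

/* old: 1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN  = 1 + 2 - 0 = 3
 * new: 1 + DRM_SCHED_PRIORITY_LOW  - DRM_SCHED_PRIORITY_HIGH = 1 + 3 - 1 = 3
 * i.e. the same three user-visible priority levels as before.
 */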
*/ -#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_LOW - DRM_SCHED_PRIORITY_HIGH) /** * struct msm_file_private - per-drm_file context diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 6865db1e3ce8..455b2e3a0cdd 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -28,6 +28,8 @@ #define MIN_IB_BW 400000000UL /* Min ib vote 400MB */ +#define DEFAULT_REG_BW 153600 /* Used in mdss fbdev driver */ + struct msm_mdss { struct device *dev; @@ -40,8 +42,9 @@ struct msm_mdss { struct irq_domain *domain; } irq_controller; const struct msm_mdss_data *mdss_data; - struct icc_path *path[2]; - u32 num_paths; + struct icc_path *mdp_path[2]; + u32 num_mdp_paths; + struct icc_path *reg_bus_path; }; static int msm_mdss_parse_data_bus_icc_path(struct device *dev, @@ -49,38 +52,26 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, { struct icc_path *path0; struct icc_path *path1; + struct icc_path *reg_bus_path; - path0 = of_icc_get(dev, "mdp0-mem"); + path0 = devm_of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); - msm_mdss->path[0] = path0; - msm_mdss->num_paths = 1; + msm_mdss->mdp_path[0] = path0; + msm_mdss->num_mdp_paths = 1; - path1 = of_icc_get(dev, "mdp1-mem"); + path1 = devm_of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { - msm_mdss->path[1] = path1; - msm_mdss->num_paths++; + msm_mdss->mdp_path[1] = path1; + msm_mdss->num_mdp_paths++; } - return 0; -} - -static void msm_mdss_put_icc_path(void *data) -{ - struct msm_mdss *msm_mdss = data; - int i; - - for (i = 0; i < msm_mdss->num_paths; i++) - icc_put(msm_mdss->path[i]); -} - -static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw) -{ - int i; + reg_bus_path = of_icc_get(dev, "cpu-cfg"); + if (!IS_ERR_OR_NULL(reg_bus_path)) + msm_mdss->reg_bus_path = reg_bus_path; - for (i = 0; i < msm_mdss->num_paths; i++) - icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw)); + return 0; } static void msm_mdss_irq(struct irq_desc *desc) @@ -236,14 +227,22 @@ const struct msm_mdss_data *msm_mdss_get_mdss_data(struct device *dev) static int msm_mdss_enable(struct msm_mdss *msm_mdss) { - int ret; + int ret, i; /* * Several components have AXI clocks that can only be turned on if * the interconnect is enabled (non-zero bandwidth). Let's make sure * that the interconnects are at least at a minimum amount. 
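 * Note that icc_set_bw() takes an average and a peak vote; only the
 * peak (ib) vote is raised here, the average (ab) vote is left at zero.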
*/ - msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW); + for (i = 0; i < msm_mdss->num_mdp_paths; i++) + icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW)); + + if (msm_mdss->mdss_data && msm_mdss->mdss_data->reg_bus_bw) + icc_set_bw(msm_mdss->reg_bus_path, 0, + msm_mdss->mdss_data->reg_bus_bw); + else + icc_set_bw(msm_mdss->reg_bus_path, 0, + DEFAULT_REG_BW); ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks); if (ret) { @@ -295,8 +294,15 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) static int msm_mdss_disable(struct msm_mdss *msm_mdss) { + int i; + clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks); - msm_mdss_icc_request_bw(msm_mdss, 0); + + for (i = 0; i < msm_mdss->num_mdp_paths; i++) + icc_set_bw(msm_mdss->mdp_path[i], 0, 0); + + if (msm_mdss->reg_bus_path) + icc_set_bw(msm_mdss->reg_bus_path, 0, 0); return 0; } @@ -384,6 +390,8 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 if (!msm_mdss) return ERR_PTR(-ENOMEM); + msm_mdss->mdss_data = of_device_get_match_data(&pdev->dev); + msm_mdss->mmio = devm_platform_ioremap_resource_byname(pdev, is_mdp5 ? "mdss_phys" : "mdss"); if (IS_ERR(msm_mdss->mmio)) return ERR_CAST(msm_mdss->mmio); @@ -393,9 +401,6 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); if (ret) return ERR_PTR(ret); - ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss); - if (ret) - return ERR_PTR(ret); if (is_mdp5) ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks); @@ -477,8 +482,6 @@ static int mdss_probe(struct platform_device *pdev) if (IS_ERR(mdss)) return PTR_ERR(mdss); - mdss->mdss_data = of_device_get_match_data(&pdev->dev); - platform_set_drvdata(pdev, mdss); /* @@ -510,11 +513,13 @@ static const struct msm_mdss_data msm8998_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data qcm2290_data = { /* no UBWC */ .highest_bank_bit = 0x2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc7180_data = { @@ -522,6 +527,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, .highest_bank_bit = 0x3, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc7280_data = { @@ -531,6 +537,7 @@ static const struct msm_mdss_data sc7280_data = { .ubwc_static = 1, .highest_bank_bit = 1, .macrotile_mode = 1, + .reg_bus_bw = 74000, }; static const struct msm_mdss_data sc8180x_data = { @@ -538,6 +545,7 @@ static const struct msm_mdss_data sc8180x_data = { .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc8280xp_data = { @@ -545,14 +553,22 @@ static const struct msm_mdss_data sc8280xp_data = { .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, .ubwc_static = 1, - .highest_bank_bit = 2, + .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, +}; + +static const struct msm_mdss_data sdm670_data = { + .ubwc_enc_version = UBWC_2_0, + .ubwc_dec_version = UBWC_2_0, + .highest_bank_bit = 1, }; static const struct msm_mdss_data sdm845_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6350_data = { @@ -561,12 +577,14 @@ static const struct msm_mdss_data sm6350_data = { .ubwc_swizzle = 6, .ubwc_static = 0x1e, .highest_bank_bit = 1, 
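+	/* icc peak vote in kBps, like DEFAULT_REG_BW above. */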
+ .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm8150_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6115_data = { @@ -575,6 +593,7 @@ static const struct msm_mdss_data sm6115_data = { .ubwc_swizzle = 7, .ubwc_static = 0x11f, .highest_bank_bit = 0x1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6125_data = { @@ -592,6 +611,18 @@ static const struct msm_mdss_data sm8250_data = { /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, +}; + +static const struct msm_mdss_data sm8350_data = { + .ubwc_enc_version = UBWC_4_0, + .ubwc_dec_version = UBWC_4_0, + .ubwc_swizzle = 6, + .ubwc_static = 1, + /* TODO: highest_bank_bit = 2 for LP_DDR4 */ + .highest_bank_bit = 3, + .macrotile_mode = 1, + .reg_bus_bw = 74000, }; static const struct msm_mdss_data sm8550_data = { @@ -602,11 +633,13 @@ static const struct msm_mdss_data sm8550_data = { /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 57000, }; static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,msm8998-mdss", .data = &msm8998_data }, { .compatible = "qcom,qcm2290-mdss", .data = &qcm2290_data }, + { .compatible = "qcom,sdm670-mdss", .data = &sdm670_data }, { .compatible = "qcom,sdm845-mdss", .data = &sdm845_data }, { .compatible = "qcom,sc7180-mdss", .data = &sc7180_data }, { .compatible = "qcom,sc7280-mdss", .data = &sc7280_data }, @@ -618,9 +651,10 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sm6375-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm8150-mdss", .data = &sm8150_data }, { .compatible = "qcom,sm8250-mdss", .data = &sm8250_data }, - { .compatible = "qcom,sm8350-mdss", .data = &sm8250_data }, - { .compatible = "qcom,sm8450-mdss", .data = &sm8250_data }, + { .compatible = "qcom,sm8350-mdss", .data = &sm8350_data }, + { .compatible = "qcom,sm8450-mdss", .data = &sm8350_data }, { .compatible = "qcom,sm8550-mdss", .data = &sm8550_data }, + { .compatible = "qcom,sm8650-mdss", .data = &sm8550_data}, {} }; MODULE_DEVICE_TABLE(of, mdss_dt_match); diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h index 02bbab42adbc..3afef4b1786d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.h +++ b/drivers/gpu/drm/msm/msm_mdss.h @@ -14,6 +14,7 @@ struct msm_mdss_data { u32 ubwc_static; u32 highest_bank_bit; u32 macrotile_mode; + u32 reg_bus_bw; }; #define UBWC_1_0 0x10000000 diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 5adc51f7ab59..ca44fd291c5b 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -270,6 +270,9 @@ int msm_rd_debugfs_init(struct drm_minor *minor) struct msm_rd_state *rd; int ret; + if (!priv->gpu_pdev) + return 0; + /* only create on first minor: */ if (priv->rd) return 0; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 95257ab0185d..4bc13f7d005a 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -29,9 +29,10 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) struct drm_gem_object *obj = submit->bos[i].obj; msm_gem_unpin_active(obj); - submit->bos[i].flags &= ~BO_PINNED; } + submit->bos_pinned = false; + mutex_unlock(&priv->lru.lock); msm_gpu_submit(gpu, submit); @@ -94,7 +95,7 @@ struct msm_ringbuffer 
*msm_ringbuffer_new(struct msm_gpu *gpu, int id, /* currently managing hangcheck ourselves: */ sched_timeout = MAX_SCHEDULE_TIMEOUT; - ret = drm_sched_init(&ring->sched, &msm_sched_ops, + ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, num_hw_submissions, 0, sched_timeout, NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 625c1bfc4173..b483ef48216a 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -11,9 +11,10 @@ #include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/io.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> -#include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/pm_runtime.h> #include <drm/drm_atomic_helper.h> @@ -346,18 +347,13 @@ MODULE_DEVICE_TABLE(of, mxsfb_dt_ids); static int mxsfb_probe(struct platform_device *pdev) { struct drm_device *drm; - const struct of_device_id *of_id = - of_match_device(mxsfb_dt_ids, &pdev->dev); int ret; - if (!pdev->dev.of_node) - return -ENODEV; - drm = drm_dev_alloc(&mxsfb_driver, &pdev->dev); if (IS_ERR(drm)) return PTR_ERR(drm); - ret = mxsfb_load(drm, of_id->data); + ret = mxsfb_load(drm, device_get_match_data(&pdev->dev)); if (ret) goto err_free; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 118807e38422..8d37a694b772 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -38,7 +38,9 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_fixed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -945,7 +947,8 @@ nv50_msto_prepare(struct drm_atomic_state *state, if (ret == 0) { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, payload->vc_start_slot, payload->time_slots, - payload->pbn, payload->time_slots * mst_state->pbn_div); + payload->pbn, + payload->time_slots * dfixed_trunc(mst_state->pbn_div)); } else { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0); } @@ -982,15 +985,14 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, const int clock = crtc_state->adjusted_mode.clock; asyh->or.bpc = connector->display_info.bpc; - asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3, - false); + asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4); } mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr); if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) { + if (!mst_state->pbn_div.full) { struct nouveau_encoder *outp = mstc->mstm->outp; mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr, diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 2edd7bb13fae..a04156ca8390 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; - /* When a client exits without waiting for it's queued up jobs to - * finish it might happen that we fault the channel. This is due to - * drm_file_free() calling drm_gem_release() before the postclose() - * callback. Hence, we can't tear down this scheduler entity before - * uvmm mappings are unmapped. 
Currently, we can't detect this case. - * - * However, this should be rare and harmless, since the channel isn't - * needed anymore. - */ - nouveau_sched_entity_fini(&chan->sched_entity); + /* Cancel all jobs from the entity's queue. */ + drm_sched_entity_fini(&chan->sched.entity); - /* wait for all activity to stop before cleaning up */ if (chan->chan) nouveau_channel_idle(chan->chan); + nouveau_sched_fini(&chan->sched); + /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { nouveau_abi16_ntfy_fini(chan, ntfy); @@ -344,8 +337,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched, - drm->sched_wq); + ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 9f538486c10e..1f5e243c0c75 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 280d1d9a559b..00cc7d1abaa3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt to release it. */ - if (bo->base.dev) + if (bo->base.dev) { + /* Gem objects not being shared with other VMs get their + * dma_resv from a root GEM object. + */ + if (nvbo->no_share) + drm_gem_object_put(nvbo->r_obj); + drm_gem_object_release(&bo->base); - else + } else { dma_resv_fini(&bo->base._resv); + } kfree(nvbo); } @@ -1055,17 +1062,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); + struct drm_gem_object *obj = &bo->base; struct ttm_resource *old_reg = bo->resource; struct nouveau_drm_tile *new_tile = NULL; int ret = 0; - if (new_reg->mem_type == TTM_PL_TT) { ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg); if (ret) return ret; } + drm_gpuvm_bo_gem_evict(obj, evict); nouveau_bo_move_ntfy(bo, new_reg); ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -1130,6 +1138,7 @@ out: out_ntfy: if (ret) { nouveau_bo_move_ntfy(bo, bo->resource); + drm_gpuvm_bo_gem_evict(obj, !evict); } return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 07f671cf895e..70c551921a9e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -26,6 +26,11 @@ struct nouveau_bo { struct list_head entry; int pbbo_index; bool validate_mapped; + + /* Root GEM object we derive the dma_resv of in case this BO is not + * shared between VMs. 
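+	 * The reference taken on it in nouveau_gem_new() is dropped again
+	 * in nouveau_bo_del_ttm() when the BO is destroyed.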
+ */ + struct drm_gem_object *r_obj; bool no_share; /* GPU address space is independent of CPU word size */ diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 50589f982d1a..6f6c31a9937b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, static void nouveau_cli_fini(struct nouveau_cli *cli) { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli); + /* All our channels are dead now, which means all the fences they * own are signalled, and all callback functions have been called. * @@ -199,8 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_uvmm_fini(&cli->uvmm); - nouveau_sched_entity_fini(&cli->sched_entity); + nouveau_sched_fini(&cli->sched); + if (uvmm) + nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->vmm); nvif_mmu_dtor(&cli->mmu); @@ -307,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; - ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched, - drm->sched_wq); + /* Don't pass in the (shared) sched_wq in order to let + * nouveau_sched_init() create a dedicated one for VM_BIND jobs. + * + * This is required to ensure that for VM_BIND jobs free_job() work and + * run_job() work can always run concurrently and hence, free_job() work + * can never stall run_job() work. For EXEC jobs we don't have this + * requirement, since EXEC job's free_job() does not require to take any + * locks which indirectly or directly are held for allocations + * elsewhere. + */ + ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); if (ret) goto done; @@ -579,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev) nvif_parent_ctor(&nouveau_parent, &drm->parent); drm->master.base.object.parent = &drm->parent; - ret = nouveau_sched_init(drm); - if (ret) + drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0, + WQ_MAX_ACTIVE); + if (!drm->sched_wq) { + ret = -ENOMEM; goto fail_alloc; + } ret = nouveau_cli_init(drm, "DRM-master", &drm->master); if (ret) - goto fail_sched; + goto fail_wq; ret = nouveau_cli_init(drm, "DRM", &drm->client); if (ret) @@ -655,8 +670,8 @@ fail_ttm: nouveau_cli_fini(&drm->client); fail_master: nouveau_cli_fini(&drm->master); -fail_sched: - nouveau_sched_fini(drm); +fail_wq: + destroy_workqueue(drm->sched_wq); fail_alloc: nvif_parent_dtor(&drm->parent); kfree(drm); @@ -708,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev) } mutex_unlock(&drm->clients_lock); - nouveau_sched_fini(drm); - nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->master); + destroy_workqueue(drm->sched_wq); nvif_parent_dtor(&drm->parent); mutex_destroy(&drm->clients_lock); kfree(drm); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e73a233c6572..8a6d94c8b163 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -93,9 +93,12 @@ struct nouveau_cli { struct nvif_mmu mmu; struct nouveau_vmm vmm; struct nouveau_vmm svm; - struct nouveau_uvmm uvmm; + struct { + struct nouveau_uvmm *ptr; + bool disabled; + } uvmm; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched sched; const struct nvif_mclass *mem; @@ -121,10 +124,7 @@ struct nouveau_cli_work { static inline struct nouveau_uvmm * nouveau_cli_uvmm(struct nouveau_cli *cli) { - if (!cli || 
!cli->uvmm.vmm.cli) - return NULL; - - return &cli->uvmm; + return cli ? cli->uvmm.ptr : NULL; } static inline struct nouveau_uvmm * @@ -258,6 +258,9 @@ struct nouveau_drm { u64 context_base; } *runl; + /* Workqueue used for channel schedulers. */ + struct workqueue_struct *sched_wq; + /* context for accelerated drm-internal operations */ struct nouveau_channel *cechan; struct nouveau_channel *channel; @@ -298,10 +301,6 @@ struct nouveau_drm { struct mutex lock; bool component_registered; } audio; - - struct drm_gpu_scheduler sched; - struct workqueue_struct *sched_wq; - }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 9a5ef574744b..bc5d71b79ab2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: MIT -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_gem.h" #include "nouveau_mem.h" @@ -86,14 +84,12 @@ */ static int -nouveau_exec_job_submit(struct nouveau_job *job) +nouveau_exec_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); struct nouveau_cli *cli = job->cli; struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; int ret; /* Create a new fence, but do not emit yet. */ @@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job) return ret; nouveau_uvmm_lock(uvmm); - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); - drm_exec_until_all_locked(exec) { - struct drm_gpuva *va; - - drm_gpuvm_for_each_va(va, &uvmm->base) { - if (unlikely(va == &uvmm->base.kernel_alloc_node)) - continue; - - ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) - goto err_uvmm_unlock; - } + ret = drm_gpuvm_exec_lock(vme); + if (ret) { + nouveau_uvmm_unlock(uvmm); + return ret; } nouveau_uvmm_unlock(uvmm); - drm_exec_for_each_locked_object(exec, index, obj) { - struct nouveau_bo *nvbo = nouveau_gem_object(obj); - - ret = nouveau_bo_validate(nvbo, true, false); - if (ret) - goto err_exec_fini; + ret = drm_gpuvm_exec_validate(vme); + if (ret) { + drm_gpuvm_exec_unlock(vme); + return ret; } return 0; - -err_uvmm_unlock: - nouveau_uvmm_unlock(uvmm); -err_exec_fini: - drm_exec_fini(exec); - return ret; - } static void -nouveau_exec_job_armed_submit(struct nouveau_job *job) +nouveau_exec_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -192,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); + nouveau_job_done(job); nouveau_job_free(job); kfree(exec_job->fence); @@ -211,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - nouveau_sched_entity_fini(job->entity); - return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -259,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob, } } + args.file_priv = 
__args->file_priv; job->chan = __args->chan; - args.sched_entity = __args->sched_entity; - args.file_priv = __args->file_priv; + args.sched = __args->sched; + /* Plus one to account for the HW fence. */ + args.credits = job->push.count + 1; args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -415,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched_entity = &chan16->sched_entity; + args.sched = &chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h index 5488d337bcc0..9b3b151facfd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.h +++ b/drivers/gpu/drm/nouveau/nouveau_exec.h @@ -3,16 +3,12 @@ #ifndef __NOUVEAU_EXEC_H__ #define __NOUVEAU_EXEC_H__ -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_sched.h" struct nouveau_exec_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; - - struct drm_exec exec; + struct nouveau_sched *sched; struct nouveau_channel *chan; struct { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index a0d303e5ce3d..49c2bcbef129 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; - if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv) + if (nvbo->no_share && uvmm && + drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv) return -EPERM; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); @@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (unlikely(!uvmm)) return -EINVAL; - resv = &uvmm->resv; + resv = drm_gpuvm_resv(&uvmm->base); } if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART))) @@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) nvbo->valid_domains &= domain; + if (nvbo->no_share) { + nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base); + drm_gem_object_get(nvbo->r_obj); + } + *pnvbo = nvbo; return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 23cd43a7fd19..bf2dc7567ea4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev) return 0; } -static int nouveau_platform_remove(struct platform_device *pdev) +static void nouveau_platform_remove(struct platform_device *pdev) { struct drm_device *dev = platform_get_drvdata(pdev); nouveau_drm_device_remove(dev); - return 0; } #if IS_ENABLED(CONFIG_OF) @@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = { .of_match_table = of_match_ptr(nouveau_platform_match), }, .probe = nouveau_platform_probe, - .remove = nouveau_platform_remove, + .remove_new = nouveau_platform_remove, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7c376c4ccdcf..dd98f6910f9c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -12,30 +12,28 @@ #include "nouveau_abi16.h" #include "nouveau_sched.h" -/* FIXME - * - * We want to make sure that jobs currently executing can't be deferred by - * other jobs 
competing for the hardware. Otherwise we might end up with job - * timeouts just because of too many clients submitting too many jobs. We don't - * want jobs to time out because of system load, but because of the job being - * too bulky. - * - * For now allow for up to 16 concurrent jobs in flight until we know how many - * rings the hardware can process in parallel. - */ -#define NOUVEAU_SCHED_HW_SUBMISSIONS 16 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 +/* Starts at 0, since the DRM scheduler interprets those parameters as (initial) + * index to the run-queue array. + */ +enum nouveau_sched_priority { + NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL, + NOUVEAU_SCHED_PRIORITY_COUNT, +}; + int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args) { - struct nouveau_sched_entity *entity = args->sched_entity; + struct nouveau_sched *sched = args->sched; int ret; + INIT_LIST_HEAD(&job->entry); + job->file_priv = args->file_priv; job->cli = nouveau_cli(args->file_priv); - job->entity = entity; + job->sched = sched; job->sync = args->sync; job->resv_usage = args->resv_usage; @@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job, ret = -ENOMEM; goto err_free_objs; } - } - ret = drm_sched_job_init(&job->base, &entity->base, NULL); + ret = drm_sched_job_init(&job->base, &sched->entity, + args->credits, NULL); if (ret) goto err_free_chains; @@ -109,6 +107,27 @@ return ret; } void +nouveau_job_fini(struct nouveau_job *job) +{ + dma_fence_put(job->done_fence); + drm_sched_job_cleanup(&job->base); + + job->ops->free(job); +} + +void +nouveau_job_done(struct nouveau_job *job) +{ + struct nouveau_sched *sched = job->sched; + + spin_lock(&sched->job.list.lock); + list_del(&job->entry); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); +} + +void nouveau_job_free(struct nouveau_job *job) { kfree(job->in_sync.data); @@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job) kfree(job->out_sync.chains); } -void nouveau_job_fini(struct nouveau_job *job) -{ - dma_fence_put(job->done_fence); - drm_sched_job_cleanup(&job->base); - job->ops->free(job); -} - static int sync_find_fence(struct nouveau_job *job, struct drm_nouveau_sync *sync, @@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job) int nouveau_job_submit(struct nouveau_job *job) { - struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct nouveau_sched *sched = job->sched; struct dma_fence *done_fence = NULL; + struct drm_gpuvm_exec vm_exec = { + .vm = &nouveau_cli_uvmm(job->cli)->base, + .flags = DRM_EXEC_IGNORE_DUPLICATES, + .num_fences = 1, + }; int ret; ret = nouveau_job_add_deps(job); @@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job) /* Make sure the job appears on the sched_entity's queue in the same * order as it was submitted. */ - mutex_lock(&entity->mutex); + mutex_lock(&sched->mutex); /* Guarantee we won't fail after the submit() callback returned * successfully. */ if (job->ops->submit) { - ret = job->ops->submit(job); + ret = job->ops->submit(job, &vm_exec); if (ret) goto err_cleanup; } + /* Submit was successful; add the job to the schedulers job list. 
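+	 * nouveau_job_done() removes it again and wakes up sched->job.wq,
+	 * which nouveau_sched_fini() waits on to drain the list.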
*/ + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); + drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); if (job->sync) done_fence = dma_fence_get(job->done_fence); - /* If a sched job depends on a dma-fence from a job from the same GPU - * scheduler instance, but a different scheduler entity, the GPU - * scheduler does only wait for the particular job to be scheduled, - * rather than for the job to fully complete. This is due to the GPU - * scheduler assuming that there is a scheduler instance per ring. - * However, the current implementation, in order to avoid arbitrary - * amounts of kthreads, has a single scheduler instance while scheduler - * entities represent rings. - * - * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all - * out-fences in order to force the scheduler to wait for full job - * completion for dependent jobs from different entities and same - * scheduler instance. - * - * There is some work in progress [1] to address the issues of firmware - * schedulers; once it is in-tree the scheduler topology in Nouveau - * should be re-worked accordingly. - * - * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/ - */ - set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); - if (job->ops->armed_submit) - job->ops->armed_submit(job); + job->ops->armed_submit(job, &vm_exec); nouveau_job_fence_attach(job); @@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job) drm_sched_entity_push_job(&job->base); - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); if (done_fence) { dma_fence_wait(done_fence, true); @@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job) return 0; err_cleanup: - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); nouveau_job_fence_attach_cleanup(job); err: job->state = NOUVEAU_JOB_SUBMIT_FAILED; return ret; } -bool -nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work) -{ - return queue_work(entity->sched_wq, work); -} - static struct dma_fence * nouveau_job_run(struct nouveau_job *job) { @@ -399,50 +392,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq) -{ - mutex_init(&entity->mutex); - spin_lock_init(&entity->job.list.lock); - INIT_LIST_HEAD(&entity->job.list.head); - init_waitqueue_head(&entity->job.wq); - - entity->sched_wq = sched_wq; - return drm_sched_entity_init(&entity->base, - DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); -} - -void -nouveau_sched_entity_fini(struct nouveau_sched_entity *entity) -{ - drm_sched_entity_destroy(&entity->base); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, }; -int nouveau_sched_init(struct nouveau_drm *drm) +int +nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) { - struct drm_gpu_scheduler *sched = &drm->sched; + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS); + int ret; - drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq"); - if (!drm->sched_wq) - return 
-ENOMEM; + if (!wq) { + wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE, + current->pid); + if (!wq) + return -ENOMEM; + + sched->wq = wq; + } - return drm_sched_init(sched, &nouveau_sched_ops, - DRM_SCHED_PRIORITY_COUNT, - NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, - NULL, NULL, "nouveau_sched", drm->dev->dev); + ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq, + NOUVEAU_SCHED_PRIORITY_COUNT, + credit_limit, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + if (ret) + goto fail_wq; + + /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use + * when we want to have a single run-queue only. + * + * It's not documented, but one will find out when trying to use any + * other priority running into faults, because the scheduler uses the + * priority as array index. + * + * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not + * matching the enum type used in drm_sched_entity_init(). + */ + ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL, + &drm_sched, 1, NULL); + if (ret) + goto fail_sched; + + mutex_init(&sched->mutex); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); + + return 0; + +fail_sched: + drm_sched_fini(drm_sched); +fail_wq: + if (sched->wq) + destroy_workqueue(sched->wq); + return ret; } -void nouveau_sched_fini(struct nouveau_drm *drm) +void +nouveau_sched_fini(struct nouveau_sched *sched) { - destroy_workqueue(drm->sched_wq); - drm_sched_fini(&drm->sched); + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; + + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + + drm_sched_entity_fini(entity); + drm_sched_fini(drm_sched); + + /* Destroy workqueue after scheduler tear down, otherwise it might still + * be in use. + */ + if (sched->wq) + destroy_workqueue(sched->wq); } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index 27ac19792597..a6528f5981e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -5,7 +5,7 @@ #include <linux/types.h> -#include <drm/drm_exec.h> +#include <drm/drm_gpuvm.h> #include <drm/gpu_scheduler.h> #include "nouveau_drv.h" @@ -26,7 +26,8 @@ enum nouveau_job_state { struct nouveau_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; + u32 credits; enum dma_resv_usage resv_usage; bool sync; @@ -49,12 +50,12 @@ struct nouveau_job { enum nouveau_job_state state; - struct nouveau_sched_entity *entity; + struct nouveau_sched *sched; + struct list_head entry; struct drm_file *file_priv; struct nouveau_cli *cli; - struct drm_exec exec; enum dma_resv_usage resv_usage; struct dma_fence *done_fence; @@ -76,8 +77,8 @@ struct nouveau_job { /* If .submit() returns without any error, it is guaranteed that * armed_submit() is called. 
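 * Both callbacks receive the same drm_gpuvm_exec context, so objects
 * locked in .submit() are unlocked again in .armed_submit().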
*/ - int (*submit)(struct nouveau_job *); - void (*armed_submit)(struct nouveau_job *); + int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *); + void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *); struct dma_fence *(*run)(struct nouveau_job *); void (*free)(struct nouveau_job *); enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *); @@ -90,20 +91,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args, int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args); -void nouveau_job_free(struct nouveau_job *job); - -int nouveau_job_submit(struct nouveau_job *job); void nouveau_job_fini(struct nouveau_job *job); +int nouveau_job_submit(struct nouveau_job *job); +void nouveau_job_done(struct nouveau_job *job); +void nouveau_job_free(struct nouveau_job *job); -#define to_nouveau_sched_entity(entity) \ - container_of((entity), struct nouveau_sched_entity, base) - -struct nouveau_sched_entity { - struct drm_sched_entity base; +struct nouveau_sched { + struct drm_gpu_scheduler base; + struct drm_sched_entity entity; + struct workqueue_struct *wq; struct mutex mutex; - struct workqueue_struct *sched_wq; - struct { struct { struct list_head head; @@ -113,15 +111,8 @@ struct nouveau_sched_entity { } job; }; -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq); -void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity); - -bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work); - -int nouveau_sched_init(struct nouveau_drm *drm); -void nouveau_sched_fini(struct nouveau_drm *drm); +int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_fini(struct nouveau_sched *sched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 5cf892c50f43..4f223c972c6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -62,6 +62,8 @@ struct bind_job_op { enum vm_bind_op op; u32 flags; + struct drm_gpuvm_bo *vm_bo; + struct { u64 addr; u64 range; @@ -436,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg) static void op_map_prepare_unwind(struct nouveau_uvma *uvma) { + struct drm_gpuva *va = &uvma->va; nouveau_uvma_gem_put(uvma); - drm_gpuva_remove(&uvma->va); + drm_gpuva_remove(va); nouveau_uvma_free(uvma); } @@ -466,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, break; case DRM_GPUVA_OP_REMAP: { struct drm_gpuva_op_remap *r = &op->remap; + struct drm_gpuva *va = r->unmap->va; if (r->next) op_map_prepare_unwind(new->next); @@ -473,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (r->prev) op_map_prepare_unwind(new->prev); - op_unmap_prepare_unwind(r->unmap->va); + op_unmap_prepare_unwind(va); break; } case DRM_GPUVA_OP_UNMAP: @@ -604,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u) drm_gpuva_unmap(u); } +/* + * Note: @args should not be NULL when calling for a map operation. 
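+ * For unmap-only operations @args may be NULL, since only the map
+ * path dereferences it.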
+ */ static int nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -624,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (ret) goto unwind; - if (args && vmm_get_range) { + if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, vmm_get_range); if (ret) { @@ -632,6 +639,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, goto unwind; } } + break; } case DRM_GPUVA_OP_REMAP: { @@ -929,25 +937,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range) { - u64 end = addr + range; - u64 kernel_managed_end = uvmm->kernel_managed_addr + - uvmm->kernel_managed_size; - if (addr & ~PAGE_MASK) return -EINVAL; if (range & ~PAGE_MASK) return -EINVAL; - if (end <= addr) - return -EINVAL; - - if (addr < NOUVEAU_VA_SPACE_START || - end > NOUVEAU_VA_SPACE_END) - return -EINVAL; - - if (addr < kernel_managed_end && - end > uvmm->kernel_managed_addr) + if (!drm_gpuvm_range_valid(&uvmm->base, addr, range)) return -EINVAL; return 0; @@ -970,6 +966,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref) { struct nouveau_uvmm_bind_job *job = container_of(kref, struct nouveau_uvmm_bind_job, kref); + struct bind_job_op *op, *next; + + list_for_each_op_safe(op, next, &job->ops) { + list_del(&op->entry); + kfree(op); + } nouveau_job_free(&job->base); kfree(job); @@ -1011,14 +1013,16 @@ bind_validate_op(struct nouveau_job *job, static void bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) { - struct nouveau_uvmm_bind_job *bind_job; - struct nouveau_sched_entity *entity = job->entity; + struct nouveau_sched *sched = job->sched; + struct nouveau_job *__job; struct bind_job_op *op; u64 end = addr + range; again: - spin_lock(&entity->job.list.lock); - list_for_each_entry(bind_job, &entity->job.list.head, entry) { + spin_lock(&sched->job.list.lock); + list_for_each_entry(__job, &sched->job.list.head, entry) { + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); + list_for_each_op(op, &bind_job->ops) { if (op->op == OP_UNMAP) { u64 op_addr = op->va.addr; @@ -1026,7 +1030,7 @@ again: if (!(end <= op_addr || addr >= op_end)) { nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1034,7 +1038,7 @@ again: } } } - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); } static int @@ -1113,22 +1117,28 @@ bind_validate_region(struct nouveau_job *job) } static void -bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) +bind_link_gpuvas(struct bind_job_op *bop) { + struct nouveau_uvma_prealloc *new = &bop->new; + struct drm_gpuvm_bo *vm_bo = bop->vm_bo; + struct drm_gpuva_ops *ops = bop->ops; struct drm_gpuva_op *op; drm_gpuva_for_each_op(op, ops) { switch (op->op) { case DRM_GPUVA_OP_MAP: - drm_gpuva_link(&new->map->va); + drm_gpuva_link(&new->map->va, vm_bo); break; - case DRM_GPUVA_OP_REMAP: + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva *va = op->remap.unmap->va; + if (op->remap.prev) - drm_gpuva_link(&new->prev->va); + drm_gpuva_link(&new->prev->va, va->vm_bo); if (op->remap.next) - drm_gpuva_link(&new->next->va); - drm_gpuva_unlink(op->remap.unmap->va); + drm_gpuva_link(&new->next->va, va->vm_bo); + drm_gpuva_unlink(va); break; + } case DRM_GPUVA_OP_UNMAP: drm_gpuva_unlink(op->unmap.va); break; @@ -1139,21 +1149,70 @@ 
bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) } static int -nouveau_uvmm_bind_job_submit(struct nouveau_job *job) +bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec, + unsigned int num_fences) +{ + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); + struct bind_job_op *op; + int ret; + + list_for_each_op(op, &bind_job->ops) { + struct drm_gpuva_op *va_op; + + if (!op->ops) + continue; + + drm_gpuva_for_each_op(va_op, op->ops) { + struct drm_gem_object *obj = op_gem_obj(va_op); + + if (unlikely(!obj)) + continue; + + ret = drm_exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + + /* Don't validate GEMs backing mappings we're about to + * unmap, it's not worth the effort. + */ + if (va_op->op == DRM_GPUVA_OP_UNMAP) + continue; + + ret = nouveau_bo_validate(nouveau_gem_object(obj), + true, false); + if (ret) + return ret; + } + } + + return 0; +} + +static int +nouveau_uvmm_bind_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - struct drm_exec *exec = &job->exec; + struct drm_exec *exec = &vme->exec; struct bind_job_op *op; int ret; list_for_each_op(op, &bind_job->ops) { if (op->op == OP_MAP) { - op->gem.obj = drm_gem_object_lookup(job->file_priv, - op->gem.handle); - if (!op->gem.obj) + struct drm_gem_object *obj = op->gem.obj = + drm_gem_object_lookup(job->file_priv, + op->gem.handle); + if (!obj) return -ENOENT; + + dma_resv_lock(obj->resv, NULL); + op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); + + drm_gpuvm_bo_extobj_add(op->vm_bo); } ret = bind_validate_op(job, op); @@ -1176,6 +1235,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) * unwind all GPU VA space changes on failure. */ nouveau_uvmm_lock(uvmm); + list_for_each_op(op, &bind_job->ops) { switch (op->op) { case OP_MAP_SPARSE: @@ -1287,55 +1347,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } } - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(exec, vme->flags, 0); drm_exec_until_all_locked(exec) { - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - ret = drm_exec_prepare_obj(exec, obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } - } - } - } - - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - /* Don't validate GEMs backing mappings we're about to - * unmap, it's not worth the effort. 
- */ - if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP)) - continue; - - ret = nouveau_bo_validate(nouveau_gem_object(obj), - true, false); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } + ret = bind_lock_validate(job, exec, vme->num_fences); + drm_exec_retry_on_contention(exec); + if (ret) { + op = list_last_op(&bind_job->ops); + goto unwind; } } @@ -1364,7 +1382,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) case OP_UNMAP_SPARSE: case OP_MAP: case OP_UNMAP: - bind_link_gpuvas(op->ops, &op->new); + bind_link_gpuvas(op); break; default: break; @@ -1372,10 +1390,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } nouveau_uvmm_unlock(uvmm); - spin_lock(&entity->job.list.lock); - list_add(&bind_job->entry, &entity->job.list.head); - spin_unlock(&entity->job.list.lock); - return 0; unwind_continue: @@ -1410,21 +1424,17 @@ unwind: } nouveau_uvmm_unlock(uvmm); - drm_exec_fini(exec); + drm_gpuvm_exec_unlock(vme); return ret; } static void -nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job) +nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -1462,14 +1472,11 @@ out: } static void -nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) +nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job) { - struct nouveau_uvmm_bind_job *bind_job = - container_of(work, struct nouveau_uvmm_bind_job, work); - struct nouveau_job *job = &bind_job->base; + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); - struct nouveau_sched_entity *entity = job->entity; - struct bind_job_op *op, *next; + struct bind_job_op *op; list_for_each_op(op, &bind_job->ops) { struct drm_gem_object *obj = op->gem.obj; @@ -1511,42 +1518,27 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) if (!IS_ERR_OR_NULL(op->ops)) drm_gpuva_ops_free(&uvmm->base, op->ops); + if (!IS_ERR_OR_NULL(op->vm_bo)) { + dma_resv_lock(obj->resv, NULL); + drm_gpuvm_bo_put(op->vm_bo); + dma_resv_unlock(obj->resv); + } + if (obj) drm_gem_object_put(obj); } - spin_lock(&entity->job.list.lock); - list_del(&bind_job->entry); - spin_unlock(&entity->job.list.lock); - + nouveau_job_done(job); complete_all(&bind_job->complete); - wake_up(&entity->job.wq); - - /* Remove and free ops after removing the bind job from the job list to - * avoid races against bind_validate_map_sparse(). 
- */ - list_for_each_op_safe(op, next, &bind_job->ops) { - list_del(&op->entry); - kfree(op); - } nouveau_uvmm_bind_job_put(bind_job); } -static void -nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job) -{ - struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - - nouveau_sched_entity_qwork(entity, &bind_job->work); -} - static struct nouveau_job_ops nouveau_bind_job_ops = { .submit = nouveau_uvmm_bind_job_submit, .armed_submit = nouveau_uvmm_bind_job_armed_submit, .run = nouveau_uvmm_bind_job_run, - .free = nouveau_uvmm_bind_job_free_qwork, + .free = nouveau_uvmm_bind_job_cleanup, }; static int @@ -1607,7 +1599,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, return ret; INIT_LIST_HEAD(&job->ops); - INIT_LIST_HEAD(&job->entry); for (i = 0; i < __args->op.count; i++) { ret = bind_job_op_from_uop(&op, &__args->op.s[i]); @@ -1618,11 +1609,12 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, } init_completion(&job->complete); - INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn); - args.sched_entity = __args->sched_entity; args.file_priv = __args->file_priv; + args.sched = __args->sched; + args.credits = 1; + args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -1648,18 +1640,6 @@ err_free: return ret; } -int -nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, - void *data, - struct drm_file *file_priv) -{ - struct nouveau_cli *cli = nouveau_cli(file_priv); - struct drm_nouveau_vm_init *init = data; - - return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr, - init->kernel_managed_size); -} - static int nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args) { @@ -1760,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched_entity = &cli->sched_entity; + args.sched = &cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); @@ -1776,15 +1756,18 @@ void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_map(uvma, mem); - drm_gpuva_invalidate(va, false); + nouveau_uvma_map(uvma, mem); + drm_gpuva_invalidate(va, false); + } } } @@ -1792,29 +1775,62 @@ void nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_unmap(uvma); - drm_gpuva_invalidate(va, true); + nouveau_uvma_unmap(uvma); + drm_gpuva_invalidate(va, true); + } } } +static void +nouveau_uvmm_free(struct drm_gpuvm *gpuvm) +{ + struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); + + kfree(uvmm); +} + +static int +nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + + return nouveau_bo_validate(nvbo, true, false); +} + +static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, + 
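/* Called by drm_gpuvm_exec_validate() for each evicted extobj. */
+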
.vm_bo_validate = nouveau_uvmm_bo_validate, +}; + int -nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size) +nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, + void *data, + struct drm_file *file_priv) { + struct nouveau_uvmm *uvmm; + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_device *drm = cli->drm->dev; + struct drm_gem_object *r_obj; + struct drm_nouveau_vm_init *init = data; + u64 kernel_managed_end; int ret; - u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; - mutex_init(&uvmm->mutex); - dma_resv_init(&uvmm->resv); - mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); - mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); + if (check_add_overflow(init->kernel_managed_addr, + init->kernel_managed_size, + &kernel_managed_end)) + return -EINVAL; + + if (kernel_managed_end > NOUVEAU_VA_SPACE_END) + return -EINVAL; mutex_lock(&cli->mutex); @@ -1823,39 +1839,48 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, goto out_unlock; } - if (kernel_managed_end <= kernel_managed_addr) { - ret = -EINVAL; + uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL); + if (!uvmm) { + ret = -ENOMEM; goto out_unlock; } - if (kernel_managed_end > NOUVEAU_VA_SPACE_END) { - ret = -EINVAL; + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { + kfree(uvmm); + ret = -ENOMEM; goto out_unlock; } - uvmm->kernel_managed_addr = kernel_managed_addr; - uvmm->kernel_managed_size = kernel_managed_size; + mutex_init(&uvmm->mutex); + mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); - drm_gpuvm_init(&uvmm->base, cli->name, + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, - kernel_managed_addr, kernel_managed_size, - NULL); + init->kernel_managed_addr, + init->kernel_managed_size, + &gpuvm_ops); + /* GPUVM takes care from here on. 
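+	 * drm_gpuvm_init() took its own reference on r_obj, which makes the
+	 * initial reference from drm_gpuvm_resv_object_alloc() safe to drop.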
*/ + drm_gem_object_put(r_obj); ret = nvif_vmm_ctor(&cli->mmu, "uvmm", cli->vmm.vmm.object.oclass, RAW, - kernel_managed_addr, kernel_managed_size, - NULL, 0, &cli->uvmm.vmm.vmm); + init->kernel_managed_addr, + init->kernel_managed_size, + NULL, 0, &uvmm->vmm.vmm); if (ret) - goto out_free_gpuva_mgr; + goto out_gpuvm_fini; - cli->uvmm.vmm.cli = cli; + uvmm->vmm.cli = cli; + cli->uvmm.ptr = uvmm; mutex_unlock(&cli->mutex); return 0; -out_free_gpuva_mgr: - drm_gpuvm_destroy(&uvmm->base); +out_gpuvm_fini: + drm_gpuvm_put(&uvmm->base); out_unlock: mutex_unlock(&cli->mutex); return ret; @@ -1867,15 +1892,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) MA_STATE(mas, &uvmm->region_mt, 0, 0); struct nouveau_uvma_region *reg; struct nouveau_cli *cli = uvmm->vmm.cli; - struct nouveau_sched_entity *entity = &cli->sched_entity; struct drm_gpuva *va, *next; - if (!cli) - return; - - rmb(); /* for list_empty to work without lock */ - wait_event(entity->job.wq, list_empty(&entity->job.list.head)); - nouveau_uvmm_lock(uvmm); drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { struct nouveau_uvma *uvma = uvma_from_va(va); @@ -1910,8 +1928,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) mutex_lock(&cli->mutex); nouveau_vmm_fini(&uvmm->vmm); - drm_gpuvm_destroy(&uvmm->base); + drm_gpuvm_put(&uvmm->base); mutex_unlock(&cli->mutex); - - dma_resv_fini(&uvmm->resv); } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index a308c59760a5..9d3c348581eb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -12,12 +12,6 @@ struct nouveau_uvmm { struct nouveau_vmm vmm; struct maple_tree region_mt; struct mutex mutex; - struct dma_resv resv; - - u64 kernel_managed_addr; - u64 kernel_managed_size; - - bool disabled; }; struct nouveau_uvma_region { @@ -50,8 +44,6 @@ struct nouveau_uvmm_bind_job { struct nouveau_job base; struct kref kref; - struct list_head entry; - struct work_struct work; struct completion complete; /* struct bind_job_op */ @@ -60,7 +52,7 @@ struct nouveau_uvmm_bind_job { struct nouveau_uvmm_bind_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; unsigned int flags; @@ -82,8 +74,6 @@ struct nouveau_uvmm_bind_job_args { #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base) -int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size); void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm); void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem); diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index 5b71a5a5cd85..cdbc75e3d1f6 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -39,7 +39,7 @@ struct nv04_fence_priv { static int nv04_fence_emit(struct nouveau_fence *fence) { - struct nvif_push *push = fence->channel->chan.push; + struct nvif_push *push = unrcu_pointer(fence->channel)->chan.push; int ret = PUSH_WAIT(push, 2); if (ret == 0) { PUSH_NVSQ(push, NV_SW, 0x0150, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index 87a62d4ff4bd..7d4716dcd512 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -24,7 +24,6 @@ #include "chan.h" #include "chid.h" #include "cgrp.h" -#include "chid.h" #include "runl.h" 
#include "priv.h" diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index c26aab4939fa..993691b3cc7e 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -22,11 +22,11 @@ #include <linux/hardirq.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <linux/sizes.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> #include <linux/of.h> -#include <linux/of_device.h> #include <linux/component.h> #include <linux/sys_soc.h> #include <drm/drm_fourcc.h> @@ -4765,7 +4765,7 @@ static int dispc_bind(struct device *dev, struct device *master, void *data) if (soc) dispc->feat = soc->data; else - dispc->feat = of_match_device(dispc_of_match, &pdev->dev)->data; + dispc->feat = device_get_match_data(&pdev->dev); r = dispc_errata_i734_wa_init(dispc); if (r) diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 02955f976845..988888e164d7 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -22,12 +22,13 @@ #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <linux/gfp.h> #include <linux/sizes.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/of_platform.h> #include <linux/of_graph.h> #include <linux/regulator/consumer.h> #include <linux/suspend.h> @@ -1445,7 +1446,7 @@ static int dss_probe(struct platform_device *pdev) if (soc) dss->feat = soc->data; else - dss->feat = of_match_device(dss_of_match, &pdev->dev)->data; + dss->feat = device_get_match_data(&pdev->dev); /* Map I/O registers, get and setup clocks. 
*/ dss->base = devm_platform_ioremap_resource(pdev, 0); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index b2835b3ea6f5..6598c9c08ba1 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -69,7 +69,6 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; struct omap_drm_private *priv = dev->dev_private; - bool fence_cookie = dma_fence_begin_signalling(); dispc_runtime_get(priv->dispc); @@ -92,6 +91,8 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state) omap_atomic_wait_for_completion(dev, old_state); drm_atomic_helper_commit_planes(dev, old_state, 0); + + drm_atomic_helper_commit_hw_done(old_state); } else { /* * OMAP3 DSS seems to have issues with the work-around above, @@ -101,11 +102,9 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_planes(dev, old_state, 0); drm_atomic_helper_commit_modeset_enables(dev, old_state); - } - drm_atomic_helper_commit_hw_done(old_state); - - dma_fence_end_signalling(fence_cookie); + drm_atomic_helper_commit_hw_done(old_state); + } /* * Wait for completion of the page flips to ensure that old buffers diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index c48fa531ca32..3421e8389222 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -48,7 +48,7 @@ struct omap_gem_object { * OMAP_BO_MEM_DMA_API flag set) * * - buffers imported from dmabuf (with the OMAP_BO_MEM_DMABUF flag set) - * if they are physically contiguous (when sgt->orig_nents == 1) + * if they are physically contiguous * * - buffers mapped through the TILER when pin_cnt is not zero, in which * case the DMA address points to the TILER aperture @@ -148,12 +148,18 @@ u64 omap_gem_mmap_offset(struct drm_gem_object *obj) return drm_vma_node_offset_addr(&obj->vma_node); } +static bool omap_gem_sgt_is_contiguous(struct sg_table *sgt, size_t size) +{ + return !(drm_prime_get_contiguous_size(sgt) < size); +} + static bool omap_gem_is_contiguous(struct omap_gem_object *omap_obj) { if (omap_obj->flags & OMAP_BO_MEM_DMA_API) return true; - if ((omap_obj->flags & OMAP_BO_MEM_DMABUF) && omap_obj->sgt->nents == 1) + if ((omap_obj->flags & OMAP_BO_MEM_DMABUF) && + omap_gem_sgt_is_contiguous(omap_obj->sgt, omap_obj->base.size)) return true; return false; @@ -1385,7 +1391,7 @@ struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size, union omap_gem_size gsize; /* Without a DMM only physically contiguous buffers can be supported. */ - if (sgt->orig_nents != 1 && !priv->has_dmm) + if (!omap_gem_sgt_is_contiguous(sgt, size) && !priv->has_dmm) return ERR_PTR(-EINVAL); gsize.bytes = PAGE_ALIGN(size); @@ -1399,7 +1405,7 @@ struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size, omap_obj->sgt = sgt; - if (sgt->orig_nents == 1) { + if (omap_gem_sgt_is_contiguous(sgt, size)) { omap_obj->dma_addr = sg_dma_address(sgt->sgl); } else { /* Create pages list from sgt */ diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 99e14dc212ec..dad938cf6dec 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -194,6 +194,15 @@ config DRM_PANEL_ILITEK_ILI9341 QVGA (240x320) RGB panels. support serial & parallel rgb interface. 
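The omap_gem change above stops treating a single sg_table entry as a proxy for "physically contiguous": drm_prime_get_contiguous_size() walks the table and returns how many bytes are contiguous from the start, which also covers multi-entry tables whose pages happen to be adjacent or were merged during DMA mapping. A minimal sketch of the check, matching the new omap_gem_sgt_is_contiguous() helper:

#include <linux/scatterlist.h>
#include <drm/drm_prime.h>

/* An imported dma-buf is usable without remapping hardware (DMM/TILER)
 * only if its contiguous span covers the whole buffer. */
static bool demo_sgt_is_contiguous(struct sg_table *sgt, size_t size)
{
	return drm_prime_get_contiguous_size(sgt) >= size;
}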
+config DRM_PANEL_ILITEK_ILI9805 + tristate "Ilitek ILI9805-based panels" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y if you want to enable support for panels based on the + Ilitek ILI9805 controller. + config DRM_PANEL_ILITEK_ILI9881C tristate "Ilitek ILI9881C-based panels" depends on OF @@ -735,6 +744,15 @@ config DRM_PANEL_SITRONIX_ST7789V Say Y here if you want to enable support for the Sitronix ST7789V controller for 240x320 LCD panels +config DRM_PANEL_SYNAPTICS_R63353 + tristate "Synaptics R63353-based panels" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y if you want to enable support for panels based on the + Synaptics R63353 controller. + config DRM_PANEL_SONY_ACX565AKM tristate "Sony ACX565AKM panel" depends on GPIOLIB && OF && SPI diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index d10c3de51c6d..d94a644d0a6c 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d obj-$(CONFIG_DRM_PANEL_HIMAX_HX8394) += panel-himax-hx8394.o obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o +obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9805) += panel-ilitek-ili9805.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9882T) += panel-ilitek-ili9882t.o obj-$(CONFIG_DRM_PANEL_INNOLUX_EJ030NA) += panel-innolux-ej030na.o @@ -74,6 +75,7 @@ obj-$(CONFIG_DRM_PANEL_SHARP_LS060T1SX01) += panel-sharp-ls060t1sx01.o obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7701) += panel-sitronix-st7701.o obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7703) += panel-sitronix-st7703.o obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7789V) += panel-sitronix-st7789v.o +obj-$(CONFIG_DRM_PANEL_SYNAPTICS_R63353) += panel-synaptics-r63353.o obj-$(CONFIG_DRM_PANEL_SONY_ACX565AKM) += panel-sony-acx565akm.o obj-$(CONFIG_DRM_PANEL_SONY_TD4353_JDI) += panel-sony-td4353-jdi.o obj-$(CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521) += panel-sony-tulip-truly-nt35521.o diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 95c8472d878a..a0b6f69b916f 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -203,6 +203,9 @@ struct edp_panel_entry { /** @name: Name of this panel (for printing to logs). */ const char *name; + + /** @override_edid_mode: Override the mode obtained by edid. 
*/ + const struct drm_display_mode *override_edid_mode; }; struct panel_edp { @@ -301,6 +304,24 @@ static unsigned int panel_edp_get_display_modes(struct panel_edp *panel, return num; } +static int panel_edp_override_edid_mode(struct panel_edp *panel, + struct drm_connector *connector, + const struct drm_display_mode *override_mode) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, override_mode); + if (!mode) { + dev_err(panel->base.dev, "failed to add additional mode\n"); + return 0; + } + + mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + drm_mode_set_name(mode); + drm_mode_probed_add(connector, mode); + return 1; +} + static int panel_edp_get_non_edid_modes(struct panel_edp *panel, struct drm_connector *connector) { @@ -568,6 +589,10 @@ static int panel_edp_get_modes(struct drm_panel *panel, { struct panel_edp *p = to_panel_edp(panel); int num = 0; + bool has_hard_coded_modes = p->desc->num_timings || p->desc->num_modes; + bool has_override_edid_mode = p->detected_panel && + p->detected_panel != ERR_PTR(-EINVAL) && + p->detected_panel->override_edid_mode; /* probe EDID if a DDC bus is available */ if (p->ddc) { @@ -575,20 +600,28 @@ static int panel_edp_get_modes(struct drm_panel *panel, if (!p->edid) p->edid = drm_get_edid(connector, p->ddc); - - if (p->edid) - num += drm_add_edid_modes(connector, p->edid); + /* + * If both edid and hard-coded modes exist, skip edid modes to + * avoid multiple preferred modes. + */ + if (p->edid && !has_hard_coded_modes) { + if (has_override_edid_mode) { + /* + * override_edid_mode is specified. Use + * override_edid_mode instead of the modes from the edid. + */ + num += panel_edp_override_edid_mode(p, connector, + p->detected_panel->override_edid_mode); + } else { + num += drm_add_edid_modes(connector, p->edid); + } + } pm_runtime_mark_last_busy(panel->dev); pm_runtime_put_autosuspend(panel->dev); } - /* - * Add hard-coded panel modes. Don't call this if there are no timings - * and no modes (the generic edp-panel case) because it will clobber - * the display_info that was already set by drm_add_edid_modes(). 
- */ - if (p->desc->num_timings || p->desc->num_modes) + if (has_hard_coded_modes) num += panel_edp_get_non_edid_modes(p, connector); else if (!num) dev_warn(p->base.dev, "No display modes\n"); @@ -950,6 +983,19 @@ static const struct panel_desc auo_b101ean01 = { }, }; +static const struct drm_display_mode auo_b116xa3_mode = { + .clock = 70589, + .hdisplay = 1366, + .hsync_start = 1366 + 40, + .hsync_end = 1366 + 40 + 40, + .htotal = 1366 + 40 + 40 + 32, + .vdisplay = 768, + .vsync_start = 768 + 10, + .vsync_end = 768 + 10 + 12, + .vtotal = 768 + 10 + 12 + 6, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + static const struct drm_display_mode auo_b116xak01_mode = { .clock = 69300, .hdisplay = 1366, @@ -973,6 +1019,8 @@ static const struct panel_desc auo_b116xak01 = { }, .delay = { .hpd_absent = 200, + .unprepare = 500, + .enable = 50, }, }; @@ -1801,6 +1849,12 @@ static const struct panel_delay delay_200_500_e50 = { .enable = 50, }; +static const struct panel_delay delay_200_500_e80 = { + .hpd_absent = 200, + .unprepare = 500, + .enable = 80, +}; + static const struct panel_delay delay_200_500_e80_d50 = { .hpd_absent = 200, .unprepare = 500, @@ -1820,6 +1874,19 @@ static const struct panel_delay delay_200_500_e200 = { .enable = 200, }; +static const struct panel_delay delay_200_500_e200_d10 = { + .hpd_absent = 200, + .unprepare = 500, + .enable = 200, + .disable = 10, +}; + +static const struct panel_delay delay_200_150_e200 = { + .hpd_absent = 200, + .unprepare = 150, + .enable = 200, +}; + #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ .name = _name, \ @@ -1828,6 +1895,15 @@ static const struct panel_delay delay_200_500_e200 = { .delay = _delay \ } +#define EDP_PANEL_ENTRY2(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name, _mode) \ +{ \ + .name = _name, \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ + product_id), \ + .delay = _delay, \ + .override_edid_mode = _mode \ +} + /* * This table is used to figure out power sequencing delays for panels that * are detected by EDID. 
Entries here may point to entries in the @@ -1840,36 +1916,76 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x145c, &delay_200_500_e50, "B116XAB01.4"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x208d, &delay_200_500_e50, "B140HTN02.1"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02.3"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x239b, &delay_200_500_e50, "B116XAN06.1"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x255c, &delay_200_500_e50, "B116XTN02.5"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x403d, &delay_200_500_e50, "B140HAN04.0"), + EDP_PANEL_ENTRY2('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0", + &auo_b116xa3_mode), EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), + EDP_PANEL_ENTRY2('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1", + &auo_b116xa3_mode), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x635c, &delay_200_500_e50, "B116XAN06.3"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x639c, &delay_200_500_e50, "B140HAK02.7"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0xf390, &delay_200_500_e50, "B140XTN07.7"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0715, &delay_200_150_e200, "NT116WHM-N21"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0731, &delay_200_500_e80, "NT116WHM-N42"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0741, &delay_200_500_e200, "NT116WHM-N44"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0786, &delay_200_500_p2e80, "NV116WHM-T01"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x07f6, &delay_200_500_e200, "NT140FHM-N44"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x08b2, &delay_200_500_e200, "NT140WHM-N49"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x09c3, &delay_200_500_e50, "NT116WHM-N21,836X2"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0951, &delay_200_500_e80, "NV116WHM-N47"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0979, &delay_200_500_e50, "NV116WHM-N49 V8.0"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x09ae, &delay_200_500_e200, "NT140FHM-N45"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ac5, &delay_200_500_e50, "NV116WHM-N4C"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b43, &delay_200_500_e200, "NV140FHM-T09"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c20, &delay_200_500_e80, "NT140FHM-N47"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1132, &delay_200_500_e80_d50, "N116BGE-EA2"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1138, &innolux_n116bca_ea1.delay, "N116BCA-EA1-RC4"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1139, &delay_200_500_e80_d50, "N116BGE-EA2"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1145, &delay_200_500_e80_d50, "N116BCN-EB1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x114c, &innolux_n116bca_ea1.delay, "N116BCA-EA1"), 
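Each edp_panels[] row above packs the three-letter EDID vendor code and the 16-bit product id into one u32 via drm_edid_encode_panel_id(), so matching a detected panel costs one integer compare per row. A sketch of the lookup, assuming the table ends with a zero-initialized sentinel (the table's tail is outside this excerpt):

#include <drm/drm_edid.h>

static const struct edp_panel_entry *demo_find_edp_panel(u32 panel_id)
{
	const struct edp_panel_entry *panel;

	if (!panel_id)
		return NULL;

	/* Walk rows until the zero panel_id sentinel terminates the table. */
	for (panel = edp_panels; panel->panel_id; panel++)
		if (panel->panel_id == panel_id)
			return panel;

	return NULL;
}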
EDP_PANEL_ENTRY('C', 'M', 'N', 0x1152, &delay_200_500_e80_d50, "N116BCN-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1153, &delay_200_500_e80_d50, "N116BGE-EA2"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1154, &delay_200_500_e80_d50, "N116BCA-EA2"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1157, &delay_200_500_e80_d50, "N116BGE-EA2"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x115b, &delay_200_500_e80_d50, "N116BCN-EB1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1247, &delay_200_500_e80_d50, "N120ACA-EA1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x144f, &delay_200_500_e80_d50, "N140HGA-EA1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1468, &delay_200_500_e80, "N140HGA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d4, &delay_200_500_e80_d50, "N140HCA-EAC"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d6, &delay_200_500_e80_d50, "N140BGA-EA4"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"), + + EDP_PANEL_ENTRY('H', 'K', 'C', 0x2d5c, &delay_200_500_e200, "MB116AN01-2"), + EDP_PANEL_ENTRY('I', 'V', 'O', 0x048e, &delay_200_500_e200_d10, "M116NWR6 R5"), EDP_PANEL_ENTRY('I', 'V', 'O', 0x057d, &delay_200_500_e200, "R140NWF5 RH"), EDP_PANEL_ENTRY('I', 'V', 'O', 0x854a, &delay_200_500_p2e100, "M133NW4J"), EDP_PANEL_ENTRY('I', 'V', 'O', 0x854b, &delay_200_500_p2e100, "R133NW4K-R0"), + EDP_PANEL_ENTRY('I', 'V', 'O', 0x8c4d, &delay_200_150_e200, "R140NWFM R1"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x0624, &kingdisplay_kd116n21_30nv_a010.delay, "116N21-30NV-A010"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"), + EDP_PANEL_ENTRY('K', 'D', 'C', 0x0809, &delay_200_500_e50, "KD116N2930A15"), + + EDP_PANEL_ENTRY('S', 'D', 'C', 0x416d, &delay_100_500_e200, "ATNA45AF01"), + EDP_PANEL_ENTRY('S', 'H', 'P', 0x1511, &delay_200_500_e50, "LQ140M1JW48"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x1523, &sharp_lq140m1jw46.delay, "LQ140M1JW46"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x154c, &delay_200_500_p2e100, "LQ116M1JW10"), diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c index e7be15b68102..00791ea81e90 100644 --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Elida kd35t133 5.5" MIPI-DSI panel driver + * Elida kd35t133 3.5" MIPI-DSI panel driver * Copyright (C) 2020 Theobroma Systems Design und Consulting GmbH * * based on @@ -43,7 +43,6 @@ struct kd35t133 { struct regulator *vdd; struct regulator *iovcc; enum drm_panel_orientation orientation; - bool prepared; }; static inline struct kd35t133 *panel_to_kd35t133(struct drm_panel *panel) @@ -91,9 +90,6 @@ static int kd35t133_unprepare(struct drm_panel *panel) struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev); int ret; - if (!ctx->prepared) - return 0; - ret = mipi_dsi_dcs_set_display_off(dsi); if (ret < 0) dev_err(ctx->dev, "failed to set display off: %d\n", ret); @@ -104,11 +100,11 @@ static int kd35t133_unprepare(struct drm_panel *panel) return ret; } + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + regulator_disable(ctx->iovcc); regulator_disable(ctx->vdd); - ctx->prepared = false; - return 0; } @@ -118,9 +114,6 @@ static int kd35t133_prepare(struct drm_panel *panel) struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev); int ret; - if (ctx->prepared) - return 0; - dev_dbg(ctx->dev, "Resetting the panel\n"); ret = regulator_enable(ctx->vdd); if (ret < 0) { @@ -164,8 +157,6 @@ static int kd35t133_prepare(struct 
drm_panel *panel) msleep(50); - ctx->prepared = true; - return 0; disable_iovcc: @@ -209,11 +200,6 @@ static int kd35t133_get_modes(struct drm_panel *panel, connector->display_info.width_mm = mode->width_mm; connector->display_info.height_mm = mode->height_mm; drm_mode_probed_add(connector, mode); - /* - * TODO: Remove once all drm drivers call - * drm_connector_set_orientation_from_panel() - */ - drm_connector_set_panel_orientation(connector, ctx->orientation); return 1; } @@ -299,27 +285,11 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) return 0; } -static void kd35t133_shutdown(struct mipi_dsi_device *dsi) -{ - struct kd35t133 *ctx = mipi_dsi_get_drvdata(dsi); - int ret; - - ret = drm_panel_unprepare(&ctx->panel); - if (ret < 0) - dev_err(&dsi->dev, "Failed to unprepare panel: %d\n", ret); - - ret = drm_panel_disable(&ctx->panel); - if (ret < 0) - dev_err(&dsi->dev, "Failed to disable panel: %d\n", ret); -} - static void kd35t133_remove(struct mipi_dsi_device *dsi) { struct kd35t133 *ctx = mipi_dsi_get_drvdata(dsi); int ret; - kd35t133_shutdown(dsi); - ret = mipi_dsi_detach(dsi); if (ret < 0) dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret); @@ -340,7 +310,6 @@ static struct mipi_dsi_driver kd35t133_driver = { }, .probe = kd35t133_probe, .remove = kd35t133_remove, - .shutdown = kd35t133_shutdown, }; module_mipi_dsi_driver(kd35t133_driver); diff --git a/drivers/gpu/drm/panel/panel-himax-hx8394.c b/drivers/gpu/drm/panel/panel-himax-hx8394.c index c73243d85de7..ff0dc08b9829 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx8394.c +++ b/drivers/gpu/drm/panel/panel-himax-hx8394.c @@ -38,6 +38,7 @@ #define HX8394_CMD_SETMIPI 0xba #define HX8394_CMD_SETOTP 0xbb #define HX8394_CMD_SETREGBANK 0xbd +#define HX8394_CMD_UNKNOWN5 0xbf #define HX8394_CMD_UNKNOWN1 0xc0 #define HX8394_CMD_SETDGCLUT 0xc1 #define HX8394_CMD_SETID 0xc3 @@ -52,6 +53,7 @@ #define HX8394_CMD_SETGIP1 0xd5 #define HX8394_CMD_SETGIP2 0xd6 #define HX8394_CMD_SETGPO 0xd6 +#define HX8394_CMD_UNKNOWN4 0xd8 #define HX8394_CMD_SETSCALING 0xdd #define HX8394_CMD_SETIDLE 0xdf #define HX8394_CMD_SETGAMMA 0xe0 @@ -68,7 +70,7 @@ struct hx8394 { struct gpio_desc *reset_gpio; struct regulator *vcc; struct regulator *iovcc; - bool prepared; + enum drm_panel_orientation orientation; const struct hx8394_panel_desc *desc; }; @@ -203,6 +205,140 @@ static const struct hx8394_panel_desc hsd060bhw4_desc = { .init_sequence = hsd060bhw4_init_sequence, }; +static int powkiddy_x55_init_sequence(struct hx8394 *ctx) +{ + struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev); + + /* 5.19.8 SETEXTC: Set extension command (B9h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETEXTC, + 0xff, 0x83, 0x94); + + /* 5.19.9 SETMIPI: Set MIPI control (BAh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETMIPI, + 0x63, 0x03, 0x68, 0x6b, 0xb2, 0xc0); + + /* 5.19.2 SETPOWER: Set power (B1h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPOWER, + 0x48, 0x12, 0x72, 0x09, 0x32, 0x54, 0x71, 0x71, 0x57, 0x47); + + /* 5.19.3 SETDISP: Set display related register (B2h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETDISP, + 0x00, 0x80, 0x64, 0x2c, 0x16, 0x2f); + + /* 5.19.4 SETCYC: Set display waveform cycles (B4h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETCYC, + 0x73, 0x74, 0x73, 0x74, 0x73, 0x74, 0x01, 0x0c, 0x86, 0x75, + 0x00, 0x3f, 0x73, 0x74, 0x73, 0x74, 0x73, 0x74, 0x01, 0x0c, + 0x86); + + /* 5.19.5 SETVCOM: Set VCOM voltage (B6h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETVCOM, + 0x6e, 0x6e); + + /* 5.19.19 SETGIP0: Set GIP Option0 
(D3h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP0, + 0x00, 0x00, 0x07, 0x07, 0x40, 0x07, 0x0c, 0x00, 0x08, 0x10, + 0x08, 0x00, 0x08, 0x54, 0x15, 0x0a, 0x05, 0x0a, 0x02, 0x15, + 0x06, 0x05, 0x06, 0x47, 0x44, 0x0a, 0x0a, 0x4b, 0x10, 0x07, + 0x07, 0x0c, 0x40); + + /* 5.19.20 Set GIP Option1 (D5h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP1, + 0x1c, 0x1c, 0x1d, 0x1d, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x24, 0x25, 0x18, 0x18, + 0x26, 0x27, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x20, 0x21, + 0x18, 0x18, 0x18, 0x18); + + /* 5.19.21 Set GIP Option2 (D6h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP2, + 0x1c, 0x1c, 0x1d, 0x1d, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, + 0x01, 0x00, 0x0b, 0x0a, 0x09, 0x08, 0x21, 0x20, 0x18, 0x18, + 0x27, 0x26, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x25, 0x24, + 0x18, 0x18, 0x18, 0x18); + + /* 5.19.25 SETGAMMA: Set gamma curve related setting (E0h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGAMMA, + 0x00, 0x0a, 0x15, 0x1b, 0x1e, 0x21, 0x24, 0x22, 0x47, 0x56, + 0x65, 0x66, 0x6e, 0x82, 0x88, 0x8b, 0x9a, 0x9d, 0x98, 0xa8, + 0xb9, 0x5d, 0x5c, 0x61, 0x66, 0x6a, 0x6f, 0x7f, 0x7f, 0x00, + 0x0a, 0x15, 0x1b, 0x1e, 0x21, 0x24, 0x22, 0x47, 0x56, 0x65, + 0x65, 0x6e, 0x81, 0x87, 0x8b, 0x98, 0x9d, 0x99, 0xa8, 0xba, + 0x5d, 0x5d, 0x62, 0x67, 0x6b, 0x72, 0x7f, 0x7f); + + /* Unknown command, not listed in the HX8394-F datasheet */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN1, + 0x1f, 0x31); + + /* 5.19.17 SETPANEL (CCh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPANEL, + 0x0b); + + /* Unknown command, not listed in the HX8394-F datasheet */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN3, + 0x02); + + /* 5.19.11 Set register bank (BDh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK, + 0x02); + + /* Unknown command, not listed in the HX8394-F datasheet */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN4, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff); + + /* 5.19.11 Set register bank (BDh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK, + 0x00); + + /* 5.19.11 Set register bank (BDh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK, + 0x01); + + /* 5.19.2 SETPOWER: Set power (B1h) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPOWER, + 0x00); + + /* 5.19.11 Set register bank (BDh) */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK, + 0x00); + + /* Unknown command, not listed in the HX8394-F datasheet */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN5, + 0x40, 0x81, 0x50, 0x00, 0x1a, 0xfc, 0x01); + + /* Unknown command, not listed in the HX8394-F datasheet */ + mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN2, + 0xed); + + return 0; +} + +static const struct drm_display_mode powkiddy_x55_mode = { + .hdisplay = 720, + .hsync_start = 720 + 44, + .hsync_end = 720 + 44 + 20, + .htotal = 720 + 44 + 20 + 20, + .vdisplay = 1280, + .vsync_start = 1280 + 12, + .vsync_end = 1280 + 12 + 10, + .vtotal = 1280 + 12 + 10 + 10, + .clock = 63290, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + .width_mm = 67, + .height_mm = 121, +}; + +static const struct hx8394_panel_desc powkiddy_x55_desc = { + .mode = &powkiddy_x55_mode, + .lanes = 4, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, + .format = MIPI_DSI_FMT_RGB888, + .init_sequence = powkiddy_x55_init_sequence, +}; + static int 
hx8394_enable(struct drm_panel *panel) { struct hx8394 *ctx = panel_to_hx8394(panel); @@ -262,16 +398,11 @@ static int hx8394_unprepare(struct drm_panel *panel) { struct hx8394 *ctx = panel_to_hx8394(panel); - if (!ctx->prepared) - return 0; - gpiod_set_value_cansleep(ctx->reset_gpio, 1); regulator_disable(ctx->iovcc); regulator_disable(ctx->vcc); - ctx->prepared = false; - return 0; } @@ -280,9 +411,6 @@ static int hx8394_prepare(struct drm_panel *panel) struct hx8394 *ctx = panel_to_hx8394(panel); int ret; - if (ctx->prepared) - return 0; - gpiod_set_value_cansleep(ctx->reset_gpio, 1); ret = regulator_enable(ctx->vcc); @@ -301,8 +429,6 @@ static int hx8394_prepare(struct drm_panel *panel) msleep(180); - ctx->prepared = true; - return 0; disable_vcc: @@ -335,12 +461,20 @@ static int hx8394_get_modes(struct drm_panel *panel, return 1; } +static enum drm_panel_orientation hx8394_get_orientation(struct drm_panel *panel) +{ + struct hx8394 *ctx = panel_to_hx8394(panel); + + return ctx->orientation; +} + static const struct drm_panel_funcs hx8394_drm_funcs = { .disable = hx8394_disable, .unprepare = hx8394_unprepare, .prepare = hx8394_prepare, .enable = hx8394_enable, .get_modes = hx8394_get_modes, + .get_orientation = hx8394_get_orientation, }; static int hx8394_probe(struct mipi_dsi_device *dsi) @@ -358,6 +492,12 @@ static int hx8394_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio), "Failed to get reset gpio\n"); + ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); + if (ret < 0) { + dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, ret); + return ret; + } + mipi_dsi_set_drvdata(dsi, ctx); ctx->dev = dev; @@ -401,27 +541,11 @@ static int hx8394_probe(struct mipi_dsi_device *dsi) return 0; } -static void hx8394_shutdown(struct mipi_dsi_device *dsi) -{ - struct hx8394 *ctx = mipi_dsi_get_drvdata(dsi); - int ret; - - ret = drm_panel_disable(&ctx->panel); - if (ret < 0) - dev_err(&dsi->dev, "Failed to disable panel: %d\n", ret); - - ret = drm_panel_unprepare(&ctx->panel); - if (ret < 0) - dev_err(&dsi->dev, "Failed to unprepare panel: %d\n", ret); -} - static void hx8394_remove(struct mipi_dsi_device *dsi) { struct hx8394 *ctx = mipi_dsi_get_drvdata(dsi); int ret; - hx8394_shutdown(dsi); - ret = mipi_dsi_detach(dsi); if (ret < 0) dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret); @@ -431,6 +555,7 @@ static void hx8394_remove(struct mipi_dsi_device *dsi) static const struct of_device_id hx8394_of_match[] = { { .compatible = "hannstar,hsd060bhw4", .data = &hsd060bhw4_desc }, + { .compatible = "powkiddy,x55-panel", .data = &powkiddy_x55_desc }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, hx8394_of_match); @@ -438,7 +563,6 @@ MODULE_DEVICE_TABLE(of, hx8394_of_match); static struct mipi_dsi_driver hx8394_driver = { .probe = hx8394_probe, .remove = hx8394_remove, - .shutdown = hx8394_shutdown, .driver = { .name = DRV_NAME, .of_match_table = hx8394_of_match, diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c new file mode 100644 index 000000000000..1cbc25758bd2 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 BSH Hausgerate GmbH + */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> + +#include <linux/gpio/consumer.h> +#include 
<linux/regulator/consumer.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +#include <video/mipi_display.h> + +#define ILI9805_EXTCMD_CMD_SET_ENABLE_REG (0xff) +#define ILI9805_SETEXTC_PARAMETER1 (0xff) +#define ILI9805_SETEXTC_PARAMETER2 (0x98) +#define ILI9805_SETEXTC_PARAMETER3 (0x05) + +#define ILI9805_INSTR(_delay, ...) { \ + .delay = (_delay), \ + .len = sizeof((u8[]) {__VA_ARGS__}), \ + .data = (u8[]){__VA_ARGS__} \ + } + +struct ili9805_instr { + size_t len; + const u8 *data; + u32 delay; +}; + +struct ili9805_desc { + const char *name; + const struct ili9805_instr *init; + const size_t init_length; + const struct drm_display_mode *mode; + u32 width_mm; + u32 height_mm; +}; + +struct ili9805 { + struct drm_panel panel; + struct mipi_dsi_device *dsi; + const struct ili9805_desc *desc; + + struct regulator *dvdd; + struct regulator *avdd; + struct gpio_desc *reset_gpio; +}; + +static const struct ili9805_instr gpm1780a0_init[] = { + ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1, + ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3), + ILI9805_INSTR(100, 0xFD, 0x0F, 0x10, 0x44, 0x00), + ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x00, + 0x00, 0x30, 0x00, 0x00, 0x30, 0x00, 0x00), + ILI9805_INSTR(0, 0xB8, 0x62), + ILI9805_INSTR(0, 0xF1, 0x00), + ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40), + ILI9805_INSTR(0, 0xF3, 0x60, 0x83, 0x04), + ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01), + ILI9805_INSTR(0, 0xEB, 0x08, 0x0F), + ILI9805_INSTR(0, 0xe0, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04, + 0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a), + ILI9805_INSTR(0, 0xe1, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04, + 0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a), + ILI9805_INSTR(10, 0xc1, 0x13, 0x39, 0x19, 0x06), + ILI9805_INSTR(10, 0xc7, 0xe5), + ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14), + ILI9805_INSTR(10, 0xB4, 0x02), + ILI9805_INSTR(0, 0xBB, 0x14, 0x55), + ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x08), + ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77), + ILI9805_INSTR(0, 0x20), + ILI9805_INSTR(0, 0xB0, 0x01), + ILI9805_INSTR(0, 0xB6, 0x31, 0x00, 0xef), + ILI9805_INSTR(0, 0xDF, 0x23), + ILI9805_INSTR(0, 0xB9, 0x02, 0x00), +}; + +static const struct ili9805_instr tm041xdhg01_init[] = { + ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1, + ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3), + ILI9805_INSTR(100, 0xFD, 0x0F, 0x13, 0x44, 0x00), + ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x01, + 0x01, 0x30, 0x01, 0x01, 0x30, 0x01, 0x01), + ILI9805_INSTR(0, 0xB8, 0x74), + ILI9805_INSTR(0, 0xF1, 0x00), + ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40), + ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01), + ILI9805_INSTR(0, 0xEB, 0x08, 0x0F), + ILI9805_INSTR(0, 0xe0, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04, + 0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08), + ILI9805_INSTR(0, 0xe1, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04, + 0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08), + ILI9805_INSTR(10, 0xc1, 0x15, 0x03, 0x03, 0x31), + ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14), + ILI9805_INSTR(10, 0xB4, 0x02), + ILI9805_INSTR(0, 0xBB, 0x14, 0x55), + ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x0a), + ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77), + ILI9805_INSTR(0, 0x20), + ILI9805_INSTR(0, 0xB0, 0x00), + ILI9805_INSTR(0, 0xB6, 0x01), + ILI9805_INSTR(0, 0xc2, 0x11), + ILI9805_INSTR(0, 0x51, 0xFF), + ILI9805_INSTR(0, 0x53, 0x24), + 
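The ILI9805_INSTR() macro above leans on C compound literals: a (u8[]){ ... } literal used in a file-scope initializer has static storage duration, and sizeof() on the same literal yields the byte count at compile time, so every table row carries its own payload length and post-write delay. A standalone illustration of the trick (struct demo_instr and DEMO_INSTR are illustrative names, not the driver's):

#include <stddef.h>

struct demo_instr {
	size_t len;
	const unsigned char *data;
	unsigned int delay_ms;
};

#define DEMO_INSTR(_delay, ...) {				\
	.delay_ms = (_delay),					\
	.len = sizeof((unsigned char[]){ __VA_ARGS__ }),	\
	.data = (unsigned char[]){ __VA_ARGS__ },		\
}

static const struct demo_instr demo_seq[] = {
	DEMO_INSTR(100, 0xff, 0x98, 0x05),	/* 3-byte command, 100 ms pause */
	DEMO_INSTR(0, 0x20),			/* 1-byte command, no pause */
};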
ILI9805_INSTR(0, 0x55, 0x00), +}; + +static inline struct ili9805 *panel_to_ili9805(struct drm_panel *panel) +{ + return container_of(panel, struct ili9805, panel); +} + +static int ili9805_power_on(struct ili9805 *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = regulator_enable(ctx->avdd); + if (ret) { + dev_err(dev, "Failed to enable avdd regulator (%d)\n", ret); + return ret; + } + + ret = regulator_enable(ctx->dvdd); + if (ret) { + dev_err(dev, "Failed to enable dvdd regulator (%d)\n", ret); + regulator_disable(ctx->avdd); + return ret; + } + + gpiod_set_value(ctx->reset_gpio, 0); + usleep_range(5000, 10000); + gpiod_set_value(ctx->reset_gpio, 1); + msleep(120); + + return 0; +} + +static int ili9805_power_off(struct ili9805 *ctx) +{ + gpiod_set_value(ctx->reset_gpio, 0); + regulator_disable(ctx->dvdd); + regulator_disable(ctx->avdd); + + return 0; +} + +static int ili9805_activate(struct ili9805 *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int i, ret; + + for (i = 0; i < ctx->desc->init_length; i++) { + const struct ili9805_instr *instr = &ctx->desc->init[i]; + + ret = mipi_dsi_dcs_write_buffer(ctx->dsi, instr->data, instr->len); + if (ret < 0) + return ret; + + if (instr->delay > 0) + msleep(instr->delay); + } + + ret = mipi_dsi_dcs_exit_sleep_mode(ctx->dsi); + if (ret) { + dev_err(dev, "Failed to exit sleep mode (%d)\n", ret); + return ret; + } + + usleep_range(5000, 6000); + + ret = mipi_dsi_dcs_set_display_on(ctx->dsi); + if (ret) { + dev_err(dev, "Failed to set display ON (%d)\n", ret); + return ret; + } + + return 0; +} + +static int ili9805_prepare(struct drm_panel *panel) +{ + struct ili9805 *ctx = panel_to_ili9805(panel); + int ret; + + ret = ili9805_power_on(ctx); + if (ret) + return ret; + + ret = ili9805_activate(ctx); + if (ret) { + ili9805_power_off(ctx); + return ret; + } + + return 0; +} + +static int ili9805_deactivate(struct ili9805 *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = mipi_dsi_dcs_set_display_off(ctx->dsi); + if (ret < 0) { + dev_err(dev, "Failed to set display OFF (%d)\n", ret); + return ret; + } + + usleep_range(5000, 10000); + + ret = mipi_dsi_dcs_enter_sleep_mode(ctx->dsi); + if (ret < 0) { + dev_err(dev, "Failed to enter sleep mode (%d)\n", ret); + return ret; + } + + return 0; +} + +static int ili9805_unprepare(struct drm_panel *panel) +{ + struct ili9805 *ctx = panel_to_ili9805(panel); + + ili9805_deactivate(ctx); + ili9805_power_off(ctx); + + return 0; +} + +static const struct drm_display_mode gpm1780a0_timing = { + .clock = 26227, + + .hdisplay = 480, + .hsync_start = 480 + 10, + .hsync_end = 480 + 10 + 2, + .htotal = 480 + 10 + 2 + 36, + + .vdisplay = 480, + .vsync_start = 480 + 2, + .vsync_end = 480 + 10 + 4, + .vtotal = 480 + 2 + 4 + 10, +}; + +static const struct drm_display_mode tm041xdhg01_timing = { + .clock = 26227, + + .hdisplay = 480, + .hsync_start = 480 + 10, + .hsync_end = 480 + 10 + 2, + .htotal = 480 + 10 + 2 + 36, + + .vdisplay = 768, + .vsync_start = 768 + 2, + .vsync_end = 768 + 10 + 4, + .vtotal = 768 + 2 + 4 + 10, +}; + +static int ili9805_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct ili9805 *ctx = panel_to_ili9805(panel); + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, ctx->desc->mode); + if (!mode) { + dev_err(&ctx->dsi->dev, "failed to add mode %ux%ux@%u\n", + ctx->desc->mode->hdisplay, + 
ctx->desc->mode->vdisplay, + drm_mode_vrefresh(ctx->desc->mode)); + return -ENOMEM; + } + + drm_mode_set_name(mode); + + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + drm_mode_probed_add(connector, mode); + + connector->display_info.width_mm = mode->width_mm; + connector->display_info.height_mm = mode->height_mm; + + return 1; +} + +static const struct drm_panel_funcs ili9805_funcs = { + .prepare = ili9805_prepare, + .unprepare = ili9805_unprepare, + .get_modes = ili9805_get_modes, +}; + +static int ili9805_dsi_probe(struct mipi_dsi_device *dsi) +{ + struct ili9805 *ctx; + int ret; + + ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + mipi_dsi_set_drvdata(dsi, ctx); + ctx->dsi = dsi; + ctx->desc = of_device_get_match_data(&dsi->dev); + + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO | + MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM | + MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET; + dsi->lanes = 2; + + drm_panel_init(&ctx->panel, &dsi->dev, &ili9805_funcs, + DRM_MODE_CONNECTOR_DSI); + + ctx->dvdd = devm_regulator_get(&dsi->dev, "dvdd"); + if (IS_ERR(ctx->dvdd)) + return PTR_ERR(ctx->dvdd); + ctx->avdd = devm_regulator_get(&dsi->dev, "avdd"); + if (IS_ERR(ctx->avdd)) + return PTR_ERR(ctx->avdd); + + ctx->reset_gpio = devm_gpiod_get(&dsi->dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(ctx->reset_gpio)) { + dev_err(&dsi->dev, "Couldn't get our reset GPIO\n"); + return PTR_ERR(ctx->reset_gpio); + } + + ctx->panel.prepare_prev_first = true; + ret = drm_panel_of_backlight(&ctx->panel); + if (ret) + return ret; + + drm_panel_add(&ctx->panel); + + ret = mipi_dsi_attach(dsi); + if (ret < 0) { + dev_err(&dsi->dev, "mipi_dsi_attach failed: %d\n", ret); + drm_panel_remove(&ctx->panel); + return ret; + } + + return 0; +} + +static void ili9805_dsi_remove(struct mipi_dsi_device *dsi) +{ + struct ili9805 *ctx = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", + ret); + + drm_panel_remove(&ctx->panel); +} + +static const struct ili9805_desc gpm1780a0_desc = { + .init = gpm1780a0_init, + .init_length = ARRAY_SIZE(gpm1780a0_init), + .mode = &gpm1780a0_timing, + .width_mm = 65, + .height_mm = 65, +}; + +static const struct ili9805_desc tm041xdhg01_desc = { + .init = tm041xdhg01_init, + .init_length = ARRAY_SIZE(tm041xdhg01_init), + .mode = &tm041xdhg01_timing, + .width_mm = 42, + .height_mm = 96, +}; + +static const struct of_device_id ili9805_of_match[] = { + { .compatible = "giantplus,gpm1790a0", .data = &gpm1780a0_desc }, + { .compatible = "tianma,tm041xdhg01", .data = &tm041xdhg01_desc }, + { } +}; +MODULE_DEVICE_TABLE(of, ili9805_of_match); + +static struct mipi_dsi_driver ili9805_dsi_driver = { + .probe = ili9805_dsi_probe, + .remove = ili9805_dsi_remove, + .driver = { + .name = "ili9805-dsi", + .of_match_table = ili9805_of_match, + }, +}; +module_mipi_dsi_driver(ili9805_dsi_driver); + +MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>"); +MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>"); +MODULE_DESCRIPTION("Ilitek ILI9805 Controller Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index 7838947a1bf3..2ffe5f68a890 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -830,6 +830,203 @@ static const struct 
ili9881c_instr w552946ab_init[] = { ILI9881C_SWITCH_PAGE_INSTR(0), }; +static const struct ili9881c_instr am8001280g_init[] = { + ILI9881C_SWITCH_PAGE_INSTR(3), + ILI9881C_COMMAND_INSTR(0x01, 0x00), + ILI9881C_COMMAND_INSTR(0x02, 0x00), + ILI9881C_COMMAND_INSTR(0x03, 0x73), + ILI9881C_COMMAND_INSTR(0x04, 0xD3), + ILI9881C_COMMAND_INSTR(0x05, 0x00), + ILI9881C_COMMAND_INSTR(0x06, 0x0A), + ILI9881C_COMMAND_INSTR(0x07, 0x0E), + ILI9881C_COMMAND_INSTR(0x08, 0x00), + ILI9881C_COMMAND_INSTR(0x09, 0x01), + ILI9881C_COMMAND_INSTR(0x0a, 0x01), + ILI9881C_COMMAND_INSTR(0x0b, 0x01), + ILI9881C_COMMAND_INSTR(0x0c, 0x01), + ILI9881C_COMMAND_INSTR(0x0d, 0x01), + ILI9881C_COMMAND_INSTR(0x0e, 0x01), + ILI9881C_COMMAND_INSTR(0x0f, 0x01), + ILI9881C_COMMAND_INSTR(0x10, 0x01), + ILI9881C_COMMAND_INSTR(0x11, 0x00), + ILI9881C_COMMAND_INSTR(0x12, 0x00), + ILI9881C_COMMAND_INSTR(0x13, 0x00), + ILI9881C_COMMAND_INSTR(0x14, 0x00), + ILI9881C_COMMAND_INSTR(0x15, 0x00), + ILI9881C_COMMAND_INSTR(0x16, 0x00), + ILI9881C_COMMAND_INSTR(0x17, 0x00), + ILI9881C_COMMAND_INSTR(0x18, 0x00), + ILI9881C_COMMAND_INSTR(0x19, 0x00), + ILI9881C_COMMAND_INSTR(0x1a, 0x00), + ILI9881C_COMMAND_INSTR(0x1b, 0x00), + ILI9881C_COMMAND_INSTR(0x1c, 0x00), + ILI9881C_COMMAND_INSTR(0x1d, 0x00), + ILI9881C_COMMAND_INSTR(0x1e, 0x40), + ILI9881C_COMMAND_INSTR(0x1f, 0x80), + ILI9881C_COMMAND_INSTR(0x20, 0x06), + ILI9881C_COMMAND_INSTR(0x21, 0x01), + ILI9881C_COMMAND_INSTR(0x22, 0x00), + ILI9881C_COMMAND_INSTR(0x23, 0x00), + ILI9881C_COMMAND_INSTR(0x24, 0x00), + ILI9881C_COMMAND_INSTR(0x25, 0x00), + ILI9881C_COMMAND_INSTR(0x26, 0x00), + ILI9881C_COMMAND_INSTR(0x27, 0x00), + ILI9881C_COMMAND_INSTR(0x28, 0x33), + ILI9881C_COMMAND_INSTR(0x29, 0x03), + ILI9881C_COMMAND_INSTR(0x2a, 0x00), + ILI9881C_COMMAND_INSTR(0x2b, 0x00), + ILI9881C_COMMAND_INSTR(0x2c, 0x00), + ILI9881C_COMMAND_INSTR(0x2d, 0x00), + ILI9881C_COMMAND_INSTR(0x2e, 0x00), + ILI9881C_COMMAND_INSTR(0x2f, 0x00), + ILI9881C_COMMAND_INSTR(0x30, 0x00), + ILI9881C_COMMAND_INSTR(0x31, 0x00), + ILI9881C_COMMAND_INSTR(0x32, 0x00), + ILI9881C_COMMAND_INSTR(0x33, 0x00), + ILI9881C_COMMAND_INSTR(0x34, 0x03), + ILI9881C_COMMAND_INSTR(0x35, 0x00), + ILI9881C_COMMAND_INSTR(0x36, 0x03), + ILI9881C_COMMAND_INSTR(0x37, 0x00), + ILI9881C_COMMAND_INSTR(0x38, 0x00), + ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0x3a, 0x40), + ILI9881C_COMMAND_INSTR(0x3b, 0x40), + ILI9881C_COMMAND_INSTR(0x3c, 0x00), + ILI9881C_COMMAND_INSTR(0x3d, 0x00), + ILI9881C_COMMAND_INSTR(0x3e, 0x00), + ILI9881C_COMMAND_INSTR(0x3f, 0x00), + ILI9881C_COMMAND_INSTR(0x40, 0x00), + ILI9881C_COMMAND_INSTR(0x41, 0x00), + ILI9881C_COMMAND_INSTR(0x42, 0x00), + ILI9881C_COMMAND_INSTR(0x43, 0x00), + ILI9881C_COMMAND_INSTR(0x44, 0x00), + + ILI9881C_COMMAND_INSTR(0x50, 0x01), + ILI9881C_COMMAND_INSTR(0x51, 0x23), + ILI9881C_COMMAND_INSTR(0x52, 0x45), + ILI9881C_COMMAND_INSTR(0x53, 0x67), + ILI9881C_COMMAND_INSTR(0x54, 0x89), + ILI9881C_COMMAND_INSTR(0x55, 0xab), + ILI9881C_COMMAND_INSTR(0x56, 0x01), + ILI9881C_COMMAND_INSTR(0x57, 0x23), + ILI9881C_COMMAND_INSTR(0x58, 0x45), + ILI9881C_COMMAND_INSTR(0x59, 0x67), + ILI9881C_COMMAND_INSTR(0x5a, 0x89), + ILI9881C_COMMAND_INSTR(0x5b, 0xab), + ILI9881C_COMMAND_INSTR(0x5c, 0xcd), + ILI9881C_COMMAND_INSTR(0x5d, 0xef), + + ILI9881C_COMMAND_INSTR(0x5e, 0x11), + ILI9881C_COMMAND_INSTR(0x5f, 0x02), + ILI9881C_COMMAND_INSTR(0x60, 0x00), + ILI9881C_COMMAND_INSTR(0x61, 0x01), + ILI9881C_COMMAND_INSTR(0x62, 0x0D), + ILI9881C_COMMAND_INSTR(0x63, 0x0C), + ILI9881C_COMMAND_INSTR(0x64, 0x0F), + 
ILI9881C_COMMAND_INSTR(0x65, 0x0E), + ILI9881C_COMMAND_INSTR(0x66, 0x06), + ILI9881C_COMMAND_INSTR(0x67, 0x07), + ILI9881C_COMMAND_INSTR(0x68, 0x02), + ILI9881C_COMMAND_INSTR(0x69, 0x02), + ILI9881C_COMMAND_INSTR(0x6a, 0x08), + ILI9881C_COMMAND_INSTR(0x6b, 0x02), + ILI9881C_COMMAND_INSTR(0x6c, 0x02), + ILI9881C_COMMAND_INSTR(0x6d, 0x02), + ILI9881C_COMMAND_INSTR(0x6e, 0x02), + ILI9881C_COMMAND_INSTR(0x6f, 0x02), + ILI9881C_COMMAND_INSTR(0x70, 0x02), + ILI9881C_COMMAND_INSTR(0x71, 0x02), + ILI9881C_COMMAND_INSTR(0x72, 0x02), + ILI9881C_COMMAND_INSTR(0x73, 0x02), + ILI9881C_COMMAND_INSTR(0x74, 0x02), + ILI9881C_COMMAND_INSTR(0x75, 0x02), + ILI9881C_COMMAND_INSTR(0x76, 0x00), + ILI9881C_COMMAND_INSTR(0x77, 0x01), + ILI9881C_COMMAND_INSTR(0x78, 0x0D), + ILI9881C_COMMAND_INSTR(0x79, 0x0C), + ILI9881C_COMMAND_INSTR(0x7a, 0x0F), + ILI9881C_COMMAND_INSTR(0x7b, 0x0E), + ILI9881C_COMMAND_INSTR(0x7c, 0x06), + ILI9881C_COMMAND_INSTR(0x7d, 0x07), + ILI9881C_COMMAND_INSTR(0x7e, 0x02), + ILI9881C_COMMAND_INSTR(0x7f, 0x02), + ILI9881C_COMMAND_INSTR(0x80, 0x08), + ILI9881C_COMMAND_INSTR(0x81, 0x02), + ILI9881C_COMMAND_INSTR(0x82, 0x02), + ILI9881C_COMMAND_INSTR(0x83, 0x02), + ILI9881C_COMMAND_INSTR(0x84, 0x02), + ILI9881C_COMMAND_INSTR(0x85, 0x02), + ILI9881C_COMMAND_INSTR(0x86, 0x02), + ILI9881C_COMMAND_INSTR(0x87, 0x02), + ILI9881C_COMMAND_INSTR(0x88, 0x02), + ILI9881C_COMMAND_INSTR(0x89, 0x02), + ILI9881C_COMMAND_INSTR(0x8A, 0x02), + + ILI9881C_SWITCH_PAGE_INSTR(4), + ILI9881C_COMMAND_INSTR(0x6c, 0x15), + ILI9881C_COMMAND_INSTR(0x6e, 0x30), + ILI9881C_COMMAND_INSTR(0x6f, 0x33), + ILI9881C_COMMAND_INSTR(0x8d, 0x15), + ILI9881C_COMMAND_INSTR(0x3a, 0xa4), + ILI9881C_COMMAND_INSTR(0x87, 0xba), + ILI9881C_COMMAND_INSTR(0x26, 0x76), + ILI9881C_COMMAND_INSTR(0xb2, 0xd1), + + ILI9881C_SWITCH_PAGE_INSTR(1), + ILI9881C_COMMAND_INSTR(0x22, 0x0A), + ILI9881C_COMMAND_INSTR(0x31, 0x0B), + ILI9881C_COMMAND_INSTR(0x50, 0xa5), + ILI9881C_COMMAND_INSTR(0x51, 0xa0), + ILI9881C_COMMAND_INSTR(0x53, 0x70), + ILI9881C_COMMAND_INSTR(0x55, 0x7A), + ILI9881C_COMMAND_INSTR(0x60, 0x14), + + ILI9881C_COMMAND_INSTR(0xA0, 0x00), + ILI9881C_COMMAND_INSTR(0xA1, 0x53), + ILI9881C_COMMAND_INSTR(0xA2, 0x50), + ILI9881C_COMMAND_INSTR(0xA3, 0x20), + ILI9881C_COMMAND_INSTR(0xA4, 0x27), + ILI9881C_COMMAND_INSTR(0xA5, 0x33), + ILI9881C_COMMAND_INSTR(0xA6, 0x25), + ILI9881C_COMMAND_INSTR(0xA7, 0x25), + ILI9881C_COMMAND_INSTR(0xA8, 0xD4), + ILI9881C_COMMAND_INSTR(0xA9, 0x1A), + ILI9881C_COMMAND_INSTR(0xAA, 0x2B), + ILI9881C_COMMAND_INSTR(0xAB, 0xB5), + ILI9881C_COMMAND_INSTR(0xAC, 0x19), + ILI9881C_COMMAND_INSTR(0xAD, 0x18), + ILI9881C_COMMAND_INSTR(0xAE, 0x53), + ILI9881C_COMMAND_INSTR(0xAF, 0x1A), + ILI9881C_COMMAND_INSTR(0xB0, 0x25), + ILI9881C_COMMAND_INSTR(0xB1, 0x62), + ILI9881C_COMMAND_INSTR(0xB2, 0x6A), + ILI9881C_COMMAND_INSTR(0xB3, 0x31), + + ILI9881C_COMMAND_INSTR(0xC0, 0x00), + ILI9881C_COMMAND_INSTR(0xC1, 0x53), + ILI9881C_COMMAND_INSTR(0xC2, 0x50), + ILI9881C_COMMAND_INSTR(0xC3, 0x20), + ILI9881C_COMMAND_INSTR(0xC4, 0x27), + ILI9881C_COMMAND_INSTR(0xC5, 0x33), + ILI9881C_COMMAND_INSTR(0xC6, 0x25), + ILI9881C_COMMAND_INSTR(0xC7, 0x25), + ILI9881C_COMMAND_INSTR(0xC8, 0xD4), + ILI9881C_COMMAND_INSTR(0xC9, 0x1A), + ILI9881C_COMMAND_INSTR(0xCA, 0x2B), + ILI9881C_COMMAND_INSTR(0xCB, 0xB5), + ILI9881C_COMMAND_INSTR(0xCC, 0x19), + ILI9881C_COMMAND_INSTR(0xCD, 0x18), + ILI9881C_COMMAND_INSTR(0xCE, 0x53), + ILI9881C_COMMAND_INSTR(0xCF, 0x1A), + ILI9881C_COMMAND_INSTR(0xD0, 0x25), + ILI9881C_COMMAND_INSTR(0xD1, 0x62), + 
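The ILI9881C multiplexes its setup registers behind register pages, which is why the am8001280g table brackets each block with ILI9881C_SWITCH_PAGE_INSTR(). On the wire a page switch is the extended-command prefix 0xff, 0x98, 0x81 followed by the page number. A minimal sketch of one paged register write over DSI (a demo helper, not the driver's own code, which replays its instruction table instead):

#include <drm/drm_mipi_dsi.h>

static int demo_ili9881c_paged_write(struct mipi_dsi_device *dsi,
				     u8 page, u8 reg, u8 val)
{
	const u8 select[] = { 0xff, 0x98, 0x81, page };	/* switch page */
	const u8 payload[] = { reg, val };	/* write one register on it */
	ssize_t ret;

	ret = mipi_dsi_dcs_write_buffer(dsi, select, sizeof(select));
	if (ret < 0)
		return ret;

	ret = mipi_dsi_dcs_write_buffer(dsi, payload, sizeof(payload));
	return ret < 0 ? ret : 0;
}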
ILI9881C_COMMAND_INSTR(0xD2, 0x6A), + ILI9881C_COMMAND_INSTR(0xD3, 0x31), + ILI9881C_SWITCH_PAGE_INSTR(0), + ILI9881C_COMMAND_INSTR(MIPI_DCS_WRITE_CONTROL_DISPLAY, 0x2c), + ILI9881C_COMMAND_INSTR(MIPI_DCS_WRITE_POWER_SAVE, 0x00), +}; + static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel) { return container_of(panel, struct ili9881c, panel); @@ -1014,6 +1211,23 @@ static const struct drm_display_mode w552946aba_default_mode = { .height_mm = 121, }; +static const struct drm_display_mode am8001280g_default_mode = { + .clock = 67911, + + .hdisplay = 800, + .hsync_start = 800 + 20, + .hsync_end = 800 + 20 + 32, + .htotal = 800 + 20 + 32 + 20, + + .vdisplay = 1280, + .vsync_start = 1280 + 6, + .vsync_end = 1280 + 6 + 8, + .vtotal = 1280 + 6 + 8 + 4, + + .width_mm = 94, + .height_mm = 151, +}; + static int ili9881c_get_modes(struct drm_panel *panel, struct drm_connector *connector) { @@ -1094,6 +1308,8 @@ static int ili9881c_dsi_probe(struct mipi_dsi_device *dsi) return ret; } + ctx->panel.prepare_prev_first = true; + ret = drm_panel_of_backlight(&ctx->panel); if (ret) return ret; @@ -1145,11 +1361,20 @@ static const struct ili9881c_desc w552946aba_desc = { MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, }; +static const struct ili9881c_desc am8001280g_desc = { + .init = am8001280g_init, + .init_length = ARRAY_SIZE(am8001280g_init), + .mode = &am8001280g_default_mode, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | + MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM, +}; + static const struct of_device_id ili9881c_of_match[] = { { .compatible = "bananapi,lhr050h41", .data = &lhr050h41_desc }, { .compatible = "feixin,k101-im2byl02", .data = &k101_im2byl02_desc }, { .compatible = "tdo,tl050hdv35", .data = &tl050hdv35_desc }, { .compatible = "wanchanglong,w552946aba", .data = &w552946aba_desc }, + { .compatible = "ampire,am8001280g", .data = &am8001280g_desc }, { } }; MODULE_DEVICE_TABLE(of, ili9881c_of_match); diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c index 79de6c886292..94d89ffd596b 100644 --- a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c +++ b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c @@ -28,6 +28,7 @@ struct nv3051d_panel_info { unsigned int num_modes; u16 width_mm, height_mm; u32 bus_flags; + u32 mode_flags; }; struct panel_nv3051d { @@ -261,6 +262,8 @@ static int panel_nv3051d_unprepare(struct drm_panel *panel) usleep_range(10000, 15000); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + regulator_disable(ctx->vdd); return 0; @@ -385,15 +388,7 @@ static int panel_nv3051d_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; - - /* - * The panel in the RG351V is identical to the 353P, except it - * requires MIPI_DSI_CLOCK_NON_CONTINUOUS to operate correctly. 
- */ - if (of_device_is_compatible(dev->of_node, "anbernic,rg351v-panel")) - dsi->mode_flags |= MIPI_DSI_CLOCK_NON_CONTINUOUS; + dsi->mode_flags = ctx->panel_info->mode_flags; drm_panel_init(&ctx->panel, &dsi->dev, &panel_nv3051d_funcs, DRM_MODE_CONNECTOR_DSI); @@ -481,16 +476,56 @@ static const struct drm_display_mode nv3051d_rgxx3_modes[] = { }, }; -static const struct nv3051d_panel_info nv3051d_rgxx3_info = { +static const struct drm_display_mode nv3051d_rk2023_modes[] = { + { + .hdisplay = 640, + .hsync_start = 640 + 40, + .hsync_end = 640 + 40 + 2, + .htotal = 640 + 40 + 2 + 80, + .vdisplay = 480, + .vsync_start = 480 + 18, + .vsync_end = 480 + 18 + 2, + .vtotal = 480 + 18 + 2 + 4, + .clock = 24150, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, +}; + +static const struct nv3051d_panel_info nv3051d_rg351v_info = { .display_modes = nv3051d_rgxx3_modes, .num_modes = ARRAY_SIZE(nv3051d_rgxx3_modes), .width_mm = 70, .height_mm = 57, .bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET | + MIPI_DSI_CLOCK_NON_CONTINUOUS, +}; + +static const struct nv3051d_panel_info nv3051d_rg353p_info = { + .display_modes = nv3051d_rgxx3_modes, + .num_modes = ARRAY_SIZE(nv3051d_rgxx3_modes), + .width_mm = 70, + .height_mm = 57, + .bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, +}; + +static const struct nv3051d_panel_info nv3051d_rk2023_info = { + .display_modes = nv3051d_rk2023_modes, + .num_modes = ARRAY_SIZE(nv3051d_rk2023_modes), + .width_mm = 70, + .height_mm = 57, + .bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, }; static const struct of_device_id newvision_nv3051d_of_match[] = { - { .compatible = "newvision,nv3051d", .data = &nv3051d_rgxx3_info }, + { .compatible = "anbernic,rg351v-panel", .data = &nv3051d_rg351v_info }, + { .compatible = "anbernic,rg353p-panel", .data = &nv3051d_rg353p_info }, + { .compatible = "powkiddy,rk2023-panel", .data = &nv3051d_rk2023_info }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, newvision_nv3051d_of_match); diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c index 71e57de6d8b2..1aab0c9ae52f 100644 --- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c +++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c @@ -20,11 +20,18 @@ #include <drm/drm_modes.h> #include <drm/drm_panel.h> +struct nv3052c_reg { + u8 cmd; + u8 val; +}; + struct nv3052c_panel_info { const struct drm_display_mode *display_modes; unsigned int num_modes; u16 width_mm, height_mm; u32 bus_format, bus_flags; + const struct nv3052c_reg *panel_regs; + unsigned int panel_regs_len; }; struct nv3052c { @@ -36,15 +43,10 @@ struct nv3052c { struct gpio_desc *reset_gpio; }; -struct nv3052c_reg { - u8 cmd; - u8 val; -}; - -static const struct nv3052c_reg nv3052c_panel_regs[] = { - { 0xff, 0x30 }, - { 0xff, 0x52 }, - { 0xff, 0x01 }, +static const struct nv3052c_reg ltk035c5444t_panel_regs[] = { + // EXTC Command set enable, select page 1 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 }, + // Mostly unknown registers { 0xe3, 0x00 }, { 0x40, 0x00 }, { 0x03, 0x40 }, @@ -62,15 +64,15 @@ static const struct nv3052c_reg nv3052c_panel_regs[] 
= { { 0x25, 0x06 }, { 0x26, 0x14 }, { 0x27, 0x14 }, - { 0x38, 0xcc }, - { 0x39, 0xd7 }, - { 0x3a, 0x4a }, + { 0x38, 0xcc }, // VCOM_ADJ1 + { 0x39, 0xd7 }, // VCOM_ADJ2 + { 0x3a, 0x4a }, // VCOM_ADJ3 { 0x28, 0x40 }, { 0x29, 0x01 }, { 0x2a, 0xdf }, { 0x49, 0x3c }, - { 0x91, 0x77 }, - { 0x92, 0x77 }, + { 0x91, 0x77 }, // EXTPW_CTRL2 + { 0x92, 0x77 }, // EXTPW_CTRL3 { 0xa0, 0x55 }, { 0xa1, 0x50 }, { 0xa4, 0x9c }, @@ -94,123 +96,321 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = { { 0xb8, 0x26 }, { 0xf0, 0x00 }, { 0xf6, 0xc0 }, - { 0xff, 0x30 }, - { 0xff, 0x52 }, - { 0xff, 0x02 }, - { 0xb0, 0x0b }, - { 0xb1, 0x16 }, - { 0xb2, 0x17 }, - { 0xb3, 0x2c }, - { 0xb4, 0x32 }, - { 0xb5, 0x3b }, - { 0xb6, 0x29 }, - { 0xb7, 0x40 }, - { 0xb8, 0x0d }, - { 0xb9, 0x05 }, - { 0xba, 0x12 }, - { 0xbb, 0x10 }, - { 0xbc, 0x12 }, - { 0xbd, 0x15 }, - { 0xbe, 0x19 }, - { 0xbf, 0x0e }, - { 0xc0, 0x16 }, - { 0xc1, 0x0a }, - { 0xd0, 0x0c }, - { 0xd1, 0x17 }, - { 0xd2, 0x14 }, - { 0xd3, 0x2e }, - { 0xd4, 0x32 }, - { 0xd5, 0x3c }, - { 0xd6, 0x22 }, - { 0xd7, 0x3d }, - { 0xd8, 0x0d }, - { 0xd9, 0x07 }, - { 0xda, 0x13 }, - { 0xdb, 0x13 }, - { 0xdc, 0x11 }, - { 0xdd, 0x15 }, - { 0xde, 0x19 }, - { 0xdf, 0x10 }, - { 0xe0, 0x17 }, - { 0xe1, 0x0a }, - { 0xff, 0x30 }, - { 0xff, 0x52 }, - { 0xff, 0x03 }, - { 0x00, 0x2a }, - { 0x01, 0x2a }, - { 0x02, 0x2a }, - { 0x03, 0x2a }, - { 0x04, 0x61 }, - { 0x05, 0x80 }, - { 0x06, 0xc7 }, - { 0x07, 0x01 }, - { 0x08, 0x03 }, - { 0x09, 0x04 }, - { 0x70, 0x22 }, - { 0x71, 0x80 }, - { 0x30, 0x2a }, - { 0x31, 0x2a }, - { 0x32, 0x2a }, - { 0x33, 0x2a }, - { 0x34, 0x61 }, - { 0x35, 0xc5 }, - { 0x36, 0x80 }, - { 0x37, 0x23 }, - { 0x40, 0x03 }, - { 0x41, 0x04 }, - { 0x42, 0x05 }, - { 0x43, 0x06 }, - { 0x44, 0x11 }, - { 0x45, 0xe8 }, - { 0x46, 0xe9 }, - { 0x47, 0x11 }, - { 0x48, 0xea }, - { 0x49, 0xeb }, - { 0x50, 0x07 }, - { 0x51, 0x08 }, - { 0x52, 0x09 }, - { 0x53, 0x0a }, - { 0x54, 0x11 }, - { 0x55, 0xec }, - { 0x56, 0xed }, - { 0x57, 0x11 }, - { 0x58, 0xef }, - { 0x59, 0xf0 }, - { 0xb1, 0x01 }, - { 0xb4, 0x15 }, - { 0xb5, 0x16 }, - { 0xb6, 0x09 }, - { 0xb7, 0x0f }, - { 0xb8, 0x0d }, - { 0xb9, 0x0b }, - { 0xba, 0x00 }, - { 0xc7, 0x02 }, - { 0xca, 0x17 }, - { 0xcb, 0x18 }, - { 0xcc, 0x0a }, - { 0xcd, 0x10 }, - { 0xce, 0x0e }, - { 0xcf, 0x0c }, - { 0xd0, 0x00 }, - { 0x81, 0x00 }, - { 0x84, 0x15 }, - { 0x85, 0x16 }, - { 0x86, 0x10 }, - { 0x87, 0x0a }, - { 0x88, 0x0c }, - { 0x89, 0x0e }, - { 0x8a, 0x02 }, - { 0x97, 0x00 }, - { 0x9a, 0x17 }, - { 0x9b, 0x18 }, - { 0x9c, 0x0f }, - { 0x9d, 0x09 }, - { 0x9e, 0x0b }, - { 0x9f, 0x0d }, - { 0xa0, 0x01 }, - { 0xff, 0x30 }, - { 0xff, 0x52 }, - { 0xff, 0x02 }, + // EXTC Command set enable, select page 2 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 }, + // Set gray scale voltage to adjust gamma + { 0xb0, 0x0b }, // PGAMVR0 + { 0xb1, 0x16 }, // PGAMVR1 + { 0xb2, 0x17 }, // PGAMVR2 + { 0xb3, 0x2c }, // PGAMVR3 + { 0xb4, 0x32 }, // PGAMVR4 + { 0xb5, 0x3b }, // PGAMVR5 + { 0xb6, 0x29 }, // PGAMPR0 + { 0xb7, 0x40 }, // PGAMPR1 + { 0xb8, 0x0d }, // PGAMPK0 + { 0xb9, 0x05 }, // PGAMPK1 + { 0xba, 0x12 }, // PGAMPK2 + { 0xbb, 0x10 }, // PGAMPK3 + { 0xbc, 0x12 }, // PGAMPK4 + { 0xbd, 0x15 }, // PGAMPK5 + { 0xbe, 0x19 }, // PGAMPK6 + { 0xbf, 0x0e }, // PGAMPK7 + { 0xc0, 0x16 }, // PGAMPK8 + { 0xc1, 0x0a }, // PGAMPK9 + // Set gray scale voltage to adjust gamma + { 0xd0, 0x0c }, // NGAMVR0 + { 0xd1, 0x17 }, // NGAMVR0 + { 0xd2, 0x14 }, // NGAMVR1 + { 0xd3, 0x2e }, // NGAMVR2 + { 0xd4, 0x32 }, // NGAMVR3 + { 0xd5, 0x3c }, // NGAMVR4 + { 0xd6, 0x22 }, // NGAMPR0 + 
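The nv3051d rework above moves mode_flags out of compatible-string checks in probe and into per-compatible match data, the standard pattern when one controller driver serves panels that differ only in timings or flags. A minimal sketch of the probe side, assuming a driver-local struct demo_panel_info (illustrative, mirroring nv3051d_panel_info):

#include <linux/errno.h>
#include <linux/of_device.h>
#include <drm/drm_mipi_dsi.h>

struct demo_panel_info {
	unsigned long mode_flags;
};

static int demo_probe(struct mipi_dsi_device *dsi)
{
	const struct demo_panel_info *info;

	/* Returns the .data pointer of the matching of_device_id entry. */
	info = of_device_get_match_data(&dsi->dev);
	if (!info)
		return -EINVAL;

	dsi->mode_flags = info->mode_flags;
	return 0;
}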
{ 0xd7, 0x3d }, // NGAMPR1 + { 0xd8, 0x0d }, // NGAMPK0 + { 0xd9, 0x07 }, // NGAMPK1 + { 0xda, 0x13 }, // NGAMPK2 + { 0xdb, 0x13 }, // NGAMPK3 + { 0xdc, 0x11 }, // NGAMPK4 + { 0xdd, 0x15 }, // NGAMPK5 + { 0xde, 0x19 }, // NGAMPK6 + { 0xdf, 0x10 }, // NGAMPK7 + { 0xe0, 0x17 }, // NGAMPK8 + { 0xe1, 0x0a }, // NGAMPK9 + // EXTC Command set enable, select page 3 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 }, + // Set various timing settings + { 0x00, 0x2a }, // GIP_VST_1 + { 0x01, 0x2a }, // GIP_VST_2 + { 0x02, 0x2a }, // GIP_VST_3 + { 0x03, 0x2a }, // GIP_VST_4 + { 0x04, 0x61 }, // GIP_VST_5 + { 0x05, 0x80 }, // GIP_VST_6 + { 0x06, 0xc7 }, // GIP_VST_7 + { 0x07, 0x01 }, // GIP_VST_8 + { 0x08, 0x03 }, // GIP_VST_9 + { 0x09, 0x04 }, // GIP_VST_10 + { 0x70, 0x22 }, // GIP_ECLK1 + { 0x71, 0x80 }, // GIP_ECLK2 + { 0x30, 0x2a }, // GIP_CLK_1 + { 0x31, 0x2a }, // GIP_CLK_2 + { 0x32, 0x2a }, // GIP_CLK_3 + { 0x33, 0x2a }, // GIP_CLK_4 + { 0x34, 0x61 }, // GIP_CLK_5 + { 0x35, 0xc5 }, // GIP_CLK_6 + { 0x36, 0x80 }, // GIP_CLK_7 + { 0x37, 0x23 }, // GIP_CLK_8 + { 0x40, 0x03 }, // GIP_CLKA_1 + { 0x41, 0x04 }, // GIP_CLKA_2 + { 0x42, 0x05 }, // GIP_CLKA_3 + { 0x43, 0x06 }, // GIP_CLKA_4 + { 0x44, 0x11 }, // GIP_CLKA_5 + { 0x45, 0xe8 }, // GIP_CLKA_6 + { 0x46, 0xe9 }, // GIP_CLKA_7 + { 0x47, 0x11 }, // GIP_CLKA_8 + { 0x48, 0xea }, // GIP_CLKA_9 + { 0x49, 0xeb }, // GIP_CLKA_10 + { 0x50, 0x07 }, // GIP_CLKB_1 + { 0x51, 0x08 }, // GIP_CLKB_2 + { 0x52, 0x09 }, // GIP_CLKB_3 + { 0x53, 0x0a }, // GIP_CLKB_4 + { 0x54, 0x11 }, // GIP_CLKB_5 + { 0x55, 0xec }, // GIP_CLKB_6 + { 0x56, 0xed }, // GIP_CLKB_7 + { 0x57, 0x11 }, // GIP_CLKB_8 + { 0x58, 0xef }, // GIP_CLKB_9 + { 0x59, 0xf0 }, // GIP_CLKB_10 + // Map internal GOA signals to GOA output pad + { 0xb1, 0x01 }, // PANELD2U2 + { 0xb4, 0x15 }, // PANELD2U5 + { 0xb5, 0x16 }, // PANELD2U6 + { 0xb6, 0x09 }, // PANELD2U7 + { 0xb7, 0x0f }, // PANELD2U8 + { 0xb8, 0x0d }, // PANELD2U9 + { 0xb9, 0x0b }, // PANELD2U10 + { 0xba, 0x00 }, // PANELD2U11 + { 0xc7, 0x02 }, // PANELD2U24 + { 0xca, 0x17 }, // PANELD2U27 + { 0xcb, 0x18 }, // PANELD2U28 + { 0xcc, 0x0a }, // PANELD2U29 + { 0xcd, 0x10 }, // PANELD2U30 + { 0xce, 0x0e }, // PANELD2U31 + { 0xcf, 0x0c }, // PANELD2U32 + { 0xd0, 0x00 }, // PANELD2U33 + // Map internal GOA signals to GOA output pad + { 0x81, 0x00 }, // PANELU2D2 + { 0x84, 0x15 }, // PANELU2D5 + { 0x85, 0x16 }, // PANELU2D6 + { 0x86, 0x10 }, // PANELU2D7 + { 0x87, 0x0a }, // PANELU2D8 + { 0x88, 0x0c }, // PANELU2D9 + { 0x89, 0x0e }, // PANELU2D10 + { 0x8a, 0x02 }, // PANELU2D11 + { 0x97, 0x00 }, // PANELU2D24 + { 0x9a, 0x17 }, // PANELU2D27 + { 0x9b, 0x18 }, // PANELU2D28 + { 0x9c, 0x0f }, // PANELU2D29 + { 0x9d, 0x09 }, // PANELU2D30 + { 0x9e, 0x0b }, // PANELU2D31 + { 0x9f, 0x0d }, // PANELU2D32 + { 0xa0, 0x01 }, // PANELU2D33 + // EXTC Command set enable, select page 2 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 }, + // Unknown registers + { 0x01, 0x01 }, + { 0x02, 0xda }, + { 0x03, 0xba }, + { 0x04, 0xa8 }, + { 0x05, 0x9a }, + { 0x06, 0x70 }, + { 0x07, 0xff }, + { 0x08, 0x91 }, + { 0x09, 0x90 }, + { 0x0a, 0xff }, + { 0x0b, 0x8f }, + { 0x0c, 0x60 }, + { 0x0d, 0x58 }, + { 0x0e, 0x48 }, + { 0x0f, 0x38 }, + { 0x10, 0x2b }, + // EXTC Command set enable, select page 0 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 }, + // Display Access Control + { 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0 +}; + +static const struct nv3052c_reg fs035vg158_panel_regs[] = { + // EXTC Command set enable, select page 1 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 }, + // 
Mostly unknown registers + { 0xe3, 0x00 }, + { 0x40, 0x00 }, + { 0x03, 0x40 }, + { 0x04, 0x00 }, + { 0x05, 0x03 }, + { 0x08, 0x00 }, + { 0x09, 0x07 }, + { 0x0a, 0x01 }, + { 0x0b, 0x32 }, + { 0x0c, 0x32 }, + { 0x0d, 0x0b }, + { 0x0e, 0x00 }, + { 0x23, 0x20 }, // RGB interface control: DE MODE PCLK-N + { 0x24, 0x0c }, + { 0x25, 0x06 }, + { 0x26, 0x14 }, + { 0x27, 0x14 }, + { 0x38, 0x9c }, //VCOM_ADJ1, different to ltk035c5444t + { 0x39, 0xa7 }, //VCOM_ADJ2, different to ltk035c5444t + { 0x3a, 0x50 }, //VCOM_ADJ3, different to ltk035c5444t + { 0x28, 0x40 }, + { 0x29, 0x01 }, + { 0x2a, 0xdf }, + { 0x49, 0x3c }, + { 0x91, 0x57 }, //EXTPW_CTRL2, different to ltk035c5444t + { 0x92, 0x57 }, //EXTPW_CTRL3, different to ltk035c5444t + { 0xa0, 0x55 }, + { 0xa1, 0x50 }, + { 0xa4, 0x9c }, + { 0xa7, 0x02 }, + { 0xa8, 0x01 }, + { 0xa9, 0x01 }, + { 0xaa, 0xfc }, + { 0xab, 0x28 }, + { 0xac, 0x06 }, + { 0xad, 0x06 }, + { 0xae, 0x06 }, + { 0xaf, 0x03 }, + { 0xb0, 0x08 }, + { 0xb1, 0x26 }, + { 0xb2, 0x28 }, + { 0xb3, 0x28 }, + { 0xb4, 0x03 }, // Unknown, different to ltk035c5444 + { 0xb5, 0x08 }, + { 0xb6, 0x26 }, + { 0xb7, 0x08 }, + { 0xb8, 0x26 }, + { 0xf0, 0x00 }, + { 0xf6, 0xc0 }, + // EXTC Command set enable, select page 0 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 }, + // Set gray scale voltage to adjust gamma + { 0xb0, 0x0b }, // PGAMVR0 + { 0xb1, 0x16 }, // PGAMVR1 + { 0xb2, 0x17 }, // PGAMVR2 + { 0xb3, 0x2c }, // PGAMVR3 + { 0xb4, 0x32 }, // PGAMVR4 + { 0xb5, 0x3b }, // PGAMVR5 + { 0xb6, 0x29 }, // PGAMPR0 + { 0xb7, 0x40 }, // PGAMPR1 + { 0xb8, 0x0d }, // PGAMPK0 + { 0xb9, 0x05 }, // PGAMPK1 + { 0xba, 0x12 }, // PGAMPK2 + { 0xbb, 0x10 }, // PGAMPK3 + { 0xbc, 0x12 }, // PGAMPK4 + { 0xbd, 0x15 }, // PGAMPK5 + { 0xbe, 0x19 }, // PGAMPK6 + { 0xbf, 0x0e }, // PGAMPK7 + { 0xc0, 0x16 }, // PGAMPK8 + { 0xc1, 0x0a }, // PGAMPK9 + // Set gray scale voltage to adjust gamma + { 0xd0, 0x0c }, // NGAMVR0 + { 0xd1, 0x17 }, // NGAMVR0 + { 0xd2, 0x14 }, // NGAMVR1 + { 0xd3, 0x2e }, // NGAMVR2 + { 0xd4, 0x32 }, // NGAMVR3 + { 0xd5, 0x3c }, // NGAMVR4 + { 0xd6, 0x22 }, // NGAMPR0 + { 0xd7, 0x3d }, // NGAMPR1 + { 0xd8, 0x0d }, // NGAMPK0 + { 0xd9, 0x07 }, // NGAMPK1 + { 0xda, 0x13 }, // NGAMPK2 + { 0xdb, 0x13 }, // NGAMPK3 + { 0xdc, 0x11 }, // NGAMPK4 + { 0xdd, 0x15 }, // NGAMPK5 + { 0xde, 0x19 }, // NGAMPK6 + { 0xdf, 0x10 }, // NGAMPK7 + { 0xe0, 0x17 }, // NGAMPK8 + { 0xe1, 0x0a }, // NGAMPK9 + // EXTC Command set enable, select page 3 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 }, + // Set various timing settings + { 0x00, 0x2a }, // GIP_VST_1 + { 0x01, 0x2a }, // GIP_VST_2 + { 0x02, 0x2a }, // GIP_VST_3 + { 0x03, 0x2a }, // GIP_VST_4 + { 0x04, 0x61 }, // GIP_VST_5 + { 0x05, 0x80 }, // GIP_VST_6 + { 0x06, 0xc7 }, // GIP_VST_7 + { 0x07, 0x01 }, // GIP_VST_8 + { 0x08, 0x03 }, // GIP_VST_9 + { 0x09, 0x04 }, // GIP_VST_10 + { 0x70, 0x22 }, // GIP_ECLK1 + { 0x71, 0x80 }, // GIP_ECLK2 + { 0x30, 0x2a }, // GIP_CLK_1 + { 0x31, 0x2a }, // GIP_CLK_2 + { 0x32, 0x2a }, // GIP_CLK_3 + { 0x33, 0x2a }, // GIP_CLK_4 + { 0x34, 0x61 }, // GIP_CLK_5 + { 0x35, 0xc5 }, // GIP_CLK_6 + { 0x36, 0x80 }, // GIP_CLK_7 + { 0x37, 0x23 }, // GIP_CLK_8 + { 0x40, 0x03 }, // GIP_CLKA_1 + { 0x41, 0x04 }, // GIP_CLKA_2 + { 0x42, 0x05 }, // GIP_CLKA_3 + { 0x43, 0x06 }, // GIP_CLKA_4 + { 0x44, 0x11 }, // GIP_CLKA_5 + { 0x45, 0xe8 }, // GIP_CLKA_6 + { 0x46, 0xe9 }, // GIP_CLKA_7 + { 0x47, 0x11 }, // GIP_CLKA_8 + { 0x48, 0xea }, // GIP_CLKA_9 + { 0x49, 0xeb }, // GIP_CLKA_10 + { 0x50, 0x07 }, // GIP_CLKB_1 + { 0x51, 0x08 }, // GIP_CLKB_2 + { 0x52, 
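
Two nits in the tables above: the { 0xd1, 0x17 } entry repeats the NGAMVR0 label of the 0xd0 entry, so the negative-gamma VR labels run one behind the positive PGAMVR0..PGAMVR5 side, and the fs035vg158 page-select triple ending in { 0xff, 0x02 } is commented "select page 0" although 0x02 selects page 2. Beyond that, the fs035vg158 table is a near copy of the ltk035c5444t one; only the entries annotated "different to ltk035c5444t" change. An alternative layout (hypothetical, not what the patch does) would keep one shared base table plus a short per-panel override list:

static const struct nv3052c_reg fs035vg158_overrides[] = {
	{ 0x38, 0x9c },	/* VCOM_ADJ1 */
	{ 0x39, 0xa7 },	/* VCOM_ADJ2 */
	{ 0x3a, 0x50 },	/* VCOM_ADJ3 */
	{ 0x91, 0x57 },	/* EXTPW_CTRL2 */
	{ 0x92, 0x57 },	/* EXTPW_CTRL3 */
	{ 0xb4, 0x03 },
};

The duplicated full table costs a few hundred bytes but avoids page-aware override logic, since the same register offset means different things on different pages.
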
0x09 }, // GIP_CLKB_3 + { 0x53, 0x0a }, // GIP_CLKB_4 + { 0x54, 0x11 }, // GIP_CLKB_5 + { 0x55, 0xec }, // GIP_CLKB_6 + { 0x56, 0xed }, // GIP_CLKB_7 + { 0x57, 0x11 }, // GIP_CLKB_8 + { 0x58, 0xef }, // GIP_CLKB_9 + { 0x59, 0xf0 }, // GIP_CLKB_10 + // Map internal GOA signals to GOA output pad + { 0xb1, 0x01 }, // PANELD2U2 + { 0xb4, 0x15 }, // PANELD2U5 + { 0xb5, 0x16 }, // PANELD2U6 + { 0xb6, 0x09 }, // PANELD2U7 + { 0xb7, 0x0f }, // PANELD2U8 + { 0xb8, 0x0d }, // PANELD2U9 + { 0xb9, 0x0b }, // PANELD2U10 + { 0xba, 0x00 }, // PANELD2U11 + { 0xc7, 0x02 }, // PANELD2U24 + { 0xca, 0x17 }, // PANELD2U27 + { 0xcb, 0x18 }, // PANELD2U28 + { 0xcc, 0x0a }, // PANELD2U29 + { 0xcd, 0x10 }, // PANELD2U30 + { 0xce, 0x0e }, // PANELD2U31 + { 0xcf, 0x0c }, // PANELD2U32 + { 0xd0, 0x00 }, // PANELD2U33 + // Map internal GOA signals to GOA output pad + { 0x81, 0x00 }, // PANELU2D2 + { 0x84, 0x15 }, // PANELU2D5 + { 0x85, 0x16 }, // PANELU2D6 + { 0x86, 0x10 }, // PANELU2D7 + { 0x87, 0x0a }, // PANELU2D8 + { 0x88, 0x0c }, // PANELU2D9 + { 0x89, 0x0e }, // PANELU2D10 + { 0x8a, 0x02 }, // PANELU2D11 + { 0x97, 0x00 }, // PANELU2D24 + { 0x9a, 0x17 }, // PANELU2D27 + { 0x9b, 0x18 }, // PANELU2D28 + { 0x9c, 0x0f }, // PANELU2D29 + { 0x9d, 0x09 }, // PANELU2D30 + { 0x9e, 0x0b }, // PANELU2D31 + { 0x9f, 0x0d }, // PANELU2D32 + { 0xa0, 0x01 }, // PANELU2D33 + // EXTC Command set enable, select page 2 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 }, + // Unknown registers { 0x01, 0x01 }, { 0x02, 0xda }, { 0x03, 0xba }, @@ -227,10 +427,10 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = { { 0x0e, 0x48 }, { 0x0f, 0x38 }, { 0x10, 0x2b }, - { 0xff, 0x30 }, - { 0xff, 0x52 }, - { 0xff, 0x00 }, - { 0x36, 0x0a }, + // EXTC Command set enable, select page 0 + { 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 }, + // Display Access Control + { 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0 }; static inline struct nv3052c *to_nv3052c(struct drm_panel *panel) @@ -241,6 +441,8 @@ static inline struct nv3052c *to_nv3052c(struct drm_panel *panel) static int nv3052c_prepare(struct drm_panel *panel) { struct nv3052c *priv = to_nv3052c(panel); + const struct nv3052c_reg *panel_regs = priv->panel_info->panel_regs; + unsigned int panel_regs_len = priv->panel_info->panel_regs_len; struct mipi_dbi *dbi = &priv->dbi; unsigned int i; int err; @@ -257,9 +459,9 @@ static int nv3052c_prepare(struct drm_panel *panel) gpiod_set_value_cansleep(priv->reset_gpio, 0); usleep_range(5000, 20000); - for (i = 0; i < ARRAY_SIZE(nv3052c_panel_regs); i++) { - err = mipi_dbi_command(dbi, nv3052c_panel_regs[i].cmd, - nv3052c_panel_regs[i].val); + for (i = 0; i < panel_regs_len; i++) { + err = mipi_dbi_command(dbi, panel_regs[i].cmd, + panel_regs[i].val); if (err) { dev_err(priv->dev, "Unable to set register: %d\n", err); @@ -453,6 +655,21 @@ static const struct drm_display_mode ltk035c5444t_modes[] = { }, }; +static const struct drm_display_mode fs035vg158_modes[] = { + { /* 60 Hz */ + .clock = 21000, + .hdisplay = 640, + .hsync_start = 640 + 34, + .hsync_end = 640 + 34 + 4, + .htotal = 640 + 34 + 4 + 20, + .vdisplay = 480, + .vsync_start = 480 + 12, + .vsync_end = 480 + 12 + 4, + .vtotal = 480 + 12 + 4 + 6, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, +}; + static const struct nv3052c_panel_info ltk035c5444t_panel_info = { .display_modes = ltk035c5444t_modes, .num_modes = ARRAY_SIZE(ltk035c5444t_modes), @@ -460,10 +677,31 @@ static const struct nv3052c_panel_info ltk035c5444t_panel_info = { .height_mm = 64, .bus_format = 
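
The /* 60 Hz */ comment on fs035vg158_modes checks out: htotal = 640 + 34 + 4 + 20 = 698, vtotal = 480 + 12 + 4 + 6 = 502, and .clock is in kHz, so the refresh rate is 21,000,000 / (698 * 502) ≈ 59.93 Hz. The same arithmetic as a standalone check:

#include <stdio.h>

int main(void)
{
	const double clock_hz = 21000 * 1000.0;	/* .clock is in kHz */
	const int htotal = 640 + 34 + 4 + 20;	/* 698 */
	const int vtotal = 480 + 12 + 4 + 6;	/* 502 */

	printf("refresh = %.2f Hz\n", clock_hz / (htotal * vtotal));
	return 0;	/* prints: refresh = 59.93 Hz */
}
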
MEDIA_BUS_FMT_RGB888_1X24, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .panel_regs = ltk035c5444t_panel_regs, + .panel_regs_len = ARRAY_SIZE(ltk035c5444t_panel_regs), +}; + +static const struct nv3052c_panel_info fs035vg158_panel_info = { + .display_modes = fs035vg158_modes, + .num_modes = ARRAY_SIZE(fs035vg158_modes), + .width_mm = 70, + .height_mm = 53, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .panel_regs = fs035vg158_panel_regs, + .panel_regs_len = ARRAY_SIZE(fs035vg158_panel_regs), +}; + +static const struct spi_device_id nv3052c_ids[] = { + { "ltk035c5444t", }, + { "fs035vg158", }, + { /* sentinel */ } }; +MODULE_DEVICE_TABLE(spi, nv3052c_ids); static const struct of_device_id nv3052c_of_match[] = { { .compatible = "leadtek,ltk035c5444t", .data = <k035c5444t_panel_info }, + { .compatible = "fascontek,fs035vg158", .data = &fs035vg158_panel_info }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, nv3052c_of_match); @@ -473,6 +711,7 @@ static struct spi_driver nv3052c_driver = { .name = "nv3052c", .of_match_table = nv3052c_of_match, }, + .id_table = nv3052c_ids, .probe = nv3052c_probe, .remove = nv3052c_remove, }; diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35510.c b/drivers/gpu/drm/panel/panel-novatek-nt35510.c index d6dceb858008..83a9cf53d269 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35510.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35510.c @@ -1023,7 +1023,7 @@ static const struct nt35510_config nt35510_hydis_hva40wv1 = { .hdisplay = 480, .hsync_start = 480 + 2, /* HFP = 2 */ .hsync_end = 480 + 2 + 0, /* HSync = 0 */ - .htotal = 480 + 2 + 0 + 5, /* HFP = 5 */ + .htotal = 480 + 2 + 0 + 5, /* HBP = 5 */ .vdisplay = 800, .vsync_start = 800 + 2, /* VFP = 2 */ .vsync_end = 800 + 2 + 0, /* VSync = 0 */ diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 9367a4572dcf..2214cb09678c 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1134,6 +1134,37 @@ static const struct panel_desc auo_g133han01 = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +static const struct display_timing auo_g156han04_timings = { + .pixelclock = { 137000000, 141000000, 146000000 }, + .hactive = { 1920, 1920, 1920 }, + .hfront_porch = { 60, 60, 60 }, + .hback_porch = { 90, 92, 111 }, + .hsync_len = { 32, 32, 32 }, + .vactive = { 1080, 1080, 1080 }, + .vfront_porch = { 12, 12, 12 }, + .vback_porch = { 24, 36, 56 }, + .vsync_len = { 8, 8, 8 }, +}; + +static const struct panel_desc auo_g156han04 = { + .timings = &auo_g156han04_timings, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 344, + .height = 194, + }, + .delay = { + .prepare = 50, /* T2 */ + .enable = 200, /* T3 */ + .disable = 110, /* T10 */ + .unprepare = 1000, /* T13 */ + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct drm_display_mode auo_g156xtn01_mode = { .clock = 76000, .hdisplay = 1366, @@ -1324,6 +1355,35 @@ static const struct panel_desc bananapi_s070wv20_ct16 = { }, }; +static const struct drm_display_mode boe_bp101wx1_100_mode = { + .clock = 78945, + .hdisplay = 1280, + .hsync_start = 1280 + 0, + .hsync_end = 1280 + 0 + 2, + .htotal = 1280 + 62 + 0 + 2, + .vdisplay = 800, + .vsync_start = 800 + 8, + .vsync_end = 800 + 8 + 2, + .vtotal = 800 + 6 + 8 + 2, +}; + +static const struct panel_desc boe_bp101wx1_100 = { + .modes = 
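
For the AUO G156HAN04 entry just added, the display_timing triplets are {min, typical, max}; at typical values htotal = 1920 + 60 + 92 + 32 = 2104 and vtotal = 1080 + 12 + 36 + 8 = 1136, so the typical refresh is 141,000,000 / (2104 * 1136) ≈ 59.0 Hz, consistent with a 1080p LVDS panel driven near 60 Hz. The T2/T3/T10/T13 comments on the delays appear to reference the timing symbols from the panel's power-sequence datasheet.
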
&boe_bp101wx1_100_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 217, + .height = 136, + }, + .delay = { + .enable = 50, + .disable = 50, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing boe_ev121wxm_n10_1850_timing = { .pixelclock = { 69922000, 71000000, 72293000 }, .hactive = { 1280, 1280, 1280 }, @@ -1973,6 +2033,33 @@ static const struct panel_desc eink_vb3300_kca = { .connector_type = DRM_MODE_CONNECTOR_DPI, }; +static const struct display_timing evervision_vgg644804_timing = { + .pixelclock = { 25175000, 25175000, 25175000 }, + .hactive = { 640, 640, 640 }, + .hfront_porch = { 16, 16, 16 }, + .hback_porch = { 82, 114, 170 }, + .hsync_len = { 5, 30, 30 }, + .vactive = { 480, 480, 480 }, + .vfront_porch = { 10, 10, 10 }, + .vback_porch = { 30, 32, 34 }, + .vsync_len = { 1, 3, 5 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE | + DISPLAY_FLAGS_SYNC_POSEDGE, +}; + +static const struct panel_desc evervision_vgg644804 = { + .timings = &evervision_vgg644804_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 115, + .height = 86, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, +}; + static const struct display_timing evervision_vgg804821_timing = { .pixelclock = { 27600000, 33300000, 50000000 }, .hactive = { 800, 800, 800 }, @@ -4233,6 +4320,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "auo,g133han01", .data = &auo_g133han01, }, { + .compatible = "auo,g156han04", + .data = &auo_g156han04, + }, { .compatible = "auo,g156xtn01", .data = &auo_g156xtn01, }, { @@ -4254,6 +4344,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "bananapi,s070wv20-ct16", .data = &bananapi_s070wv20_ct16, }, { + .compatible = "boe,bp101wx1-100", + .data = &boe_bp101wx1_100, + }, { .compatible = "boe,ev121wxm-n10-1850", .data = &boe_ev121wxm_n10_1850, }, { @@ -4335,6 +4428,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "eink,vb3300-kca", .data = &eink_vb3300_kca, }, { + .compatible = "evervision,vgg644804", + .data = &evervision_vgg644804, + }, { .compatible = "evervision,vgg804821", .data = &evervision_vgg804821, }, { diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c index 0459965e1b4f..421eb4592b61 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c @@ -288,7 +288,7 @@ static void st7701_init_sequence(struct st7701 *st7701) FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVDD_MASK, DIV_ROUND_CLOSEST(desc->avdd_mv - 6200, 200)) | FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVCL_MASK, - DIV_ROUND_CLOSEST(-4400 + desc->avcl_mv, 200))); + DIV_ROUND_CLOSEST(-4400 - desc->avcl_mv, 200))); /* T2D = 0.2us * T2D[3:0] */ ST7701_DSI(st7701, DSI_CMD2_BK1_SPD1, @@ -423,6 +423,42 @@ static void kd50t048a_gip_sequence(struct st7701 *st7701) 0xFF, 0xFF, 0xFF, 0xFF, 0x10, 0x45, 0x67, 0x98, 0xBA); } +static void rg_arc_gip_sequence(struct st7701 *st7701) +{ + st7701_switch_cmd_bkx(st7701, true, 3); + ST7701_DSI(st7701, 0xEF, 0x08); + st7701_switch_cmd_bkx(st7701, true, 0); + ST7701_DSI(st7701, 0xC7, 0x04); + ST7701_DSI(st7701, 0xCC, 0x38); + st7701_switch_cmd_bkx(st7701, true, 1); + ST7701_DSI(st7701, 0xB9, 0x10); + ST7701_DSI(st7701, 0xBC, 0x03); + 
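
The one-character fix near the top of the panel-sitronix-st7701.c hunk is easiest to see with the driver's own numbers. avcl_mv is stored as a negative millivolt value, and the descriptors visible here use -4400. The old expression computed DIV_ROUND_CLOSEST(-4400 + (-4400), 200) = DIV_ROUND_CLOSEST(-8800, 200) = -44, which is nonsense once masked into the AVCL field. The corrected form measures the distance from the -4.4 V reference in 200 mV steps: (-4400 - (-4400)) / 200 = 0 for these panels, and a hypothetical panel wanting -4.6 V would encode (-4400 - (-4600)) / 200 = 1.
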
ST7701_DSI(st7701, 0xC0, 0x89); + ST7701_DSI(st7701, 0xE0, 0x00, 0x00, 0x02); + ST7701_DSI(st7701, 0xE1, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x20); + ST7701_DSI(st7701, 0xE2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + ST7701_DSI(st7701, 0xE3, 0x00, 0x00, 0x33, 0x00); + ST7701_DSI(st7701, 0xE4, 0x22, 0x00); + ST7701_DSI(st7701, 0xE5, 0x04, 0x5C, 0xA0, 0xA0, 0x06, 0x5C, 0xA0, + 0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + ST7701_DSI(st7701, 0xE6, 0x00, 0x00, 0x33, 0x00); + ST7701_DSI(st7701, 0xE7, 0x22, 0x00); + ST7701_DSI(st7701, 0xE8, 0x05, 0x5C, 0xA0, 0xA0, 0x07, 0x5C, 0xA0, + 0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + ST7701_DSI(st7701, 0xEB, 0x02, 0x00, 0x40, 0x40, 0x00, 0x00, 0x00); + ST7701_DSI(st7701, 0xEC, 0x00, 0x00); + ST7701_DSI(st7701, 0xED, 0xFA, 0x45, 0x0B, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xB0, 0x54, 0xAF); + ST7701_DSI(st7701, 0xEF, 0x08, 0x08, 0x08, 0x45, 0x3F, 0x54); + st7701_switch_cmd_bkx(st7701, false, 0); + ST7701_DSI(st7701, MIPI_DCS_SET_ADDRESS_MODE, 0x17); + ST7701_DSI(st7701, MIPI_DCS_SET_PIXEL_FORMAT, 0x77); + ST7701_DSI(st7701, MIPI_DCS_EXIT_SLEEP_MODE, 0x00); + msleep(120); +} + static int st7701_prepare(struct drm_panel *panel) { struct st7701 *st7701 = panel_to_st7701(panel); @@ -839,6 +875,105 @@ static const struct st7701_panel_desc kd50t048a_desc = { .gip_sequence = kd50t048a_gip_sequence, }; +static const struct drm_display_mode rg_arc_mode = { + .clock = 25600, + + .hdisplay = 480, + .hsync_start = 480 + 60, + .hsync_end = 480 + 60 + 42, + .htotal = 480 + 60 + 42 + 60, + + .vdisplay = 640, + .vsync_start = 640 + 10, + .vsync_end = 640 + 10 + 4, + .vtotal = 640 + 10 + 4 + 16, + + .width_mm = 63, + .height_mm = 84, + + .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +}; + +static const struct st7701_panel_desc rg_arc_desc = { + .mode = &rg_arc_mode, + .lanes = 2, + .format = MIPI_DSI_FMT_RGB888, + .panel_sleep_delay = 80, + + .pv_gamma = { + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1d), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x12), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x0a), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x25), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x03), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c) + }, + .nv_gamma = { + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16), + 
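
The rg_arc gamma tables pack two named fields per register byte. CFIELD_PREP appears to be the driver's constant-expression counterpart of FIELD_PREP, usable in static initializers. A standalone sketch of the same packing with made-up masks (the DEMO_* names and CPREP are illustrative only, not driver API):

#include <stdint.h>
#include <stdio.h>

#define DEMO_AJ_MASK	0xc0	/* bits 7:6 */
#define DEMO_VC_MASK	0x3f	/* bits 5:0 */

/* Constant-expression field insert: multiplying by the mask's lowest
 * set bit shifts the value into position, then the mask clips it. */
#define CPREP(mask, val) (((val) * ((mask) & ~((mask) << 1))) & (mask))

int main(void)
{
	uint8_t byte = CPREP(DEMO_AJ_MASK, 0x01) | CPREP(DEMO_VC_MASK, 0x16);

	printf("0x%02x\n", byte);	/* 0x40 | 0x16 -> prints 0x56 */
	return 0;
}
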
CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1e), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x11), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x08), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x26), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x15), + + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f), + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) | + CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c) + }, + .nlinv = 0, + .vop_uv = 4500000, + .vcom_uv = 762500, + .vgh_mv = 15000, + .vgl_mv = -9510, + .avdd_mv = 6600, + .avcl_mv = -4400, + .gamma_op_bias = OP_BIAS_MIDDLE, + .input_op_bias = OP_BIAS_MIN, + .output_op_bias = OP_BIAS_MIN, + .t2d_ns = 1600, + .t3d_ns = 10400, + .eot_en = true, + .gip_sequence = rg_arc_gip_sequence, +}; + static int st7701_dsi_probe(struct mipi_dsi_device *dsi) { const struct st7701_panel_desc *desc; @@ -917,6 +1052,7 @@ static void st7701_dsi_remove(struct mipi_dsi_device *dsi) } static const struct of_device_id st7701_of_match[] = { + { .compatible = "anbernic,rg-arc-panel", .data = &rg_arc_desc }, { .compatible = "densitron,dmt028vghmcmi-1a", .data = &dmt028vghmcmi_1a_desc }, { .compatible = "elida,kd50t048a", .data = &kd50t048a_desc }, { .compatible = "techstar,ts8550b", .data = &ts8550b_desc }, diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c new file mode 100644 index 000000000000..169c629746c7 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synaptics R63353 Controller driver + * + * Copyright (C) 2020 BSH Hausgerate GmbH + */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/media-bus-format.h> + +#include <linux/gpio/consumer.h> +#include <linux/regulator/consumer.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +#include <video/mipi_display.h> + +#define R63353_INSTR(...) 
{ \ + .len = sizeof((u8[]) {__VA_ARGS__}), \ + .data = (u8[]){__VA_ARGS__} \ + } + +struct r63353_instr { + size_t len; + const u8 *data; +}; + +static const struct r63353_instr sharp_ls068b3sx02_init[] = { + R63353_INSTR(0x51, 0xff), + R63353_INSTR(0x53, 0x0c), + R63353_INSTR(0x55, 0x00), + R63353_INSTR(0x84, 0x00), + R63353_INSTR(0x29), +}; + +struct r63353_desc { + const char *name; + const struct r63353_instr *init; + const size_t init_length; + const struct drm_display_mode *mode; + u32 width_mm; + u32 height_mm; +}; + +struct r63353_panel { + struct drm_panel base; + struct mipi_dsi_device *dsi; + + struct gpio_desc *reset_gpio; + struct regulator *dvdd; + struct regulator *avdd; + + struct r63353_desc *pdata; +}; + +static inline struct r63353_panel *to_r63353_panel(struct drm_panel *panel) +{ + return container_of(panel, struct r63353_panel, base); +} + +static int r63353_panel_power_on(struct r63353_panel *rpanel) +{ + struct mipi_dsi_device *dsi = rpanel->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = regulator_enable(rpanel->avdd); + if (ret) { + dev_err(dev, "Failed to enable avdd regulator (%d)\n", ret); + return ret; + } + + usleep_range(15000, 25000); + + ret = regulator_enable(rpanel->dvdd); + if (ret) { + dev_err(dev, "Failed to enable dvdd regulator (%d)\n", ret); + regulator_disable(rpanel->avdd); + return ret; + } + + usleep_range(300000, 350000); + gpiod_set_value(rpanel->reset_gpio, 1); + usleep_range(15000, 25000); + + return 0; +} + +static int r63353_panel_power_off(struct r63353_panel *rpanel) +{ + gpiod_set_value(rpanel->reset_gpio, 0); + regulator_disable(rpanel->dvdd); + regulator_disable(rpanel->avdd); + + return 0; +} + +static int r63353_panel_activate(struct r63353_panel *rpanel) +{ + struct mipi_dsi_device *dsi = rpanel->dsi; + struct device *dev = &dsi->dev; + int i, ret; + + ret = mipi_dsi_dcs_soft_reset(dsi); + if (ret < 0) { + dev_err(dev, "Failed to do Software Reset (%d)\n", ret); + goto fail; + } + + usleep_range(15000, 17000); + + ret = mipi_dsi_dcs_enter_sleep_mode(dsi); + if (ret < 0) { + dev_err(dev, "Failed to enter sleep mode (%d)\n", ret); + goto fail; + } + + for (i = 0; i < rpanel->pdata->init_length; i++) { + const struct r63353_instr *instr = &rpanel->pdata->init[i]; + + ret = mipi_dsi_dcs_write_buffer(dsi, instr->data, instr->len); + if (ret < 0) + goto fail; + } + + msleep(120); + + ret = mipi_dsi_dcs_exit_sleep_mode(dsi); + if (ret < 0) { + dev_err(dev, "Failed to exit sleep mode (%d)\n", ret); + goto fail; + } + + usleep_range(5000, 10000); + + ret = mipi_dsi_dcs_set_display_on(dsi); + if (ret < 0) { + dev_err(dev, "Failed to set display ON (%d)\n", ret); + goto fail; + } + + return 0; + +fail: + gpiod_set_value(rpanel->reset_gpio, 0); + + return ret; +} + +static int r63353_panel_prepare(struct drm_panel *panel) +{ + struct r63353_panel *rpanel = to_r63353_panel(panel); + struct mipi_dsi_device *dsi = rpanel->dsi; + struct device *dev = &dsi->dev; + int ret; + + dev_dbg(dev, "Preparing\n"); + + ret = r63353_panel_power_on(rpanel); + if (ret) + return ret; + + ret = r63353_panel_activate(rpanel); + if (ret) { + r63353_panel_power_off(rpanel); + return ret; + } + + dev_dbg(dev, "Prepared\n"); + return 0; +} + +static int r63353_panel_deactivate(struct r63353_panel *rpanel) +{ + struct mipi_dsi_device *dsi = rpanel->dsi; + struct device *dev = &dsi->dev; + int ret; + + ret = mipi_dsi_dcs_set_display_off(dsi); + if (ret < 0) { + dev_err(dev, "Failed to set display OFF (%d)\n", ret); + return ret; + } + + usleep_range(5000, 
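
The R63353_INSTR macro completed just above leans on C99 compound literals: (u8[]){...} creates an unnamed array, so sizeof on one copy yields the payload length while a second copy supplies the data pointer, and no table entry needs a hand-counted length. The same trick outside the kernel:

#include <stddef.h>
#include <stdio.h>

struct instr {
	size_t len;
	const unsigned char *data;
};

/* Same shape as R63353_INSTR: the literal appears twice, once under
 * sizeof for the length and once for the data pointer. */
#define INSTR(...)						\
	{							\
		.len  = sizeof((unsigned char[]){__VA_ARGS__}),	\
		.data = (unsigned char[]){__VA_ARGS__},		\
	}

static const struct instr demo[] = {
	INSTR(0x51, 0xff),	/* len == 2 */
	INSTR(0x29),		/* len == 1 */
};

int main(void)
{
	printf("%zu %zu\n", demo[0].len, demo[1].len);	/* prints: 2 1 */
	return 0;
}
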
10000); + + ret = mipi_dsi_dcs_enter_sleep_mode(dsi); + if (ret < 0) { + dev_err(dev, "Failed to enter sleep mode (%d)\n", ret); + return ret; + } + + return 0; +} + +static int r63353_panel_unprepare(struct drm_panel *panel) +{ + struct r63353_panel *rpanel = to_r63353_panel(panel); + + r63353_panel_deactivate(rpanel); + r63353_panel_power_off(rpanel); + + return 0; +} + +static const struct drm_display_mode sharp_ls068b3sx02_timing = { + .clock = 70000, + .hdisplay = 640, + .hsync_start = 640 + 35, + .hsync_end = 640 + 35 + 2, + .htotal = 640 + 35 + 2 + 150, + .vdisplay = 1280, + .vsync_start = 1280 + 2, + .vsync_end = 1280 + 2 + 4, + .vtotal = 1280 + 2 + 4 + 0, +}; + +static int r63353_panel_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct r63353_panel *rpanel = to_r63353_panel(panel); + struct drm_display_mode *mode; + static const u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + + mode = drm_mode_duplicate(connector->dev, rpanel->pdata->mode); + if (!mode) + return -ENOMEM; + + drm_mode_set_name(mode); + drm_mode_probed_add(connector, mode); + + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + connector->display_info.width_mm = rpanel->pdata->width_mm; + connector->display_info.height_mm = rpanel->pdata->height_mm; + + drm_display_info_set_bus_formats(&connector->display_info, + &bus_format, 1); + + return 1; +} + +static const struct drm_panel_funcs r63353_panel_funcs = { + .prepare = r63353_panel_prepare, + .unprepare = r63353_panel_unprepare, + .get_modes = r63353_panel_get_modes, +}; + +static int r63353_panel_probe(struct mipi_dsi_device *dsi) +{ + int ret = 0; + struct device *dev = &dsi->dev; + struct r63353_panel *panel; + + panel = devm_kzalloc(&dsi->dev, sizeof(*panel), GFP_KERNEL); + if (!panel) + return -ENOMEM; + + mipi_dsi_set_drvdata(dsi, panel); + panel->dsi = dsi; + panel->pdata = (struct r63353_desc *)of_device_get_match_data(dev); + + dev_info(dev, "Panel %s\n", panel->pdata->name); + + dsi->lanes = 2; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO | + MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM | + MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET; + + panel->dvdd = devm_regulator_get(dev, "dvdd"); + if (IS_ERR(panel->dvdd)) + return PTR_ERR(panel->dvdd); + panel->avdd = devm_regulator_get(dev, "avdd"); + if (IS_ERR(panel->avdd)) + return PTR_ERR(panel->avdd); + + panel->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(panel->reset_gpio)) { + dev_err(dev, "failed to get RESET GPIO\n"); + return PTR_ERR(panel->reset_gpio); + } + + drm_panel_init(&panel->base, dev, &r63353_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + panel->base.prepare_prev_first = true; + ret = drm_panel_of_backlight(&panel->base); + if (ret) + return ret; + + drm_panel_add(&panel->base); + + ret = mipi_dsi_attach(dsi); + if (ret < 0) { + dev_err(dev, "mipi_dsi_attach failed: %d\n", ret); + drm_panel_remove(&panel->base); + return ret; + } + + return ret; +} + +static void r63353_panel_remove(struct mipi_dsi_device *dsi) +{ + struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi); + struct device *dev = &dsi->dev; + int ret; + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(dev, "Failed to detach from host (%d)\n", ret); + + drm_panel_remove(&rpanel->base); +} + +static void r63353_panel_shutdown(struct mipi_dsi_device *dsi) +{ + struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi); + + r63353_panel_unprepare(&rpanel->base); +} + +static const struct 
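
From sharp_ls068b3sx02_timing above: htotal = 640 + 35 + 2 + 150 = 827 and vtotal = 1280 + 2 + 4 + 0 = 1286 (note the zero-line vertical back porch), so the mode works out to 70,000,000 / (827 * 1286) ≈ 65.8 Hz; the driver itself does not claim a nominal rate.
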
r63353_desc sharp_ls068b3sx02_data = { + .name = "Sharp LS068B3SX02", + .mode = &sharp_ls068b3sx02_timing, + .init = sharp_ls068b3sx02_init, + .init_length = ARRAY_SIZE(sharp_ls068b3sx02_init), + .width_mm = 68, + .height_mm = 159, +}; + +static const struct of_device_id r63353_of_match[] = { + { .compatible = "sharp,ls068b3sx02", .data = &sharp_ls068b3sx02_data }, + { } +}; + +MODULE_DEVICE_TABLE(of, r63353_of_match); + +static struct mipi_dsi_driver r63353_panel_driver = { + .driver = { + .name = "r63353-dsi", + .of_match_table = r63353_of_match, + }, + .probe = r63353_panel_probe, + .remove = r63353_panel_remove, + .shutdown = r63353_panel_shutdown, +}; + +module_mipi_dsi_driver(r63353_panel_driver); + +MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>"); +MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>"); +MODULE_DESCRIPTION("Synaptics R63353 Controller Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 28f7046e1b1a..a45e4addcc19 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev) panfrost_job_enable_interrupts(pfdev); } -static int panfrost_device_resume(struct device *dev) +static int panfrost_device_runtime_resume(struct device *dev) { struct panfrost_device *pfdev = dev_get_drvdata(dev); @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev) return 0; } -static int panfrost_device_suspend(struct device *dev) +static int panfrost_device_runtime_suspend(struct device *dev) { struct panfrost_device *pfdev = dev_get_drvdata(dev); @@ -421,10 +421,83 @@ static int panfrost_device_suspend(struct device *dev) return -EBUSY; panfrost_devfreq_suspend(pfdev); + panfrost_job_suspend_irq(pfdev); + panfrost_mmu_suspend_irq(pfdev); + panfrost_gpu_suspend_irq(pfdev); panfrost_gpu_power_off(pfdev); return 0; } -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend, - panfrost_device_resume, NULL); +static int panfrost_device_resume(struct device *dev) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + int ret; + + if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF)) { + unsigned long freq = pfdev->pfdevfreq.fast_rate; + struct dev_pm_opp *opp; + + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) + return PTR_ERR(opp); + dev_pm_opp_set_opp(dev, opp); + dev_pm_opp_put(opp); + } + + if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) { + ret = clk_enable(pfdev->clock); + if (ret) + goto err_clk; + + if (pfdev->bus_clock) { + ret = clk_enable(pfdev->bus_clock); + if (ret) + goto err_bus_clk; + } + } + + ret = pm_runtime_force_resume(dev); + if (ret) + goto err_resume; + + return 0; + +err_resume: + if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock) + clk_disable(pfdev->bus_clock); +err_bus_clk: + if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) + clk_disable(pfdev->clock); +err_clk: + if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF)) + dev_pm_opp_set_opp(dev, NULL); + return ret; +} + +static int panfrost_device_suspend(struct device *dev) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + + if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) { + if (pfdev->bus_clock) + clk_disable(pfdev->bus_clock); + + clk_disable(pfdev->clock); + } + + if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF)) 
+ dev_pm_opp_set_opp(dev, NULL); + + return 0; +} + +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = { + RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume) +}; diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index 1ef38f60d5dc..62f7e3527385 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -25,6 +25,23 @@ struct panfrost_perfcnt; #define NUM_JOB_SLOTS 3 #define MAX_PM_DOMAINS 5 +enum panfrost_drv_comp_bits { + PANFROST_COMP_BIT_GPU, + PANFROST_COMP_BIT_JOB, + PANFROST_COMP_BIT_MMU, + PANFROST_COMP_BIT_MAX +}; + +/** + * enum panfrost_gpu_pm - Supported kernel power management features + * @GPU_PM_CLK_DIS: Allow disabling clocks during system suspend + * @GPU_PM_VREG_OFF: Allow turning off regulators during system suspend + */ +enum panfrost_gpu_pm { + GPU_PM_CLK_DIS, + GPU_PM_VREG_OFF, +}; + struct panfrost_features { u16 id; u16 revision; @@ -75,12 +92,17 @@ struct panfrost_compatible { /* Vendor implementation quirks callback */ void (*vendor_quirk)(struct panfrost_device *pfdev); + + /* Allowed PM features */ + u8 pm_features; }; struct panfrost_device { struct device *dev; struct drm_device *ddev; struct platform_device *pdev; + int gpu_irq; + int mmu_irq; void __iomem *iomem; struct clk *clock; @@ -94,6 +116,7 @@ struct panfrost_device { struct panfrost_features features; const struct panfrost_compatible *comp; + DECLARE_BITMAP(is_suspended, PANFROST_COMP_BIT_MAX); spinlock_t as_lock; unsigned long as_in_use_mask; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 7cabf4e3d1f2..a926d71e8131 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -274,7 +274,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, ret = drm_sched_job_init(&job->base, &file_priv->sched_entity[slot], - NULL); + 1, NULL); if (ret) goto out_put_job; @@ -734,6 +734,7 @@ static const struct panfrost_compatible mediatek_mt8183_b_data = { .supply_names = mediatek_mt8183_b_supplies, .num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains), .pm_domain_names = mediatek_mt8183_pm_domains, + .pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF), }; static const char * const mediatek_mt8186_pm_domains[] = { "core0", "core1" }; @@ -742,6 +743,7 @@ static const struct panfrost_compatible mediatek_mt8186_data = { .supply_names = mediatek_mt8183_b_supplies, .num_pm_domains = ARRAY_SIZE(mediatek_mt8186_pm_domains), .pm_domain_names = mediatek_mt8186_pm_domains, + .pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF), }; static const char * const mediatek_mt8192_supplies[] = { "mali", NULL }; @@ -752,6 +754,7 @@ static const struct panfrost_compatible mediatek_mt8192_data = { .supply_names = mediatek_mt8192_supplies, .num_pm_domains = ARRAY_SIZE(mediatek_mt8192_pm_domains), .pm_domain_names = mediatek_mt8192_pm_domains, + .pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF), }; static const struct of_device_id dt_match[] = { diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c index e7942ac449c6..47751302f1bc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_dump.c +++ b/drivers/gpu/drm/panfrost/panfrost_dump.c @@ -220,16 +220,8 @@ void panfrost_core_dump(struct panfrost_job *job) iter.hdr->bomap.data[0] = bomap - bomap_start; - 
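
The panfrost rework above separates runtime PM, which powers the GPU down between jobs, from system sleep, which reuses it through pm_runtime_force_suspend()/resume() and additionally gates clocks and regulators according to the new pm_features bits. A self-contained sketch of that dev_pm_ops shape (the example_* names are stand-ins for the panfrost callbacks, and the real resume also replays the OPP and clock state):

#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int example_runtime_suspend(struct device *dev) { return 0; }
static int example_runtime_resume(struct device *dev)  { return 0; }

/* System sleep funnels through runtime PM so both paths share one
 * power-off sequence. */
static int example_suspend(struct device *dev)
{
	return pm_runtime_force_suspend(dev);
}

static int example_resume(struct device *dev)
{
	return pm_runtime_force_resume(dev);
}

static const struct dev_pm_ops example_pm_ops = {
	RUNTIME_PM_OPS(example_runtime_suspend, example_runtime_resume, NULL)
	SYSTEM_SLEEP_PM_OPS(example_suspend, example_resume)
};
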
for_each_sgtable_page(bo->base.sgt, &page_iter, 0) { - struct page *page = sg_page_iter_page(&page_iter); - - if (!IS_ERR(page)) { - *bomap++ = page_to_phys(page); - } else { - dev_err(pfdev->dev, "Panfrost Dump: wrong page\n"); - *bomap++ = 0; - } - } + for_each_sgtable_page(bo->base.sgt, &page_iter, 0) + *bomap++ = page_to_phys(sg_page_iter_page(&page_iter)); iter.hdr->bomap.iova = mapping->mmnode.start << PAGE_SHIFT; diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index f0be7e19b13e..9063ce254642 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -22,9 +22,13 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; - u32 state = gpu_read(pfdev, GPU_INT_STAT); - u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); + u32 fault_status, state; + if (test_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended)) + return IRQ_NONE; + + fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); + state = gpu_read(pfdev, GPU_INT_STAT); if (!state) return IRQ_NONE; @@ -60,18 +64,32 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev) gpu_write(pfdev, GPU_INT_MASK, 0); gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); - gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET); + clear_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended); + + gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET); ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, - val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000); + val, val & GPU_IRQ_RESET_COMPLETED, 10, 10000); if (ret) { - dev_err(pfdev->dev, "gpu soft reset timed out\n"); - return ret; + dev_err(pfdev->dev, "gpu soft reset timed out, attempting hard reset\n"); + + gpu_write(pfdev, GPU_CMD, GPU_CMD_HARD_RESET); + ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, val, + val & GPU_IRQ_RESET_COMPLETED, 100, 10000); + if (ret) { + dev_err(pfdev->dev, "gpu hard reset timed out\n"); + return ret; + } } gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL); - gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL); + + /* Only enable the interrupts we care about */ + gpu_write(pfdev, GPU_INT_MASK, + GPU_IRQ_MASK_ERROR | + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED | + GPU_IRQ_CLEAN_CACHES_COMPLETED); /* * All in-flight jobs should have released their cycle @@ -362,32 +380,42 @@ unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev) return ((u64)hi << 32) | lo; } +static u64 panfrost_get_core_mask(struct panfrost_device *pfdev) +{ + u64 core_mask; + + if (pfdev->features.l2_present == 1) + return U64_MAX; + + /* + * Only support one core group now. + * ~(l2_present - 1) unsets all bits in l2_present except + * the bottom bit. (l2_present - 2) has all the bits in + * the first core group set. AND them together to generate + * a mask of cores in the first core group. + */ + core_mask = ~(pfdev->features.l2_present - 1) & + (pfdev->features.l2_present - 2); + dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n", + hweight64(core_mask), + hweight64(pfdev->features.shader_present)); + + return core_mask; +} + void panfrost_gpu_power_on(struct panfrost_device *pfdev) { int ret; u32 val; - u64 core_mask = U64_MAX; + u64 core_mask; panfrost_gpu_init_quirks(pfdev); + core_mask = panfrost_get_core_mask(pfdev); - if (pfdev->features.l2_present != 1) { - /* - * Only support one core group now. - * ~(l2_present - 1) unsets all bits in l2_present except - * the bottom bit. 
(l2_present - 2) has all the bits in - * the first core group set. AND them together to generate - * a mask of cores in the first core group. - */ - core_mask = ~(pfdev->features.l2_present - 1) & - (pfdev->features.l2_present - 2); - dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n", - hweight64(core_mask), - hweight64(pfdev->features.shader_present)); - } gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask); ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, val, val == (pfdev->features.l2_present & core_mask), - 100, 20000); + 10, 20000); if (ret) dev_err(pfdev->dev, "error powering up gpu L2"); @@ -395,27 +423,52 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev) pfdev->features.shader_present & core_mask); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, val, val == (pfdev->features.shader_present & core_mask), - 100, 20000); + 10, 20000); if (ret) dev_err(pfdev->dev, "error powering up gpu shader"); gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, - val, val == pfdev->features.tiler_present, 100, 1000); + val, val == pfdev->features.tiler_present, 10, 1000); if (ret) dev_err(pfdev->dev, "error powering up gpu tiler"); } void panfrost_gpu_power_off(struct panfrost_device *pfdev) { - gpu_write(pfdev, TILER_PWROFF_LO, 0); - gpu_write(pfdev, SHADER_PWROFF_LO, 0); - gpu_write(pfdev, L2_PWROFF_LO, 0); + int ret; + u32 val; + + gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, + val, !val, 1, 1000); + if (ret) + dev_err(pfdev->dev, "shader power transition timeout"); + + gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, + val, !val, 1, 1000); + if (ret) + dev_err(pfdev->dev, "tiler power transition timeout"); + + gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); + ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, + val, !val, 0, 1000); + if (ret) + dev_err(pfdev->dev, "l2 power transition timeout"); +} + +void panfrost_gpu_suspend_irq(struct panfrost_device *pfdev) +{ + set_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended); + + gpu_write(pfdev, GPU_INT_MASK, 0); + synchronize_irq(pfdev->gpu_irq); } int panfrost_gpu_init(struct panfrost_device *pfdev) { - int err, irq; + int err; err = panfrost_gpu_soft_reset(pfdev); if (err) @@ -430,11 +483,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev) dma_set_max_seg_size(pfdev->dev, UINT_MAX); - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); - if (irq < 0) - return irq; + pfdev->gpu_irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); + if (pfdev->gpu_irq < 0) + return pfdev->gpu_irq; - err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler, + err = devm_request_irq(pfdev->dev, pfdev->gpu_irq, panfrost_gpu_irq_handler, IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev); if (err) { dev_err(pfdev->dev, "failed to request gpu irq"); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h index 876fdad9f721..d841b86504ea 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.h +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h @@ -15,6 +15,7 @@ u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev); int panfrost_gpu_soft_reset(struct panfrost_device *pfdev); void panfrost_gpu_power_on(struct panfrost_device *pfdev); void 
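
The bit trick in the new panfrost_get_core_mask() is easiest to verify with a concrete value; the early return for l2_present == 1 also matters, since l2_present - 2 would otherwise underflow. Suppose two core groups of four cores each, so L2 bits 0 and 4 are set:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example topology: two core groups, one L2 each, bits 0 and 4. */
	uint64_t l2_present = 0x11;
	uint64_t core_mask = ~(l2_present - 1) & (l2_present - 2);

	/* ~(0x10) keeps everything below bit 4; 0x0f is bits 0..3. */
	printf("0x%llx\n", (unsigned long long)core_mask); /* prints: 0xf */
	return 0;
}
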
panfrost_gpu_power_off(struct panfrost_device *pfdev); +void panfrost_gpu_suspend_irq(struct panfrost_device *pfdev); void panfrost_cycle_counter_get(struct panfrost_device *pfdev); void panfrost_cycle_counter_put(struct panfrost_device *pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index ecd2e035147f..0c2dbf6ef2a5 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -405,6 +405,8 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) int j; u32 irq_mask = 0; + clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended); + for (j = 0; j < NUM_JOB_SLOTS; j++) { irq_mask |= MK_JS_MASK(j); } @@ -413,6 +415,14 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, irq_mask); } +void panfrost_job_suspend_irq(struct panfrost_device *pfdev) +{ + set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended); + + job_write(pfdev, JOB_INT_MASK, 0); + synchronize_irq(pfdev->js->irq); +} + static void panfrost_job_handle_err(struct panfrost_device *pfdev, struct panfrost_job *job, unsigned int js) @@ -792,17 +802,25 @@ static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) struct panfrost_device *pfdev = data; panfrost_job_handle_irqs(pfdev); - job_write(pfdev, JOB_INT_MASK, - GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | - GENMASK(NUM_JOB_SLOTS - 1, 0)); + + /* Enable interrupts only if we're not about to get suspended */ + if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended)) + job_write(pfdev, JOB_INT_MASK, + GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | + GENMASK(NUM_JOB_SLOTS - 1, 0)); + return IRQ_HANDLED; } static irqreturn_t panfrost_job_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; - u32 status = job_read(pfdev, JOB_INT_STAT); + u32 status; + + if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended)) + return IRQ_NONE; + status = job_read(pfdev, JOB_INT_STAT); if (!status) return IRQ_NONE; @@ -852,7 +870,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) js->queue[j].fence_context = dma_fence_context_alloc(1); ret = drm_sched_init(&js->queue[j].sched, - &panfrost_sched_ops, + &panfrost_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, nentries, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), @@ -963,7 +981,7 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev) for (i = 0; i < NUM_JOB_SLOTS; i++) { /* If there are any jobs in the HW queue, we're not idle */ - if (atomic_read(&js->queue[i].sched.hw_rq_count)) + if (atomic_read(&js->queue[i].sched.credit_count)) return false; } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index 17ff808dba07..ec581b97852b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -47,6 +47,7 @@ int panfrost_job_get_slot(struct panfrost_job *job); int panfrost_job_push(struct panfrost_job *job); void panfrost_job_put(struct panfrost_job *job); void panfrost_job_enable_interrupts(struct panfrost_device *pfdev); +void panfrost_job_suspend_irq(struct panfrost_device *pfdev); int panfrost_job_is_idle(struct panfrost_device *pfdev); #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 846dd697c410..f38385fe76bb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -231,6 +231,8 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev) { struct panfrost_mmu *mmu, *mmu_tmp; + 
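
The GPU, job and MMU blocks all gain the same suspend handshake: set the per-block is_suspended bit, mask that block's interrupt sources, then synchronize_irq() to wait out any handler already running; the handler checks the bit first and bails with IRQ_NONE, and the threaded halves only re-arm the mask while the bit is clear. Condensed to its two sides, using the job block's names from the patch:

/* Suspend side: after this returns, no new IRQs fire and any handler
 * that raced with us has finished. */
static void example_suspend_irq(struct panfrost_device *pfdev)
{
	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);

	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);
}

/* Handler side: bail out early once the block is marked suspended. */
static irqreturn_t example_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
		return IRQ_NONE;

	/* ... normal interrupt handling ... */
	return IRQ_HANDLED;
}
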
clear_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended); + spin_lock(&pfdev->as_lock); pfdev->as_alloc_mask = 0; @@ -670,6 +672,9 @@ static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; + if (test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended)) + return IRQ_NONE; + if (!mmu_read(pfdev, MMU_INT_STAT)) return IRQ_NONE; @@ -744,22 +749,25 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask; } - spin_lock(&pfdev->as_lock); - mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); - spin_unlock(&pfdev->as_lock); + /* Enable interrupts only if we're not about to get suspended */ + if (!test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended)) { + spin_lock(&pfdev->as_lock); + mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); + spin_unlock(&pfdev->as_lock); + } return IRQ_HANDLED; }; int panfrost_mmu_init(struct panfrost_device *pfdev) { - int err, irq; + int err; - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); - if (irq < 0) - return irq; + pfdev->mmu_irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); + if (pfdev->mmu_irq < 0) + return pfdev->mmu_irq; - err = devm_request_threaded_irq(pfdev->dev, irq, + err = devm_request_threaded_irq(pfdev->dev, pfdev->mmu_irq, panfrost_mmu_irq_handler, panfrost_mmu_irq_handler_thread, IRQF_SHARED, KBUILD_MODNAME "-mmu", @@ -777,3 +785,11 @@ void panfrost_mmu_fini(struct panfrost_device *pfdev) { mmu_write(pfdev, MMU_INT_MASK, 0); } + +void panfrost_mmu_suspend_irq(struct panfrost_device *pfdev) +{ + set_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended); + + mmu_write(pfdev, MMU_INT_MASK, 0); + synchronize_irq(pfdev->mmu_irq); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h index cc2a0d307feb..022a9a74a114 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.h +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -14,6 +14,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping); int panfrost_mmu_init(struct panfrost_device *pfdev); void panfrost_mmu_fini(struct panfrost_device *pfdev); void panfrost_mmu_reset(struct panfrost_device *pfdev); +void panfrost_mmu_suspend_irq(struct panfrost_device *pfdev); u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 55ec807550b3..c25743b05c55 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -44,6 +44,7 @@ GPU_IRQ_MULTIPLE_FAULT) #define GPU_CMD 0x30 #define GPU_CMD_SOFT_RESET 0x01 +#define GPU_CMD_HARD_RESET 0x02 #define GPU_CMD_PERFCNT_CLEAR 0x03 #define GPU_CMD_PERFCNT_SAMPLE 0x04 #define GPU_CMD_CYCLE_COUNT_START 0x05 diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 404b0483bb7c..c6d35c33d5d6 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -485,7 +485,6 @@ static int qxl_primary_atomic_check(struct drm_plane *plane, static int qxl_primary_apply_cursor(struct qxl_device *qdev, struct drm_plane_state *plane_state) { - struct drm_framebuffer *fb = plane_state->fb; struct qxl_crtc *qcrtc = to_qxl_crtc(plane_state->crtc); struct qxl_cursor_cmd *cmd; struct qxl_release *release; @@ -510,8 +509,8 @@ static int qxl_primary_apply_cursor(struct 
qxl_device *qdev, cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; - cmd->u.set.position.x = plane_state->crtc_x + fb->hot_x; - cmd->u.set.position.y = plane_state->crtc_y + fb->hot_y; + cmd->u.set.position.x = plane_state->crtc_x + plane_state->hotspot_x; + cmd->u.set.position.y = plane_state->crtc_y + plane_state->hotspot_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, qcrtc->cursor_bo, 0); @@ -531,7 +530,6 @@ out_free_release: static int qxl_primary_move_cursor(struct qxl_device *qdev, struct drm_plane_state *plane_state) { - struct drm_framebuffer *fb = plane_state->fb; struct qxl_crtc *qcrtc = to_qxl_crtc(plane_state->crtc); struct qxl_cursor_cmd *cmd; struct qxl_release *release; @@ -554,8 +552,8 @@ static int qxl_primary_move_cursor(struct qxl_device *qdev, cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_MOVE; - cmd->u.position.x = plane_state->crtc_x + fb->hot_x; - cmd->u.position.y = plane_state->crtc_y + fb->hot_y; + cmd->u.position.x = plane_state->crtc_x + plane_state->hotspot_x; + cmd->u.position.y = plane_state->crtc_y + plane_state->hotspot_y; qxl_release_unmap(qdev, release, &cmd->release_info); qxl_release_fence_buffer_objects(release); @@ -851,8 +849,8 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane, struct qxl_bo *old_cursor_bo = qcrtc->cursor_bo; qcrtc->cursor_bo = qxl_create_cursor(qdev, user_bo, - new_state->fb->hot_x, - new_state->fb->hot_y); + new_state->hotspot_x, + new_state->hotspot_y); qxl_free_cursor(old_cursor_bo); } diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 46de4f171970..beee5563031a 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -285,7 +285,7 @@ static const struct drm_ioctl_desc qxl_ioctls[] = { }; static struct drm_driver qxl_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .dumb_create = qxl_mode_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 307a890fde13..32069acd93f8 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -119,7 +119,6 @@ struct qxl_output { #define to_qxl_crtc(x) container_of(x, struct qxl_crtc, base) #define drm_connector_to_qxl_output(x) container_of(x, struct qxl_output, base) -#define drm_encoder_to_qxl_output(x) container_of(x, struct qxl_output, enc) struct qxl_mman { struct ttm_device bdev; @@ -256,8 +255,6 @@ struct qxl_device { #define to_qxl(dev) container_of(dev, struct qxl_device, ddev) -int qxl_debugfs_fence_init(struct qxl_device *rdev); - int qxl_device_init(struct qxl_device *qdev, struct pci_dev *pdev); void qxl_device_fini(struct qxl_device *qdev); @@ -344,8 +341,6 @@ qxl_image_alloc_objects(struct qxl_device *qdev, int height, int stride); void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage); -void qxl_update_screen(struct qxl_device *qxl); - /* qxl io operations (qxl_cmd.c) */ void qxl_io_create_primary(struct qxl_device *qdev, @@ -445,8 +440,6 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev, int qxl_bo_check_id(struct qxl_device *qdev, struct qxl_bo *bo); -struct qxl_drv_surface * -qxl_surface_lookup(struct drm_device *dev, int surface_id); void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freeing); /* qxl_ioctl.c */ diff --git 
a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 4aca09cab4b8..6e537c5bd295 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -29,6 +29,7 @@ #include <linux/pci.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> #include <drm/drm_file.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/radeon_drm.h> diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h index 63a1ffbb3ced..3b645558f133 100644 --- a/drivers/gpu/drm/radeon/clearstate_evergreen.h +++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h @@ -1049,7 +1049,7 @@ static const struct cs_extent_def SECT_CONTEXT_defs[] = {SECT_CONTEXT_def_5, 0x0000a29e, 5 }, {SECT_CONTEXT_def_6, 0x0000a2a5, 56 }, {SECT_CONTEXT_def_7, 0x0000a2de, 290 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const u32 SECT_CLEAR_def_1[] = { @@ -1060,7 +1060,7 @@ static const u32 SECT_CLEAR_def_1[] = static const struct cs_extent_def SECT_CLEAR_defs[] = { {SECT_CLEAR_def_1, 0x0000ffc0, 3 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const u32 SECT_CTRLCONST_def_1[] = { @@ -1070,11 +1070,11 @@ static const u32 SECT_CTRLCONST_def_1[] = static const struct cs_extent_def SECT_CTRLCONST_defs[] = { {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const struct cs_section_def evergreen_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, - { 0, SECT_NONE } + { NULL, SECT_NONE } }; diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index e8fe239b9d79..324e9b765098 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -21,6 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include <linux/hdmi.h> +#include <drm/drm_edid.h> #include "radeon.h" #include "radeon_asic.h" diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4a1d5447eac1..4c06f47453fd 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -21,6 +21,7 @@ * */ #include <linux/hdmi.h> +#include <drm/drm_edid.h> #include "dce6_afmt.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f0ae087be914..a424b86008b8 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -26,6 +26,7 @@ #include <linux/pci.h> #include <linux/slab.h> +#include <drm/drm_edid.h> #include <drm/drm_vblank.h> #include <drm/radeon_drm.h> #include <drm/drm_fourcc.h> diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 5f3078f8ab95..681119c91d94 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -26,6 +26,7 @@ */ #include <linux/hdmi.h> +#include <drm/drm_edid.h> #include <drm/radeon_drm.h> #include "evergreen_hdmi.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index affa9e0309b2..cfeca2694d5f 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2321,7 +2321,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) switch (prim_walk) { case 1: for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; + size = track->arrays[i].esize * track->max_indx * 4UL; if (track->arrays[i].robj == NULL) { DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); @@ -2340,7 +2340,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) break; case 2: for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; + size = track->arrays[i].esize * (nverts - 1) * 4UL; if (track->arrays[i].robj == NULL) { DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 638f861af80f..6cf54a747749 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1275,7 +1275,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 4; - track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; + track->cb_color_bo_offset[tmp] = (u64)radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; @@ -1302,7 +1302,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - track->htile_offset = radeon_get_ib_value(p, idx) << 8; + track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 85c4bb186203..3596ea4a8b60 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -27,6 +27,7 @@ #include <linux/pci.h> #include <drm/drm_device.h> +#include <drm/drm_edid.h> #include <drm/radeon_drm.h> #include "radeon.h" diff --git 
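The r100/r600 hunks above are integer-overflow fixes: an expression like esize * max_indx * 4 is evaluated entirely in 32-bit arithmetic even when the result is stored in a wider variable, so the product can wrap before it is widened. Appending 4UL (or casting the first operand to u64 before the << 8 shifts) moves the final operation into 64-bit math on LP64 kernels. A standalone illustration with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t esize = 4, max_indx = 0x10000000;

	/* All operands are 32-bit: the product wraps to 0 before widening. */
	uint64_t wrong = esize * max_indx * 4;

	/* 4UL promotes the final multiply to 64 bits (on an LP64 target). */
	uint64_t right = esize * max_indx * 4UL;

	printf("wrong=%llu right=%llu\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}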
a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index d6ccaf24ee0c..91b58fbc2be7 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -26,6 +26,8 @@ #include <linux/component.h> #include <drm/drm_crtc.h> +#include <drm/drm_eld.h> +#include <drm/drm_edid.h> #include "dce6_afmt.h" #include "evergreen_hdmi.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 05e67867469b..dacaaa007051 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -27,7 +27,9 @@ #include <linux/types.h> -#define RREG32_ENDPOINT(block, reg) \ +struct cea_sad; + +#define RREG32_ENDPOINT(block, reg) \ radeon_audio_endpoint_rreg(rdev, (block), (reg)) #define WREG32_ENDPOINT(block, reg, v) \ radeon_audio_endpoint_wreg(rdev, (block), (reg), (v)) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 2620efc7c675..6952b1273b0f 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <drm/drm_device.h> +#include <drm/drm_edid.h> #include <drm/radeon_drm.h> #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 901e75ec70ff..efd18c8d84c8 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -687,11 +687,16 @@ static void radeon_crtc_init(struct drm_device *dev, int index) if (radeon_crtc == NULL) return; + radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0); + if (!radeon_crtc->flip_queue) { + kfree(radeon_crtc); + return; + } + drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs); drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256); radeon_crtc->crtc_id = index; - radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0); rdev->mode_info.crtcs[index] = radeon_crtc; if (rdev->family >= CHIP_BONAIRE) { diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 34a1c73d3938..02a65971d140 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -33,7 +33,6 @@ #include <linux/firmware.h> #include <linux/platform_device.h> -#include <drm/drm_legacy.h> #include "radeon_family.h" diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 9cb6401fe97e..3de3dce9e89d 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -26,6 +26,7 @@ #include <linux/pci.h> +#include <drm/drm_edid.h> #include <drm/drm_device.h> #include <drm/radeon_drm.h> diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 1decdcec0264..59c4db13d90a 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -32,13 +32,13 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_crtc.h> -#include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fixed.h> #include <drm/drm_modeset_helper_vtables.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +struct edid; struct radeon_bo; struct radeon_device; diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index e6534fa9f1fb..38048593bb4a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -413,6 +413,7 @@ int 
radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig dev_err(rdev->dev, "(%d) ring map failed\n", r); return r; } + radeon_debugfs_ring_init(rdev, ring); } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->ring_free_dw = ring->ring_size / 4; @@ -421,7 +422,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index; ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4]; } - radeon_debugfs_ring_init(rdev, ring); radeon_ring_lockup_update(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 987cabbf1318..c38b4d5d6a14 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1204,13 +1204,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_create(rdev, pd_size, align, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &vm->page_directory); - if (r) + if (r) { + kfree(vm->page_tables); + vm->page_tables = NULL; return r; - + } r = radeon_vm_clear_bo(rdev, vm->page_directory); if (r) { radeon_bo_unref(&vm->page_directory); vm->page_directory = NULL; + kfree(vm->page_tables); + vm->page_tables = NULL; return r; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a91012447b56..85e9cba49cec 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3611,6 +3611,10 @@ static int si_cp_start(struct radeon_device *rdev) for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) { ring = &rdev->ring[i]; r = radeon_ring_lock(rdev, ring, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } /* clear the compute context state */ radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0)); diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index f74f381af05f..d49c145db437 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1493,8 +1493,10 @@ static int sumo_parse_power_table(struct radeon_device *rdev) non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; - if (!rdev->pm.power_state[i].clock_info) + if (!rdev->pm.power_state[i].clock_info) { + kfree(rdev->pm.dpm.ps); return -EINVAL; + } ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 08ea1c864cb2..ef1cc7bad20a 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1726,8 +1726,10 @@ static int trinity_parse_power_table(struct radeon_device *rdev) non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; - if (!rdev->pm.power_state[i].clock_info) + if (!rdev->pm.power_state[i].clock_info) { + kfree(rdev->pm.dpm.ps); return -EINVAL; + } ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c index 8f9a728affde..07ad17d24294 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c @@ -14,7 +14,6 @@ #include <drm/drm_fourcc.h> #include 
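The radeon_vm_init(), sumo/trinity power-table, and si_cp_start() hunks above share one theme: every early-exit path must release what was already allocated. A generic sketch of the goto-based unwind idiom these fixes restore (types and helpers are hypothetical, not radeon's):

#include <linux/slab.h>

struct example_vm {
	void **page_tables;
	unsigned int nr_tables;
};

int example_alloc_bo(struct example_vm *vm);	/* hypothetical, may fail */
int example_clear_bo(struct example_vm *vm);	/* hypothetical, may fail */
void example_free_bo(struct example_vm *vm);	/* hypothetical */

static int example_vm_init(struct example_vm *vm)
{
	int r;

	vm->page_tables = kcalloc(vm->nr_tables, sizeof(*vm->page_tables),
				  GFP_KERNEL);
	if (!vm->page_tables)
		return -ENOMEM;

	r = example_alloc_bo(vm);
	if (r)
		goto err_free_tables;

	r = example_clear_bo(vm);
	if (r)
		goto err_free_bo;

	return 0;

err_free_bo:
	example_free_bo(vm);
err_free_tables:
	/* Free in reverse order of allocation so nothing leaks. */
	kfree(vm->page_tables);
	vm->page_tables = NULL;
	return r;
}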
<drm/drm_framebuffer.h> #include <drm/drm_gem_dma_helper.h> -#include <drm/drm_plane_helper.h> #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 84aa811ca1e9..bd08d57486fe 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -30,7 +30,6 @@ #include <drm/drm_simple_kms_helper.h> #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #define RK3288_GRF_SOC_CON6 0x25c #define RK3288_EDP_LCDC_SEL BIT(5) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 21254e4e107a..a855c45ae7f3 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -24,7 +24,6 @@ #include "cdn-dp-core.h" #include "cdn-dp-reg.h" -#include "rockchip_drm_vop.h" static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector) { diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 6396f9324dab..4cc8ed8f4fbd 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -26,7 +26,6 @@ #include <drm/drm_simple_kms_helper.h> #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #define DSI_PHY_RSTZ 0xa0 #define PHY_DISFORCEPLL 0 diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 341550199111..fe33092abbe7 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -18,7 +18,6 @@ #include <drm/drm_simple_kms_helper.h> #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #define RK3228_GRF_SOC_CON2 0x0408 #define RK3228_HDMI_SDAIN_MSK BIT(14) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 6e5b922a121e..f6d819803c0e 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -23,7 +23,6 @@ #include <drm/drm_simple_kms_helper.h> #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #include "inno_hdmi.h" diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index fa6e592e0276..62e6d8187de7 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -4,6 +4,7 @@ * Zheng Yang <zhengyang@rock-chips.com> */ +#include <drm/drm_atomic.h> #include <drm/drm_edid.h> #include <drm/drm_of.h> #include <drm/drm_probe_helper.h> @@ -17,7 +18,6 @@ #include "rk3066_hdmi.h" #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #define DEFAULT_PLLA_RATE 30000000 @@ -55,7 +55,6 @@ struct rk3066_hdmi { unsigned int tmdsclk; struct hdmi_data_info hdmi_data; - struct drm_display_mode previous_mode; }; static struct rk3066_hdmi *encoder_to_rk3066_hdmi(struct drm_encoder *encoder) @@ -387,21 +386,21 @@ static int rk3066_hdmi_setup(struct rk3066_hdmi *hdmi, return 0; } -static void -rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adj_mode) +static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + int mux, val; - /* Store the display mode for plugin/DPMS poweron events. 
*/ - drm_mode_copy(&hdmi->previous_mode, adj_mode); -} + conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); + if (WARN_ON(!conn_state)) + return; -static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder) -{ - struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); - int mux, val; + crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (WARN_ON(!crtc_state)) + return; mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder); if (mux) @@ -414,10 +413,11 @@ static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder) DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n", (mux) ? "1" : "0"); - rk3066_hdmi_setup(hdmi, &hdmi->previous_mode); + rk3066_hdmi_setup(hdmi, &crtc_state->adjusted_mode); } -static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder) +static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); @@ -434,14 +434,6 @@ static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder) rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A); } -static bool -rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder, - const struct drm_display_mode *mode, - struct drm_display_mode *adj_mode) -{ - return true; -} - static int rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -457,11 +449,9 @@ rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder, static const struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { - .enable = rk3066_hdmi_encoder_enable, - .disable = rk3066_hdmi_encoder_disable, - .mode_fixup = rk3066_hdmi_encoder_mode_fixup, - .mode_set = rk3066_hdmi_encoder_mode_set, - .atomic_check = rk3066_hdmi_encoder_atomic_check, + .atomic_check = rk3066_hdmi_encoder_atomic_check, + .atomic_enable = rk3066_hdmi_encoder_enable, + .atomic_disable = rk3066_hdmi_encoder_disable, }; static enum drm_connector_status diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index aeb03a57240f..bbb9e0bf6804 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -20,6 +20,23 @@ #define ROCKCHIP_MAX_CONNECTOR 2 #define ROCKCHIP_MAX_CRTC 4 +/* + * display output interface supported by rockchip lcdc + */ +#define ROCKCHIP_OUT_MODE_P888 0 +#define ROCKCHIP_OUT_MODE_BT1120 0 +#define ROCKCHIP_OUT_MODE_P666 1 +#define ROCKCHIP_OUT_MODE_P565 2 +#define ROCKCHIP_OUT_MODE_BT656 5 +#define ROCKCHIP_OUT_MODE_S888 8 +#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 +#define ROCKCHIP_OUT_MODE_YUV420 14 +/* for use special outface */ +#define ROCKCHIP_OUT_MODE_AAAA 15 + +/* output flags */ +#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) + struct drm_device; struct drm_connector; struct iommu_domain; @@ -31,6 +48,7 @@ struct rockchip_crtc_state { int output_bpc; int output_flags; bool enable_afbc; + bool yuv_overlay; u32 bus_format; u32 bus_flags; int color_space; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 4b2daefeb8c1..b33e5bdc26be 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -277,18 +277,6 @@ struct vop_data { /* dst alpha ctrl define */ #define DST_FACTOR_M0(x) (((x) & 0x7) << 6) -/* - * display output interface supported by rockchip lcdc - */ -#define ROCKCHIP_OUT_MODE_P888 0 -#define ROCKCHIP_OUT_MODE_P666 1 
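The rk3066 HDMI conversion above is the standard recipe for retiring the legacy .mode_set/.enable hooks: instead of caching previous_mode at mode-set time, the atomic enable hook looks the adjusted mode up from the atomic state when it runs. Stripped to its skeleton (the driver struct is a hypothetical stand-in):

#include <drm/drm_atomic.h>
#include <drm/drm_connector.h>
#include <drm/drm_encoder.h>

struct example_hdmi {	/* hypothetical driver data */
	struct drm_connector connector;
	struct drm_encoder encoder;
};

static void example_encoder_atomic_enable(struct drm_encoder *encoder,
					  struct drm_atomic_state *state)
{
	struct example_hdmi *hdmi =
		container_of(encoder, struct example_hdmi, encoder);
	struct drm_connector_state *conn_state;
	struct drm_crtc_state *crtc_state;

	conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
	if (WARN_ON(!conn_state))
		return;

	crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
	if (WARN_ON(!crtc_state))
		return;

	/*
	 * Program the hardware from crtc_state->adjusted_mode here;
	 * no cached copy of the mode is needed anymore.
	 */
}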
-#define ROCKCHIP_OUT_MODE_P565 2 -/* for use special outface */ -#define ROCKCHIP_OUT_MODE_AAAA 15 - -/* output flags */ -#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) - enum alpha_mode { ALPHA_STRAIGHT, ALPHA_INVERSE, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 6862fb146ace..574103fc79f9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -190,7 +190,10 @@ struct vop2 { void __iomem *regs; struct regmap *map; - struct regmap *grf; + struct regmap *sys_grf; + struct regmap *vop_grf; + struct regmap *vo1_grf; + struct regmap *sys_pmu; /* physical map length of vop2 register */ u32 len; @@ -209,6 +212,7 @@ struct vop2 { unsigned int enable_count; struct clk *hclk; struct clk *aclk; + struct clk *pclk; /* optional internal rgb encoder */ struct rockchip_rgb *rgb; @@ -217,6 +221,25 @@ struct vop2 { struct vop2_win win[]; }; +#define vop2_output_if_is_hdmi(x) ((x) == ROCKCHIP_VOP2_EP_HDMI0 || \ + (x) == ROCKCHIP_VOP2_EP_HDMI1) + +#define vop2_output_if_is_dp(x) ((x) == ROCKCHIP_VOP2_EP_DP0 || \ + (x) == ROCKCHIP_VOP2_EP_DP1) + +#define vop2_output_if_is_edp(x) ((x) == ROCKCHIP_VOP2_EP_EDP0 || \ + (x) == ROCKCHIP_VOP2_EP_EDP1) + +#define vop2_output_if_is_mipi(x) ((x) == ROCKCHIP_VOP2_EP_MIPI0 || \ + (x) == ROCKCHIP_VOP2_EP_MIPI1) + +#define vop2_output_if_is_lvds(x) ((x) == ROCKCHIP_VOP2_EP_LVDS0 || \ + (x) == ROCKCHIP_VOP2_EP_LVDS1) + +#define vop2_output_if_is_dpi(x) ((x) == ROCKCHIP_VOP2_EP_RGB0) + +static const struct regmap_config vop2_regmap_config; + static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) { return container_of(crtc, struct vop2_video_port, crtc); @@ -266,12 +289,23 @@ static bool vop2_cluster_window(const struct vop2_win *win) return win->data->feature & WIN_FEATURE_CLUSTER; } +/* + * Note: + * The write-mask function is documented but not implemented on rk3566/8; + * writes to these bits have no effect. On newer SoCs (rk3588 and later) + * the write mask is required for register writes. + * + * GLB_CFG_DONE_EN has no write mask bit.
+ * + */ static void vop2_cfg_done(struct vop2_video_port *vp) { struct vop2 *vop2 = vp->vop2; + u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN; + + val |= BIT(vp->id) | (BIT(vp->id) << 16); - regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, - BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); + regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val); } static void vop2_win_disable(struct vop2_win *win) @@ -325,11 +359,14 @@ static enum vop2_data_format vop2_convert_format(u32 format) case DRM_FORMAT_NV16: case DRM_FORMAT_NV61: return VOP2_FMT_YUV422SP; + case DRM_FORMAT_NV20: case DRM_FORMAT_Y210: return VOP2_FMT_YUV422SP_10; case DRM_FORMAT_NV24: case DRM_FORMAT_NV42: return VOP2_FMT_YUV444SP; + case DRM_FORMAT_NV30: + return VOP2_FMT_YUV444SP_10; case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: return VOP2_FMT_VYUY422; @@ -414,6 +451,8 @@ static bool vop2_win_uv_swap(u32 format) case DRM_FORMAT_NV16: case DRM_FORMAT_NV24: case DRM_FORMAT_NV15: + case DRM_FORMAT_NV20: + case DRM_FORMAT_NV30: case DRM_FORMAT_YUYV: case DRM_FORMAT_UYVY: return true; @@ -457,6 +496,17 @@ static bool vop2_output_uv_swap(u32 bus_format, u32 output_mode) return false; } +static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format) +{ + if (vop2->data->soc_id == 3588) { + if (bus_format == MEDIA_BUS_FMT_YUV8_1X24 || + bus_format == MEDIA_BUS_FMT_YUV10_1X30) + return true; + } + + return false; +} + static bool is_yuv_output(u32 bus_format) { switch (bus_format) { @@ -514,6 +564,18 @@ static bool rockchip_vop2_mod_supported(struct drm_plane *plane, u32 format, return vop2_convert_afbc_format(format) >= 0; } +/* + * 0: Full mode, 16 lines for one tail + * 1: half block mode, 8 lines one tail + */ +static bool vop2_half_block_enable(struct drm_plane_state *pstate) +{ + if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) + return false; + else + return true; +} + static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate, bool afbc_half_block_en) { @@ -849,13 +911,32 @@ static int vop2_core_clks_prepare_enable(struct vop2 *vop2) goto err; } + ret = clk_prepare_enable(vop2->pclk); + if (ret < 0) { + drm_err(vop2->drm, "failed to enable pclk - %d\n", ret); + goto err1; + } + return 0; +err1: + clk_disable_unprepare(vop2->aclk); err: clk_disable_unprepare(vop2->hclk); return ret; } +static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2) +{ + u32 pd; + + pd = vop2_readl(vop2, RK3588_SYS_PD_CTRL); + pd &= ~(VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 | + VOP2_PD_CLUSTER3 | VOP2_PD_ESMART); + + vop2_writel(vop2, RK3588_SYS_PD_CTRL, pd); +} + static void vop2_enable(struct vop2 *vop2) { int ret; @@ -878,11 +959,12 @@ static void vop2_enable(struct vop2 *vop2) return; } - regcache_sync(vop2->map); - if (vop2->data->soc_id == 3566) vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); + if (vop2->data->soc_id == 3588) + rk3588_vop2_power_domain_enable_all(vop2); + vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); /* @@ -908,8 +990,9 @@ static void vop2_disable(struct vop2 *vop2) pm_runtime_put_sync(vop2->dev); - regcache_mark_dirty(vop2->map); + regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register); + clk_disable_unprepare(vop2->pclk); clk_disable_unprepare(vop2->aclk); clk_disable_unprepare(vop2->hclk); } @@ -1135,6 +1218,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90; struct rockchip_gem_object *rk_obj; unsigned long offset; + bool half_block_en; bool afbc_en; dma_addr_t 
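vop2_cfg_done() above now pairs each config-done bit with its write-enable bit in the top half-word (BIT(vp->id) << 16), since on the newer SoCs only low-half bits whose mask bit is set are latched. Generalized, and equivalent in spirit to the HIWORD_UPDATE() macro introduced later in this patch, the encoding can be written as (a sketch; assumes the field sits in bits 15..0):

#include <stdint.h>

/*
 * Encode "write value v into bits [h:l]" for a register whose upper
 * 16 bits are per-bit write enables for the lower 16 bits.
 */
static inline uint32_t hiword_update(uint32_t v, unsigned int h, unsigned int l)
{
	uint32_t mask = ((1u << (h - l + 1)) - 1) << l;

	return (mask << 16) | ((v << l) & mask);
}

With this encoding, hiword_update(1, 3, 3) yields 0x00080008: it touches only bit 3, which is how the GRF writes later in this patch (HIWORD_UPDATE(1, 3, 3)) flip a single routing bit without a read-modify-write.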
yrgb_mst; dma_addr_t uv_mst; @@ -1227,6 +1311,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff); format = vop2_convert_format(fb->format->format); + half_block_en = vop2_half_block_enable(pstate); drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n", vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h, @@ -1234,6 +1319,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, &fb->format->format, afbc_en ? "AFBC" : "", &yrgb_mst); + if (vop2_cluster_window(win)) + vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); + if (afbc_en) { u32 stride; @@ -1272,15 +1360,21 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1); vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format); vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); - vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + /* + * On rk3566/8 this bit enables auto gating, but the feature does + * not work reliably, so it must stay disabled on those two + * platforms. + * On rk3588 and the following new SoCs (rk3528/rk3576) the bit + * instead disables gating, so write 1 to disable gating whenever + * AFBC is enabled. + */ + if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + else + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1); + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); - if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) { - vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0); - transform_offset = vop2_afbc_transform_offset(pstate, false); - } else { - vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1); - transform_offset = vop2_afbc_transform_offset(pstate, true); - } + transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info); vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset); @@ -1292,6 +1386,11 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_270, rotate_270); vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_90, rotate_90); } else { + if (vop2_cluster_window(win)) { + vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 0); + vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, 0); + } + vop2_win_write(win, VOP2_WIN_YRGB_VIR, DIV_ROUND_UP(fb->pitches[0], 4)); } @@ -1424,8 +1523,18 @@ static void vop2_post_config(struct drm_crtc *crtc) u32 top_margin = 100, bottom_margin = 100; u16 hsize = hdisplay * (left_margin + right_margin) / 200; u16 vsize = vdisplay * (top_margin + bottom_margin) / 200; + u16 hsync_len = mode->crtc_hsync_end - mode->crtc_hsync_start; u16 hact_end, vact_end; u32 val; + u32 bg_dly; + u32 pre_scan_dly; + + bg_dly = vp->data->pre_scan_max_dly[3]; + vop2_writel(vp->vop2, RK3568_VP_BG_MIX_CTRL(vp->id), + FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); + + pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; + vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); vsize = rounddown(vsize, 2); hsize = rounddown(hsize, 2); @@ -1461,10 +1570,10 @@ static void vop2_post_config(struct drm_crtc *crtc) vop2_vp_write(vp, RK3568_VP_DSP_BG, 0); } -static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, - u32 polflags) +static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) { struct vop2 *vop2 =
vp->vop2; + struct drm_crtc *crtc = &vp->crtc; u32 die, dip; die = vop2_readl(vop2, RK3568_DSP_IF_EN); @@ -1478,9 +1587,9 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL; dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags); if (polflags & POLFLAG_DCLK_INV) - regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); + regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); else - regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); + regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); break; case ROCKCHIP_VOP2_EP_HDMI0: die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX; @@ -1526,13 +1635,281 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, break; default: drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); - return; + return 0; } dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; vop2_writel(vop2, RK3568_DSP_IF_EN, die); vop2_writel(vop2, RK3568_DSP_IF_POL, dip); + + return crtc->state->adjusted_mode.crtc_clock * 1000LL; +} + +/* + * calc the dclk on rk3588 + * the available div of dclk is 1, 2, 4 + */ +static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk) +{ + if (child_clk * 4 <= max_dclk) + return child_clk * 4; + else if (child_clk * 2 <= max_dclk) + return child_clk * 2; + else if (child_clk <= max_dclk) + return child_clk; + else + return 0; +} + +/* + * 4 pixclk/cycle on rk3588 + * RGB/eDP/HDMI: if_pixclk >= dclk_core + * DP: dp_pixclk = dclk_out <= dclk_core + * DSI: mipi_pixclk <= dclk_out <= dclk_core + */ +static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, + int *dclk_core_div, int *dclk_out_div, + int *if_pixclk_div, int *if_dclk_div) +{ + struct vop2 *vop2 = vp->vop2; + struct drm_crtc *crtc = &vp->crtc; + struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; + struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state); + int output_mode = vcstate->output_mode; + unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */ + unsigned long dclk_core_rate = v_pixclk >> 2; + unsigned long dclk_rate = v_pixclk; + unsigned long dclk_out_rate; + unsigned long if_dclk_rate; + unsigned long if_pixclk_rate; + int K = 1; + + if (vop2_output_if_is_hdmi(id)) { + /* + * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate + * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate + */ + if (output_mode == ROCKCHIP_OUT_MODE_YUV420) { + dclk_rate = dclk_rate >> 1; + K = 2; + } + + if_pixclk_rate = (dclk_core_rate << 1) / K; + if_dclk_rate = dclk_core_rate / K; + /* + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = dclk_rate / if_dclk_rate; + */ + *if_pixclk_div = 2; + *if_dclk_div = 4; + } else if (vop2_output_if_is_edp(id)) { + /* + * edp_pixclk = edp_dclk > dclk_core + */ + if_pixclk_rate = v_pixclk / K; + dclk_rate = if_pixclk_rate * K; + /* + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = *if_pixclk_div; + */ + *if_pixclk_div = K; + *if_dclk_div = K; + } else if (vop2_output_if_is_dp(id)) { + if (output_mode == ROCKCHIP_OUT_MODE_YUV420) + dclk_out_rate = v_pixclk >> 3; + else + dclk_out_rate = v_pixclk >> 2; + + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + if (!dclk_rate) { + drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n", + dclk_out_rate); + return 0; + } + *dclk_out_div = dclk_rate / dclk_out_rate; + } else if (vop2_output_if_is_mipi(id)) { + if_pixclk_rate = 
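rk3588_calc_dclk() above simply picks the largest supported divider (4, then 2, then 1) whose resulting parent rate still fits under the cap, and returns 0 when nothing fits. A runnable restatement with one worked value (the numbers are illustrative; the 600000 kHz cap mirrors the call sites above):

#include <stdio.h>

static unsigned long calc_dclk(unsigned long child_clk, unsigned long max_dclk)
{
	if (child_clk * 4 <= max_dclk)
		return child_clk * 4;
	if (child_clk * 2 <= max_dclk)
		return child_clk * 2;
	if (child_clk <= max_dclk)
		return child_clk;
	return 0;	/* out of range */
}

int main(void)
{
	/* 148.5 MHz pixel clock in kHz against a 600 MHz cap */
	printf("%lu\n", calc_dclk(148500, 600000));	/* prints 594000: div 4 */
	return 0;
}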
dclk_core_rate / K; + /* + * dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4 + */ + dclk_out_rate = if_pixclk_rate; + /* + * dclk_rate = N * dclk_core_rate N = (1,2,4 ), + * we get a little factor here + */ + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + if (!dclk_rate) { + drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n", + dclk_out_rate); + return 0; + } + *dclk_out_div = dclk_rate / dclk_out_rate; + /* + * mipi pixclk == dclk_out + */ + *if_pixclk_div = 1; + } else if (vop2_output_if_is_dpi(id)) { + dclk_rate = v_pixclk; + } + + *dclk_core_div = dclk_rate / dclk_core_rate; + *if_pixclk_div = ilog2(*if_pixclk_div); + *if_dclk_div = ilog2(*if_dclk_div); + *dclk_core_div = ilog2(*dclk_core_div); + *dclk_out_div = ilog2(*dclk_out_div); + + drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n", + dclk_rate, *if_pixclk_div, *if_dclk_div); + + return dclk_rate; +} + +/* + * MIPI port mux on rk3588: + * 0: Video Port2 + * 1: Video Port3 + * 3: Video Port 1(MIPI1 only) + */ +static u32 rk3588_get_mipi_port_mux(int vp_id) +{ + if (vp_id == 1) + return 3; + else if (vp_id == 3) + return 1; + else + return 0; +} + +static u32 rk3588_get_hdmi_pol(u32 flags) +{ + u32 val; + + val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0; + val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0; + + return val; +} + +static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) +{ + struct vop2 *vop2 = vp->vop2; + int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div; + unsigned long clock; + u32 die, dip, div, vp_clk_div, val; + + clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div, + &if_pixclk_div, &if_dclk_div); + if (!clock) + return 0; + + vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div); + vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div); + + die = vop2_readl(vop2, RK3568_DSP_IF_EN); + dip = vop2_readl(vop2, RK3568_DSP_IF_POL); + div = vop2_readl(vop2, RK3568_DSP_IF_CTRL); + + switch (id) { + case ROCKCHIP_VOP2_EP_HDMI0: + div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; + div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); + val = rk3588_get_hdmi_pol(polflags); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5)); + break; + case ROCKCHIP_VOP2_EP_HDMI1: + div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; + div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div); + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); + val = rk3588_get_hdmi_pol(polflags); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7)); + break; + case ROCKCHIP_VOP2_EP_EDP0: + div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; + div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, 
if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_EDP0 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0)); + break; + case ROCKCHIP_VOP2_EP_EDP1: + div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; + div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); + div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_EDP1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3)); + break; + case ROCKCHIP_VOP2_EP_MIPI0: + div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX; + val = rk3588_get_mipi_port_mux(vp->id); + die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val); + break; + case ROCKCHIP_VOP2_EP_MIPI1: + div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV; + div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div); + die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; + val = rk3588_get_mipi_port_mux(vp->id); + die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val); + break; + case ROCKCHIP_VOP2_EP_DP0: + die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_DP0 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id); + dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL; + dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags); + break; + case ROCKCHIP_VOP2_EP_DP1: + die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id); + dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL; + dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags); + break; + default: + drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); + return 0; + } + + dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; + + vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div); + vop2_writel(vop2, RK3568_DSP_IF_EN, die); + vop2_writel(vop2, RK3568_DSP_IF_CTRL, div); + vop2_writel(vop2, RK3568_DSP_IF_POL, dip); + + return clock; +} + +static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags) +{ + struct vop2 *vop2 = vp->vop2; + + if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) + return rk3568_set_intf_mux(vp, ep_id, polflags); + else if (vop2->data->soc_id == 3588) + return rk3588_set_intf_mux(vp, ep_id, polflags); + else + return 0; } static int us_to_vertical_line(struct drm_display_mode *mode, int us) @@ -1587,6 +1964,8 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, vop2->enable_count++; + vcstate->yuv_overlay = is_yuv_output(vcstate->bus_format); + vop2_crtc_enable_irq(vp, VP_INT_POST_BUF_EMPTY); polflags = 0; @@ -1600,11 +1979,19 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) { struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder); - rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); + /* + * To drive a high resolution (4KP120, 8K), the VOP on rk3588/rk3576 needs + * to process multiple (1/2/4/8) pixels per cycle, so the dclk fed by the + * system CRU may be 1/2 or 1/4 of mode->clock.
+ */ + clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); } + if (!clock) + return; + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && - !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT)) + !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) out_mode = ROCKCHIP_OUT_MODE_P888; else out_mode = vcstate->output_mode; @@ -1613,8 +2000,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode)) dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP; + if (vop2_output_rg_swap(vop2, vcstate->bus_format)) + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP; - if (is_yuv_output(vcstate->bus_format)) + if (vcstate->yuv_overlay) dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y; vop2_dither_setup(crtc, &dsp_ctrl); @@ -1918,28 +2307,22 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) u32 layer_sel = 0; u32 port_sel; unsigned int nlayer, ofs; - struct drm_display_mode *adjusted_mode; - u16 hsync_len; - u16 hdisplay; - u32 bg_dly; - u32 pre_scan_dly; + u32 ovl_ctrl; int i; struct vop2_video_port *vp0 = &vop2->vps[0]; struct vop2_video_port *vp1 = &vop2->vps[1]; struct vop2_video_port *vp2 = &vop2->vps[2]; + struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); - adjusted_mode = &vp->crtc.state->adjusted_mode; - hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start; - hdisplay = adjusted_mode->crtc_hdisplay; - - bg_dly = vp->data->pre_scan_max_dly[3]; - vop2_writel(vop2, RK3568_VP_BG_MIX_CTRL(vp->id), - FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); + ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); + ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; + if (vcstate->yuv_overlay) + ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id); + else + ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id); - pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; - vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); + vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl); - vop2_writel(vop2, RK3568_OVL_CTRL, 0); port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL); port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT; @@ -1980,6 +2363,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1; port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id); break; + case ROCKCHIP_VOP2_CLUSTER2: + port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2; + port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id); + break; + case ROCKCHIP_VOP2_CLUSTER3: + port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3; + port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id); + break; case ROCKCHIP_VOP2_ESMART0: port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0; port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id); @@ -1988,6 +2379,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1; port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id); break; + case ROCKCHIP_VOP2_ESMART2: + port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2; + port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id); + break; + case ROCKCHIP_VOP2_ESMART3: + port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3; + port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id); + break; case ROCKCHIP_VOP2_SMART0: port_sel &= ~RK3568_OVL_PORT_SEL__SMART0; port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id); @@ -2013,7 +2412,6 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); 
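The overlay-control hunk above replaces a blind write of RK3568_OVL_CTRL with a read-modify-write, so that one video port's YUV_MODE bit can change without clobbering the bits of the other ports. The bare pattern, reusing the register names from this file (a distilled sketch, not an exact copy of the driver function):

static void example_set_vp_yuv_mode(struct vop2 *vop2, unsigned int vp_id,
				    bool yuv)
{
	u32 ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL);

	/* Keep whatever the other ports have programmed; touch one bit. */
	ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD;
	if (yuv)
		ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp_id);
	else
		ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp_id);

	vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl);
}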
vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel); - vop2_writel(vop2, RK3568_OVL_CTRL, RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD); } static void vop2_setup_dly_for_windows(struct vop2 *vop2) @@ -2725,8 +3123,29 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) if (IS_ERR(vop2->lut_regs)) return PTR_ERR(vop2->lut_regs); } + if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) { + vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + if (IS_ERR(vop2->sys_grf)) + return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf"); + } - vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) { + vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf"); + if (IS_ERR(vop2->vop_grf)) + return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf"); + } + + if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) { + vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf"); + if (IS_ERR(vop2->vo1_grf)) + return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf"); + } + + if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) { + vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu"); + if (IS_ERR(vop2->sys_pmu)) + return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu"); + } vop2->hclk = devm_clk_get(vop2->dev, "hclk"); if (IS_ERR(vop2->hclk)) { @@ -2740,6 +3159,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(vop2->aclk); } + vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop"); + if (IS_ERR(vop2->pclk)) { + drm_err(vop2->drm, "failed to get pclk source\n"); + return PTR_ERR(vop2->pclk); + } + vop2->irq = platform_get_irq(pdev, 0); if (vop2->irq < 0) { drm_err(vop2->drm, "cannot find irq for vop2\n"); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index 56fd31e05238..615a16196aff 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -7,16 +7,22 @@ #ifndef _ROCKCHIP_DRM_VOP2_H #define _ROCKCHIP_DRM_VOP2_H -#include "rockchip_drm_vop.h" - #include <linux/regmap.h> #include <drm/drm_modes.h> +#include "rockchip_drm_vop.h" -#define VOP_FEATURE_OUTPUT_10BIT BIT(0) +#define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0) + +#define VOP2_FEATURE_HAS_SYS_GRF BIT(0) +#define VOP2_FEATURE_HAS_VO0_GRF BIT(1) +#define VOP2_FEATURE_HAS_VO1_GRF BIT(2) +#define VOP2_FEATURE_HAS_VOP_GRF BIT(3) +#define VOP2_FEATURE_HAS_SYS_PMU BIT(4) #define WIN_FEATURE_AFBDC BIT(0) #define WIN_FEATURE_CLUSTER BIT(1) +#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) /* * the delay number of a window in different mode. 
*/ @@ -39,6 +45,18 @@ enum vop2_scale_down_mode { VOP2_SCALE_DOWN_AVG, }; +/* + * vop2 internal power-domain IDs; + * they must all be nonzero, since 0 is treated as invalid. + */ +#define VOP2_PD_CLUSTER0 BIT(0) +#define VOP2_PD_CLUSTER1 BIT(1) +#define VOP2_PD_CLUSTER2 BIT(2) +#define VOP2_PD_CLUSTER3 BIT(3) +#define VOP2_PD_DSC_8K BIT(5) +#define VOP2_PD_DSC_4K BIT(6) +#define VOP2_PD_ESMART BIT(7) + enum vop2_win_regs { VOP2_WIN_ENABLE, VOP2_WIN_FORMAT, @@ -139,6 +157,7 @@ struct vop2_video_port_data { struct vop2_data { u8 nr_vps; + u64 feature; const struct vop2_win_data *win; const struct vop2_video_port_data *vp; struct vop_rect max_input; @@ -166,19 +185,6 @@ struct vop2_data { #define WB_YRGB_FIFO_FULL_INTR BIT(18) #define WB_COMPLETE_INTR BIT(19) -/* - * display output interface supported by rockchip lcdc - */ -#define ROCKCHIP_OUT_MODE_P888 0 -#define ROCKCHIP_OUT_MODE_BT1120 0 -#define ROCKCHIP_OUT_MODE_P666 1 -#define ROCKCHIP_OUT_MODE_P565 2 -#define ROCKCHIP_OUT_MODE_BT656 5 -#define ROCKCHIP_OUT_MODE_S888 8 -#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 -#define ROCKCHIP_OUT_MODE_YUV420 14 -/* for use special outface */ -#define ROCKCHIP_OUT_MODE_AAAA 15 enum vop_csc_format { CSC_BT601L, @@ -206,6 +212,11 @@ enum dst_factor_mode { }; #define RK3568_GRF_VO_CON1 0x0364 + +#define RK3588_GRF_SOC_CON1 0x0304 +#define RK3588_GRF_VOP_CON2 0x08 +#define RK3588_GRF_VO1_CON0 0x00 + /* System registers definition */ #define RK3568_REG_CFG_DONE 0x000 #define RK3568_VERSION_INFO 0x004 @@ -214,6 +225,7 @@ enum dst_factor_mode { #define RK3568_DSP_IF_EN 0x028 #define RK3568_DSP_IF_CTRL 0x02c #define RK3568_DSP_IF_POL 0x030 +#define RK3588_SYS_PD_CTRL 0x034 #define RK3568_WB_CTRL 0x40 #define RK3568_WB_XSCAL_FACTOR 0x44 #define RK3568_WB_YRGB_MST 0x48 @@ -234,9 +246,14 @@ enum dst_factor_mode { #define RK3568_VP_INT_RAW_STATUS(vp) (0xAC + (vp) * 0x10) /* Video Port registers definition */ +#define RK3568_VP0_CTRL_BASE 0x0C00 +#define RK3568_VP1_CTRL_BASE 0x0D00 +#define RK3568_VP2_CTRL_BASE 0x0E00 +#define RK3588_VP3_CTRL_BASE 0x0F00 #define RK3568_VP_DSP_CTRL 0x00 #define RK3568_VP_MIPI_CTRL 0x04 #define RK3568_VP_COLOR_BAR_CTRL 0x08 +#define RK3588_VP_CLK_CTRL 0x0C #define RK3568_VP_3D_LUT_CTRL 0x10 #define RK3568_VP_3D_LUT_MST 0x20 #define RK3568_VP_DSP_BG 0x2C @@ -278,6 +295,17 @@ enum dst_factor_mode { #define RK3568_SMART_DLY_NUM 0x6F8 /* Cluster register definition, offset relative to window base */ +#define RK3568_CLUSTER0_CTRL_BASE 0x1000 +#define RK3568_CLUSTER1_CTRL_BASE 0x1200 +#define RK3588_CLUSTER2_CTRL_BASE 0x1400 +#define RK3588_CLUSTER3_CTRL_BASE 0x1600 +#define RK3568_ESMART0_CTRL_BASE 0x1800 +#define RK3568_ESMART1_CTRL_BASE 0x1A00 +#define RK3568_SMART0_CTRL_BASE 0x1C00 +#define RK3568_SMART1_CTRL_BASE 0x1E00 +#define RK3588_ESMART2_CTRL_BASE 0x1C00 +#define RK3588_ESMART3_CTRL_BASE 0x1E00 + #define RK3568_CLUSTER_WIN_CTRL0 0x00 #define RK3568_CLUSTER_WIN_CTRL1 0x04 #define RK3568_CLUSTER_WIN_YRGB_MST 0x10 @@ -371,13 +399,18 @@ enum dst_factor_mode { #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17) #define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN BIT(16) #define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y BIT(15) +#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP BIT(10) #define RK3568_VP_DSP_CTRL__DSP_RB_SWAP BIT(9) +#define RK3568_VP_DSP_CTRL__DSP_BG_SWAP BIT(8) #define RK3568_VP_DSP_CTRL__DSP_INTERLACE BIT(7) #define RK3568_VP_DSP_CTRL__DSP_FILED_POL BIT(6) #define RK3568_VP_DSP_CTRL__P2I_EN BIT(5) #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4) #define RK3568_VP_DSP_CTRL__OUT_MODE
GENMASK(3, 0) +#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2) +#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0) + #define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN BIT(1) #define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN BIT(0) @@ -396,11 +429,37 @@ enum dst_factor_mode { #define RK3568_SYS_DSP_INFACE_EN_HDMI BIT(1) #define RK3568_SYS_DSP_INFACE_EN_RGB BIT(0) +#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX GENMASK(22, 21) +#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX GENMASK(20, 20) +#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX GENMASK(19, 18) +#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX GENMASK(17, 16) +#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX GENMASK(15, 14) +#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX GENMASK(13, 12) +#define RK3588_SYS_DSP_INFACE_EN_DPI GENMASK(9, 8) +#define RK3588_SYS_DSP_INFACE_EN_MIPI1 BIT(7) +#define RK3588_SYS_DSP_INFACE_EN_MIPI0 BIT(6) +#define RK3588_SYS_DSP_INFACE_EN_HDMI1 BIT(5) +#define RK3588_SYS_DSP_INFACE_EN_EDP1 BIT(4) +#define RK3588_SYS_DSP_INFACE_EN_HDMI0 BIT(3) +#define RK3588_SYS_DSP_INFACE_EN_EDP0 BIT(2) +#define RK3588_SYS_DSP_INFACE_EN_DP1 BIT(1) +#define RK3588_SYS_DSP_INFACE_EN_DP0 BIT(0) + +#define RK3588_DSP_IF_MIPI1_PCLK_DIV GENMASK(27, 26) +#define RK3588_DSP_IF_MIPI0_PCLK_DIV GENMASK(25, 24) +#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV GENMASK(22, 22) +#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV GENMASK(21, 20) +#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV GENMASK(18, 18) +#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV GENMASK(17, 16) + #define RK3568_DSP_IF_POL__MIPI_PIN_POL GENMASK(19, 16) #define RK3568_DSP_IF_POL__EDP_PIN_POL GENMASK(15, 12) #define RK3568_DSP_IF_POL__HDMI_PIN_POL GENMASK(7, 4) #define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL GENMASK(3, 0) +#define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12) +#define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8) + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5) #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4) @@ -415,14 +474,19 @@ enum dst_factor_mode { #define VOP2_COLOR_KEY_MASK BIT(31) #define RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD BIT(28) +#define RK3568_OVL_CTRL__YUV_MODE(vp) BIT(vp) #define RK3568_VP_BG_MIX_CTRL__BG_DLY GENMASK(31, 24) #define RK3568_OVL_PORT_SEL__SEL_PORT GENMASK(31, 16) #define RK3568_OVL_PORT_SEL__SMART1 GENMASK(31, 30) #define RK3568_OVL_PORT_SEL__SMART0 GENMASK(29, 28) +#define RK3588_OVL_PORT_SEL__ESMART3 GENMASK(31, 30) +#define RK3588_OVL_PORT_SEL__ESMART2 GENMASK(29, 28) #define RK3568_OVL_PORT_SEL__ESMART1 GENMASK(27, 26) #define RK3568_OVL_PORT_SEL__ESMART0 GENMASK(25, 24) +#define RK3588_OVL_PORT_SEL__CLUSTER3 GENMASK(23, 22) +#define RK3588_OVL_PORT_SEL__CLUSTER2 GENMASK(21, 20) #define RK3568_OVL_PORT_SEL__CLUSTER1 GENMASK(19, 18) #define RK3568_OVL_PORT_SEL__CLUSTER0 GENMASK(17, 16) #define RK3568_OVL_PORT_SET__PORT2_MUX GENMASK(11, 8) @@ -435,6 +499,10 @@ enum dst_factor_mode { #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1 GENMASK(15, 8) #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0 GENMASK(7, 0) +#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN BIT(0) + +#define RK3568_SMART_REGION0_CTRL__WIN0_EN BIT(0) + #define RK3568_SMART_DLY_NUM__SMART1 GENMASK(31, 24) #define RK3568_SMART_DLY_NUM__SMART0 GENMASK(23, 16) #define RK3568_SMART_DLY_NUM__ESMART1 GENMASK(15, 8) diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index f0f47e9abf5a..59341654ec32 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -27,7 +27,6 @@ #include <drm/drm_simple_kms_helper.h> #include 
"rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #include "rockchip_lvds.h" #define DISPLAY_OUTPUT_RGB 0 diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index c677b71ae516..dbfbde24698e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -19,7 +19,6 @@ #include <drm/drm_simple_kms_helper.h> #include "rockchip_drm_drv.h" -#include "rockchip_drm_vop.h" #include "rockchip_rgb.h" struct rockchip_rgb { diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 22288ad7f326..48170694ac6b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -34,6 +34,30 @@ static const uint32_t formats_cluster[] = { DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ }; +static const uint32_t formats_esmart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */ + DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */ + DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ + DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */ + DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ + DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */ + DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ + DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */ + DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */ +}; + static const uint32_t formats_rk356x_esmart[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, @@ -48,8 +72,10 @@ static const uint32_t formats_rk356x_esmart[] = { DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */ + DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */ + DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ }; @@ -110,7 +136,7 @@ static const uint64_t format_modifiers_afbc[] = { static const struct vop2_video_port_data rk3568_vop_video_ports[] = { { .id = 0, - .feature = VOP_FEATURE_OUTPUT_10BIT, + .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, .gamma_lut_len = 1024, .cubic_lut_len = 9 * 9 * 9, .max_output = { 4096, 2304 }, @@ -234,7 +260,188 @@ static const struct vop2_win_data rk3568_vop_win_data[] = { }, }; +static const struct vop2_video_port_data rk3588_vop_video_ports[] = { + { + .id = 0, + .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */ + .max_output = { 4096, 2304 }, + /* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */ + .pre_scan_max_dly = { 76, 65, 65, 54 }, + .offset = 0xc00, + }, { + .id = 1, + .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 729, /* 9x9x9 */ + .max_output = { 4096, 2304 }, + .pre_scan_max_dly = { 76, 65, 65, 54 }, + .offset = 0xd00, + }, { + .id = 2, + 
.feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */ + .max_output = { 4096, 2304 }, + .pre_scan_max_dly = { 52, 52, 52, 52 }, + .offset = 0xe00, + }, { + .id = 3, + .gamma_lut_len = 1024, + .max_output = { 2048, 1536 }, + .pre_scan_max_dly = { 52, 52, 52, 52 }, + .offset = 0xf00, + }, +}; + +/* + * The rk3588 vop has 4 cluster windows and 4 esmart windows. + * Every cluster can work as one 4K window or be split into two windows. + * All windows in a cluster support AFBCD. + * + * Every esmart and smart window supports 4 multi-regions. + * + * Scale filter modes: + * + * * Cluster: bicubic for horizontal scale up, bilinear for everything else + * * ESmart: + * * nearest-neighbor/bilinear/bicubic for scale up + * * nearest-neighbor/bilinear/average for scale down + * + * AXI read ID assignment: + * There are two AXI buses: + * AXI0 is a read/write bus with higher performance. + * AXI1 is a read-only bus. + * + * Every window on an AXI bus must be assigned two unique + * read IDs (yrgb_id/uv_id; valid IDs are 0x1~0xe). + * + * AXI0: + * Cluster0/1, Esmart0/1, WriteBack + * + * AXI1: + * Cluster2/3, Esmart2/3 + */ +static const struct vop2_win_data rk3588_vop_win_data[] = { + { + .name = "Cluster0-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER0, + .base = 0x1000, + .formats = formats_cluster, + .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 0, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .max_upscale_factor = 4, + .max_downscale_factor = 4, + .dly = { 4, 26, 29 }, + .type = DRM_PLANE_TYPE_PRIMARY, + .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, + }, { + .name = "Cluster1-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER1, + .base = 0x1200, + .formats = formats_cluster, + .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 1, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_PRIMARY, + .max_upscale_factor = 4, + .max_downscale_factor = 4, + .dly = { 4, 26, 29 }, + .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, + }, { + .name = "Cluster2-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER2, + .base = 0x1400, + .formats = formats_cluster, + .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 4, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_PRIMARY, + .max_upscale_factor = 4, + .max_downscale_factor = 4, + .dly = { 4, 26, 29 }, + .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, + }, { + .name = "Cluster3-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER3, + .base = 0x1600, + .formats = formats_cluster, + .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 5, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_PRIMARY, + .max_upscale_factor = 4, + .max_downscale_factor = 4, + .dly = { 4, 26, 29 }, + .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, + }, { + .name = "Esmart0-win0", + .phys_id = ROCKCHIP_VOP2_ESMART0, + .formats = formats_esmart, + .nformats = ARRAY_SIZE(formats_esmart), + .format_modifiers = format_modifiers, + .base = 0x1800, + .layer_sel_id = 2, + .supported_rotations = DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_OVERLAY, + .max_upscale_factor = 8, +
.max_downscale_factor = 8, + .dly = { 23, 45, 48 }, + }, { + .name = "Esmart1-win0", + .phys_id = ROCKCHIP_VOP2_ESMART1, + .formats = formats_esmart, + .nformats = ARRAY_SIZE(formats_esmart), + .format_modifiers = format_modifiers, + .base = 0x1a00, + .layer_sel_id = 3, + .supported_rotations = DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_OVERLAY, + .max_upscale_factor = 8, + .max_downscale_factor = 8, + .dly = { 23, 45, 48 }, + }, { + .name = "Esmart2-win0", + .phys_id = ROCKCHIP_VOP2_ESMART2, + .base = 0x1c00, + .formats = formats_esmart, + .nformats = ARRAY_SIZE(formats_esmart), + .format_modifiers = format_modifiers, + .layer_sel_id = 6, + .supported_rotations = DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_OVERLAY, + .max_upscale_factor = 8, + .max_downscale_factor = 8, + .dly = { 23, 45, 48 }, + }, { + .name = "Esmart3-win0", + .phys_id = ROCKCHIP_VOP2_ESMART3, + .formats = formats_esmart, + .nformats = ARRAY_SIZE(formats_esmart), + .format_modifiers = format_modifiers, + .base = 0x1e00, + .layer_sel_id = 7, + .supported_rotations = DRM_MODE_REFLECT_Y, + .type = DRM_PLANE_TYPE_OVERLAY, + .max_upscale_factor = 8, + .max_downscale_factor = 8, + .dly = { 23, 45, 48 }, + }, +}; + static const struct vop2_data rk3566_vop = { + .feature = VOP2_FEATURE_HAS_SYS_GRF, .nr_vps = 3, .max_input = { 4096, 2304 }, .max_output = { 4096, 2304 }, @@ -245,6 +452,7 @@ static const struct vop2_data rk3566_vop = { }; static const struct vop2_data rk3568_vop = { + .feature = VOP2_FEATURE_HAS_SYS_GRF, .nr_vps = 3, .max_input = { 4096, 2304 }, .max_output = { 4096, 2304 }, @@ -254,6 +462,18 @@ static const struct vop2_data rk3568_vop = { .soc_id = 3568, }; +static const struct vop2_data rk3588_vop = { + .feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF | + VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU, + .nr_vps = 4, + .max_input = { 4096, 4320 }, + .max_output = { 4096, 4320 }, + .vp = rk3588_vop_video_ports, + .win = rk3588_vop_win_data, + .win_size = ARRAY_SIZE(rk3588_vop_win_data), + .soc_id = 3588, +}; + static const struct of_device_id vop2_dt_match[] = { { .compatible = "rockchip,rk3566-vop", @@ -262,6 +482,9 @@ static const struct of_device_id vop2_dt_match[] = { .compatible = "rockchip,rk3568-vop", .data = &rk3568_vop, }, { + .compatible = "rockchip,rk3588-vop", + .data = &rk3588_vop + }, { }, }; MODULE_DEVICE_TABLE(of, vop2_dt_match); diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 3143ecaaff86..f8ed093b7356 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job, __assign_str(name, sched_job->sched->name); __entry->job_count = spsc_queue_count(&entity->job_queue); __entry->hw_job_count = atomic_read( - &sched_job->sched->hw_rq_count); + &sched_job->sched->credit_count); ), TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", __entry->entity, __entry->id, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 409e4256f6e7..3c4f5a392b06 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -81,12 +81,16 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, */ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { - /* The "priority" of an entity cannot exceed the number - * of run-queues of a scheduler. 
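The per-SoC vop2_data tables above are bound to their compatibles through the OF match table; a minimal sketch of how such match data is typically retrieved at probe time (the example_vop2_probe() name is hypothetical and not part of this patch):

	static int example_vop2_probe(struct platform_device *pdev)
	{
		const struct vop2_data *data;

		/* Returns the vop2_data bound to the matched compatible,
		 * e.g. &rk3588_vop for "rockchip,rk3588-vop". */
		data = of_device_get_match_data(&pdev->dev);
		if (!data)
			return -ENODEV;

		return 0;
	}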
+ /* The "priority" of an entity cannot exceed the number of run-queues of a + * scheduler. Protect against num_rqs being 0, by converting to signed. Choose + * the lowest priority available. */ - if (entity->priority >= sched_list[0]->num_rqs) - entity->priority = max_t(u32, sched_list[0]->num_rqs, - DRM_SCHED_PRIORITY_MIN); + if (entity->priority >= sched_list[0]->num_rqs) { + drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n", + entity->priority, sched_list[0]->num_rqs); + entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, + (s32) DRM_SCHED_PRIORITY_KERNEL); + } entity->rq = sched_list[0]->sched_rq[entity->priority]; } @@ -370,7 +374,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup_if_can_queue(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched, entity); } /** @@ -602,7 +606,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup_if_can_queue(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched, entity); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 99797a8c836a..550492a7a031 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -48,7 +48,30 @@ * through the jobs entity pointer. */ -#include <linux/kthread.h> +/** + * DOC: Flow Control + * + * The DRM GPU scheduler provides a flow control mechanism to regulate the rate + * in which the jobs fetched from scheduler entities are executed. + * + * In this context the &drm_gpu_scheduler keeps track of a driver specified + * credit limit representing the capacity of this scheduler and a credit count; + * every &drm_sched_job carries a driver specified number of credits. + * + * Once a job is executed (but not yet finished), the job's credits contribute + * to the scheduler's credit count until the job is finished. If by executing + * one more job the scheduler's credit count would exceed the scheduler's + * credit limit, the job won't be executed. Instead, the scheduler will wait + * until the credit count has decreased enough to not overflow its credit limit. + * This implies waiting for previously executed jobs. + * + * Optionally, drivers may register a callback (update_job_credits) provided by + * struct drm_sched_backend_ops to update the job's credits dynamically. The + * scheduler executes this callback every time the scheduler considers a job for + * execution and subsequently checks whether the job fits the scheduler's credit + * limit. + */ + #include <linux/wait.h> #include <linux/sched.h> #include <linux/completion.h> @@ -76,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO; MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); module_param_named(sched_policy, drm_sched_policy, int, 0444); +static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) +{ + u32 credits; + + drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, + atomic_read(&sched->credit_count), + &credits)); + + return credits; +} + +/** + * drm_sched_can_queue -- Can we queue more to the hardware? 
+ * @sched: scheduler instance + * @entity: the scheduler entity + * + * Return true if we can push at least one more job from @entity, false + * otherwise. + */ +static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + struct drm_sched_job *s_job; + + s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + if (!s_job) + return false; + + if (sched->ops->update_job_credits) { + s_job->credits = sched->ops->update_job_credits(s_job); + + drm_WARN(sched, !s_job->credits, + "Jobs with zero credits bypass job-flow control.\n"); + } + + /* If a job exceeds the credit limit, truncate it to the credit limit + * itself to guarantee forward progress. + */ + if (drm_WARN(sched, s_job->credits > sched->credit_limit, + "Jobs may not exceed the credit limit, truncate.\n")) + s_job->credits = sched->credit_limit; + + return drm_sched_available_credits(sched) >= s_job->credits; +} + static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b) { @@ -187,12 +255,18 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, /** * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run * + * @sched: the gpu scheduler * @rq: scheduler run queue to check. * - * Try to find a ready entity, returns NULL if none found. + * Try to find the next ready entity. + * + * Return an entity if one is found; return an error-pointer (!NULL) if an + * entity was ready, but the scheduler had insufficient credits to accommodate + * its job; return NULL, if no ready entity was found. */ static struct drm_sched_entity * -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) +drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { struct drm_sched_entity *entity; @@ -202,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current + * entity in terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); @@ -211,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) } list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current entity in + * terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); @@ -231,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) /** * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run * + * @sched: the gpu scheduler * @rq: scheduler run queue to check. * - * Find oldest waiting ready entity, returns NULL if none found. + * Find oldest waiting ready entity. + * + * Return an entity if one is found; return an error-pointer (!NULL) if an + * entity was ready, but the scheduler had insufficient credits to accommodate + * its job; return NULL, if no ready entity was found. 
*/ static struct drm_sched_entity * -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) +drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { struct rb_node *rb; @@ -246,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current entity in + * terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); break; @@ -257,6 +360,42 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) } /** + * drm_sched_run_job_queue - enqueue run-job work + * @sched: scheduler instance + */ +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->pause_submit)) + queue_work(sched->submit_wq, &sched->work_run_job); +} + +/** + * __drm_sched_run_free_queue - enqueue free-job work + * @sched: scheduler instance + */ +static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->pause_submit)) + queue_work(sched->submit_wq, &sched->work_free_job); +} + +/** + * drm_sched_run_free_queue - enqueue free-job work if ready + * @sched: scheduler instance + */ +static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *job; + + spin_lock(&sched->job_list_lock); + job = list_first_entry_or_null(&sched->pending_list, + struct drm_sched_job, list); + if (job && dma_fence_is_signaled(&job->s_fence->finished)) + __drm_sched_run_free_queue(sched); + spin_unlock(&sched->job_list_lock); +} + +/** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done * @@ -267,7 +406,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) struct drm_sched_fence *s_fence = s_job->s_fence; struct drm_gpu_scheduler *sched = s_fence->sched; - atomic_dec(&sched->hw_rq_count); + atomic_sub(s_job->credits, &sched->credit_count); atomic_dec(sched->score); trace_drm_sched_process_job(s_fence); @@ -275,7 +414,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - wake_up_interruptible(&sched->wake_up_worker); + __drm_sched_run_free_queue(sched); } /** @@ -299,10 +438,35 @@ static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb) */ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) { + lockdep_assert_held(&sched->job_list_lock); + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && !list_empty(&sched->pending_list)) - queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); + mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); +} + +static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched) +{ + spin_lock(&sched->job_list_lock); + drm_sched_start_timeout(sched); + spin_unlock(&sched->job_list_lock); +} + +/** + * drm_sched_tdr_queue_imm: - immediately start job timeout handler + * + * @sched: scheduler for which the timeout handling should be started. + * + * Start timeout handling immediately for the named scheduler. 
+ */ +void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) +{ + spin_lock(&sched->job_list_lock); + sched->timeout = 0; + drm_sched_start_timeout(sched); + spin_unlock(&sched->job_list_lock); } +EXPORT_SYMBOL(drm_sched_tdr_queue_imm); /** * drm_sched_fault - immediately start timeout handler @@ -388,7 +552,7 @@ static void drm_sched_job_timedout(struct work_struct *work) sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); - /* Protects against concurrent deletion in drm_sched_get_cleanup_job */ + /* Protects against concurrent deletion in drm_sched_get_finished_job */ spin_lock(&sched->job_list_lock); job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); @@ -416,11 +580,8 @@ static void drm_sched_job_timedout(struct work_struct *work) spin_unlock(&sched->job_list_lock); } - if (status != DRM_GPU_SCHED_STAT_ENODEV) { - spin_lock(&sched->job_list_lock); - drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); - } + if (status != DRM_GPU_SCHED_STAT_ENODEV) + drm_sched_start_timeout_unlocked(sched); } /** @@ -439,13 +600,13 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { struct drm_sched_job *s_job, *tmp; - kthread_park(sched->thread); + drm_sched_wqueue_stop(sched); /* * Reinsert back the bad job here - now it's safe as - * drm_sched_get_cleanup_job cannot race against us and release the + * drm_sched_get_finished_job cannot race against us and release the * bad job at this point - we parked (waited for) any in progress - * (earlier) cleanups and drm_sched_get_cleanup_job will not be called + * (earlier) cleanups and drm_sched_get_finished_job will not be called * now until the scheduler thread is unparked. */ if (bad && bad->sched == sched) @@ -468,7 +629,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) &s_job->cb)) { dma_fence_put(s_job->s_fence->parent); s_job->s_fence->parent = NULL; - atomic_dec(&sched->hw_rq_count); + atomic_sub(s_job->credits, &sched->credit_count); } else { /* * remove job from pending_list. 
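drm_sched_tdr_queue_imm(), exported above, lets a driver kick timeout handling from a fault path instead of waiting for the configured timer to expire. A minimal sketch, assuming a hypothetical foo_device with an embedded scheduler and a fault interrupt:

	static irqreturn_t foo_fault_irq(int irq, void *arg)
	{
		struct foo_device *fdev = arg;

		/* The hardware has already faulted; run the TDR handler now. */
		drm_sched_tdr_queue_imm(&fdev->sched);

		return IRQ_HANDLED;
	}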
@@ -529,7 +690,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { struct dma_fence *fence = s_job->s_fence->parent; - atomic_inc(&sched->hw_rq_count); + atomic_add(s_job->credits, &sched->credit_count); if (!full_recovery) continue; @@ -546,13 +707,10 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) drm_sched_job_done(s_job, -ECANCELED); } - if (full_recovery) { - spin_lock(&sched->job_list_lock); - drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); - } + if (full_recovery) + drm_sched_start_timeout_unlocked(sched); - kthread_unpark(sched->thread); + drm_sched_wqueue_start(sched); } EXPORT_SYMBOL(drm_sched_start); @@ -613,6 +771,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); * drm_sched_job_init - init a scheduler job * @job: scheduler job to init * @entity: scheduler entity to use + * @credits: the number of credits this job contributes to the scheduler's + * credit limit * @owner: job owner for debugging * * Refer to drm_sched_entity_push_job() documentation @@ -630,7 +790,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, - void *owner) + u32 credits, void *owner) { if (!entity->rq) { /* This will most likely be followed by missing frames @@ -641,7 +801,13 @@ int drm_sched_job_init(struct drm_sched_job *job, return -ENOENT; } + if (unlikely(!credits)) { + pr_err("*ERROR* %s: credits cannot be 0!\n", __func__); + return -EINVAL; + } + job->entity = entity; + job->credits = credits; job->s_fence = drm_sched_fence_alloc(entity, owner); if (!job->s_fence) return -ENOMEM; @@ -854,27 +1020,17 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) EXPORT_SYMBOL(drm_sched_job_cleanup); /** - * drm_sched_can_queue -- Can we queue more to the hardware? - * @sched: scheduler instance - * - * Return true if we can push more jobs to the hw, otherwise false. - */ -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) -{ - return atomic_read(&sched->hw_rq_count) < - sched->hw_submission_limit; -} - -/** - * drm_sched_wakeup_if_can_queue - Wake up the scheduler + * drm_sched_wakeup - Wake up the scheduler if it is ready to queue * @sched: scheduler instance + * @entity: the scheduler entity * * Wake up the scheduler if we can queue jobs. */ -void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) +void drm_sched_wakeup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) { - if (drm_sched_can_queue(sched)) - wake_up_interruptible(&sched->wake_up_worker); + if (drm_sched_can_queue(sched, entity)) + drm_sched_run_job_queue(sched); } /** @@ -882,7 +1038,11 @@ void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) * * @sched: scheduler instance * - * Returns the entity to process or NULL if none are found. + * Return an entity to process or NULL if none are found. + * + * Note that we break out of the for-loop when "entity" is non-NULL, which can + * also be an error pointer; this ensures we don't process lower-priority + * run-queues. See the comments in the respective selection functions.
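+ *
+ * Relatedly, with credit-based flow control in place, drm_sched_job_init()
+ * callers declare a job's cost up front. A hedged sketch, with hypothetical
+ * foo_* names; jobs without a finer-grained cost model simply pass 1:
+ *
+ *	ret = drm_sched_job_init(&job->base, &fctx->entity, 1, fctx);
+ *	if (ret)
+ *		return ret;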
*/ static struct drm_sched_entity * drm_sched_select_entity(struct drm_gpu_scheduler *sched) @@ -890,23 +1050,21 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) struct drm_sched_entity *entity; int i; - if (!drm_sched_can_queue(sched)) - return NULL; - - /* Kernel run queue has higher priority than normal run queue*/ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + /* Start with the highest priority. + */ + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? - drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : - drm_sched_rq_select_entity_rr(sched->sched_rq[i]); + drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : + drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); if (entity) break; } - return entity; + return IS_ERR(entity) ? NULL : entity; } /** - * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed + * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance * @@ -914,7 +1072,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * ready for it to be destroyed. */ static struct drm_sched_job * -drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) { struct drm_sched_job *job, *next; @@ -934,8 +1092,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) typeof(*next), list); if (next) { - next->s_fence->scheduled.timestamp = - dma_fence_timestamp(&job->s_fence->finished); + if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, + &next->s_fence->scheduled.flags)) + next->s_fence->scheduled.timestamp = + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -985,91 +1145,82 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, EXPORT_SYMBOL(drm_sched_pick_best); /** - * drm_sched_blocked - check if the scheduler is blocked - * - * @sched: scheduler instance + * drm_sched_free_job_work - worker to call free_job * - * Returns true if blocked, otherwise false. + * @w: free job work */ -static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) +static void drm_sched_free_job_work(struct work_struct *w) { - if (kthread_should_park()) { - kthread_parkme(); - return true; - } + struct drm_gpu_scheduler *sched = + container_of(w, struct drm_gpu_scheduler, work_free_job); + struct drm_sched_job *job; + + if (READ_ONCE(sched->pause_submit)) + return; - return false; + job = drm_sched_get_finished_job(sched); + if (job) + sched->ops->free_job(job); + + drm_sched_run_free_queue(sched); + drm_sched_run_job_queue(sched); } /** - * drm_sched_main - main scheduler thread - * - * @param: scheduler instance + * drm_sched_run_job_work - worker to call run_job * - * Returns 0. 
+ * @w: run job work */ -static int drm_sched_main(void *param) +static void drm_sched_run_job_work(struct work_struct *w) { - struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param; + struct drm_gpu_scheduler *sched = + container_of(w, struct drm_gpu_scheduler, work_run_job); + struct drm_sched_entity *entity; + struct dma_fence *fence; + struct drm_sched_fence *s_fence; + struct drm_sched_job *sched_job; int r; - sched_set_fifo_low(current); - - while (!kthread_should_stop()) { - struct drm_sched_entity *entity = NULL; - struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job; - struct dma_fence *fence; - struct drm_sched_job *cleanup_job = NULL; - - wait_event_interruptible(sched->wake_up_worker, - (cleanup_job = drm_sched_get_cleanup_job(sched)) || - (!drm_sched_blocked(sched) && - (entity = drm_sched_select_entity(sched))) || - kthread_should_stop()); - - if (cleanup_job) - sched->ops->free_job(cleanup_job); - - if (!entity) - continue; - - sched_job = drm_sched_entity_pop_job(entity); + if (READ_ONCE(sched->pause_submit)) + return; - if (!sched_job) { - complete_all(&entity->entity_idle); - continue; - } + entity = drm_sched_select_entity(sched); + if (!entity) + return; - s_fence = sched_job->s_fence; + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) { + complete_all(&entity->entity_idle); + return; /* No more work */ + } - atomic_inc(&sched->hw_rq_count); - drm_sched_job_begin(sched_job); + s_fence = sched_job->s_fence; - trace_drm_run_job(sched_job, entity); - fence = sched->ops->run_job(sched_job); - complete_all(&entity->entity_idle); - drm_sched_fence_scheduled(s_fence, fence); + atomic_add(sched_job->credits, &sched->credit_count); + drm_sched_job_begin(sched_job); - if (!IS_ERR_OR_NULL(fence)) { - /* Drop for original kref_init of the fence */ - dma_fence_put(fence); + trace_drm_run_job(sched_job, entity); + fence = sched->ops->run_job(sched_job); + complete_all(&entity->entity_idle); + drm_sched_fence_scheduled(s_fence, fence); - r = dma_fence_add_callback(fence, &sched_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(sched_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", - r); - } else { - drm_sched_job_done(sched_job, IS_ERR(fence) ? - PTR_ERR(fence) : 0); - } + if (!IS_ERR_OR_NULL(fence)) { + /* Drop for original kref_init of the fence */ + dma_fence_put(fence); - wake_up(&sched->job_scheduled); + r = dma_fence_add_callback(fence, &sched_job->cb, + drm_sched_job_done_cb); + if (r == -ENOENT) + drm_sched_job_done(sched_job, fence->error); + else if (r) + DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); + } else { + drm_sched_job_done(sched_job, IS_ERR(fence) ? + PTR_ERR(fence) : 0); } - return 0; + + wake_up(&sched->job_scheduled); + drm_sched_run_job_queue(sched); } /** @@ -1077,8 +1228,10 @@ static int drm_sched_main(void *param) * * @sched: scheduler instance * @ops: backend operations for this scheduler + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is + * allocated and used * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT - * @hw_submission: number of hw submissions that can be in flight + * @credit_limit: the number of credits this scheduler can hold from all jobs * @hang_limit: number of times to allow a job to hang before dropping it * @timeout: timeout value in jiffies for the scheduler * @timeout_wq: workqueue to use for timeout work. 
If NULL, the system_wq is @@ -1091,14 +1244,15 @@ static int drm_sched_main(void *param) */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, - u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + struct workqueue_struct *submit_wq, + u32 num_rqs, u32 credit_limit, unsigned int hang_limit, long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name, struct device *dev) { int i, ret; sched->ops = ops; - sched->hw_submission_limit = hw_submission; + sched->credit_limit = credit_limit; sched->name = name; sched->timeout = timeout; sched->timeout_wq = timeout_wq ? : system_wq; @@ -1121,46 +1275,50 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, return 0; } + if (submit_wq) { + sched->submit_wq = submit_wq; + sched->own_submit_wq = false; + } else { + sched->submit_wq = alloc_ordered_workqueue(name, 0); + if (!sched->submit_wq) + return -ENOMEM; + + sched->own_submit_wq = true; + } + ret = -ENOMEM; sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), GFP_KERNEL | __GFP_ZERO); - if (!sched->sched_rq) { - drm_err(sched, "%s: out of memory for sched_rq\n", __func__); - return -ENOMEM; - } + if (!sched->sched_rq) + goto Out_free; sched->num_rqs = num_rqs; - ret = -ENOMEM; - for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); if (!sched->sched_rq[i]) goto Out_unroll; drm_sched_rq_init(sched, sched->sched_rq[i]); } - init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); spin_lock_init(&sched->job_list_lock); - atomic_set(&sched->hw_rq_count, 0); + atomic_set(&sched->credit_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); atomic_set(&sched->_score, 0); atomic64_set(&sched->job_id_count, 0); - - /* Each scheduler will run on a seperate kernel thread */ - sched->thread = kthread_run(drm_sched_main, sched, sched->name); - if (IS_ERR(sched->thread)) { - ret = PTR_ERR(sched->thread); - sched->thread = NULL; - DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); - goto Out_unroll; - } + sched->pause_submit = false; sched->ready = true; return 0; Out_unroll: - for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) + for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) kfree(sched->sched_rq[i]); +Out_free: kfree(sched->sched_rq); sched->sched_rq = NULL; + if (sched->own_submit_wq) + destroy_workqueue(sched->submit_wq); drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); return ret; } @@ -1178,10 +1336,9 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) struct drm_sched_entity *s_entity; int i; - if (sched->thread) - kthread_stop(sched->thread); + drm_sched_wqueue_stop(sched); - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); @@ -1202,6 +1359,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); + if (sched->own_submit_wq) + destroy_workqueue(sched->submit_wq); sched->ready = false; kfree(sched->sched_rq); sched->sched_rq = NULL; @@ -1231,9 +1390,7 @@ void 
drm_sched_increase_karma(struct drm_sched_job *bad) if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); - for (i = DRM_SCHED_PRIORITY_MIN; - i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL); - i++) { + for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); @@ -1252,3 +1409,42 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) } } EXPORT_SYMBOL(drm_sched_increase_karma); + +/** + * drm_sched_wqueue_ready - Is the scheduler ready for submission + * + * @sched: scheduler instance + * + * Returns true if submission is ready + */ +bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) +{ + return sched->ready; +} +EXPORT_SYMBOL(drm_sched_wqueue_ready); + +/** + * drm_sched_wqueue_stop - stop scheduler submission + * + * @sched: scheduler instance + */ +void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) +{ + WRITE_ONCE(sched->pause_submit, true); + cancel_work_sync(&sched->work_run_job); + cancel_work_sync(&sched->work_free_job); +} +EXPORT_SYMBOL(drm_sched_wqueue_stop); + +/** + * drm_sched_wqueue_start - start scheduler submission + * + * @sched: scheduler instance + */ +void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) +{ + WRITE_ONCE(sched->pause_submit, false); + queue_work(sched->submit_wq, &sched->work_run_job); + queue_work(sched->submit_wq, &sched->work_free_job); +} +EXPORT_SYMBOL(drm_sched_wqueue_start); diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index e0174f82e353..bef293922b98 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -808,7 +808,8 @@ static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, const struct iosys_map *vmap, struct drm_rect *rect, - u8 *buf, u8 *data_array) + u8 *buf, u8 *data_array, + struct drm_format_conv_state *fmtcnv_state) { struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev); struct iosys_map dst; @@ -826,7 +827,7 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, return ret; iosys_map_set_vaddr(&dst, buf); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect); + drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -838,7 +839,8 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, const struct iosys_map *vmap, struct drm_rect *rect, u8 *buf, - u8 *data_array) + u8 *data_array, + struct drm_format_conv_state *fmtcnv_state) { struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev); unsigned int dst_pitch = drm_rect_width(rect); @@ -855,7 +857,7 @@ static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, return ret; iosys_map_set_vaddr(&dst, buf); - drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -871,6 +873,7 @@ static int ssd130x_primary_plane_atomic_check(struct drm_plane *plane, struct ssd130x_device *ssd130x = drm_to_ssd130x(drm); struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state); + struct drm_shadow_plane_state *shadow_plane_state = &ssd130x_state->base; struct drm_crtc *crtc = plane_state->crtc; struct drm_crtc_state 
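/*
 * The drm_sched_wqueue_{ready,stop,start}() helpers above replace the old
 * kthread park/unpark pattern for quiescing submission around a reset. A
 * minimal sketch, assuming a hypothetical foo_device with an embedded
 * scheduler and a foo_hw_reset() helper:
 *
 *	static void foo_gpu_reset(struct foo_device *fdev)
 *	{
 *		if (!drm_sched_wqueue_ready(&fdev->sched))
 *			return;
 *
 *		drm_sched_wqueue_stop(&fdev->sched);	(cancels both work items)
 *		foo_hw_reset(fdev);
 *		drm_sched_wqueue_start(&fdev->sched);	(requeues both work items)
 *	}
 */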
*crtc_state = NULL; const struct drm_format_info *fi; @@ -895,6 +898,16 @@ static int ssd130x_primary_plane_atomic_check(struct drm_plane *plane, pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width); + if (plane_state->fb->format != fi) { + void *buf; + + /* format conversion necessary; reserve buffer */ + buf = drm_format_conv_state_reserve(&shadow_plane_state->fmtcnv_state, + pitch, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL); if (!ssd130x_state->buffer) return -ENOMEM; @@ -909,6 +922,7 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane, struct ssd130x_device *ssd130x = drm_to_ssd130x(drm); struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state); + struct drm_shadow_plane_state *shadow_plane_state = &ssd130x_state->base; struct drm_crtc *crtc = plane_state->crtc; struct drm_crtc_state *crtc_state = NULL; const struct drm_format_info *fi; @@ -933,6 +947,16 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane, pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width); + if (plane_state->fb->format != fi) { + void *buf; + + /* format conversion necessary; reserve buffer */ + buf = drm_format_conv_state_reserve(&shadow_plane_state->fmtcnv_state, + pitch, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL); if (!ssd130x_state->buffer) return -ENOMEM; @@ -968,7 +992,8 @@ static void ssd130x_primary_plane_atomic_update(struct drm_plane *plane, ssd130x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip, ssd130x_plane_state->buffer, - ssd130x_crtc_state->data_array); + ssd130x_crtc_state->data_array, + &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); @@ -1002,7 +1027,8 @@ static void ssd132x_primary_plane_atomic_update(struct drm_plane *plane, ssd132x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip, ssd130x_plane_state->buffer, - ssd130x_crtc_state->data_array); + ssd130x_crtc_state->data_array, + &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/solomon/ssd130x.h b/drivers/gpu/drm/solomon/ssd130x.h index acf7cedf0c1a..075c5c3ee75a 100644 --- a/drivers/gpu/drm/solomon/ssd130x.h +++ b/drivers/gpu/drm/solomon/ssd130x.h @@ -17,7 +17,6 @@ #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_encoder.h> -#include <drm/drm_plane_helper.h> #include <linux/regmap.h> diff --git a/drivers/gpu/drm/sprd/sprd_dpu.c b/drivers/gpu/drm/sprd/sprd_dpu.c index 48183bbd0590..deb3bb96e2a8 100644 --- a/drivers/gpu/drm/sprd/sprd_dpu.c +++ b/drivers/gpu/drm/sprd/sprd_dpu.c @@ -859,16 +859,14 @@ static int sprd_dpu_probe(struct platform_device *pdev) return component_add(&pdev->dev, &dpu_component_ops); } -static int sprd_dpu_remove(struct platform_device *pdev) +static void sprd_dpu_remove(struct platform_device *pdev) { component_del(&pdev->dev, &dpu_component_ops); - - return 0; } struct platform_driver sprd_dpu_driver = { .probe = sprd_dpu_probe, - .remove = sprd_dpu_remove, + .remove_new = sprd_dpu_remove, .driver = { .name = "sprd-dpu-drv", .of_match_table = dpu_match_table, diff --git a/drivers/gpu/drm/sprd/sprd_drm.c b/drivers/gpu/drm/sprd/sprd_drm.c index 0aa39156f2fa..a74cd0caf645 100644 --- a/drivers/gpu/drm/sprd/sprd_drm.c +++ b/drivers/gpu/drm/sprd/sprd_drm.c @@ -138,10 +138,9 @@ static int sprd_drm_probe(struct platform_device *pdev) return 
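/*
 * The atomic_check hooks above reserve the format-conversion buffer up front,
 * so the non-failing update path never has to allocate. Standalone users
 * follow the same reserve/release pairing; a hedged sketch, assuming the
 * drm_format_conv_state helpers introduced alongside this change:
 *
 *	struct drm_format_conv_state fmtcnv = DRM_FORMAT_CONV_STATE_INIT;
 *	void *buf;
 *
 *	buf = drm_format_conv_state_reserve(&fmtcnv, pitch * height, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOMEM;
 *	(conversion helpers then use &fmtcnv as scratch space)
 *	drm_format_conv_state_release(&fmtcnv);
 */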
drm_of_component_probe(&pdev->dev, component_compare_of, &drm_component_ops); } -static int sprd_drm_remove(struct platform_device *pdev) +static void sprd_drm_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &drm_component_ops); - return 0; } static void sprd_drm_shutdown(struct platform_device *pdev) @@ -164,7 +163,7 @@ MODULE_DEVICE_TABLE(of, drm_match_table); static struct platform_driver sprd_drm_driver = { .probe = sprd_drm_probe, - .remove = sprd_drm_remove, + .remove_new = sprd_drm_remove, .shutdown = sprd_drm_shutdown, .driver = { .name = "sprd-drm-drv", diff --git a/drivers/gpu/drm/sprd/sprd_dsi.c b/drivers/gpu/drm/sprd/sprd_dsi.c index d7b143a75601..0b69c140eab3 100644 --- a/drivers/gpu/drm/sprd/sprd_dsi.c +++ b/drivers/gpu/drm/sprd/sprd_dsi.c @@ -1051,18 +1051,16 @@ static int sprd_dsi_probe(struct platform_device *pdev) return mipi_dsi_host_register(&dsi->host); } -static int sprd_dsi_remove(struct platform_device *pdev) +static void sprd_dsi_remove(struct platform_device *pdev) { struct sprd_dsi *dsi = dev_get_drvdata(&pdev->dev); mipi_dsi_host_unregister(&dsi->host); - - return 0; } struct platform_driver sprd_dsi_driver = { .probe = sprd_dsi_probe, - .remove = sprd_dsi_remove, + .remove_new = sprd_dsi_remove, .driver = { .name = "sprd-dsi-drv", .of_match_table = dsi_match_table, diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 0ba3ca3ac509..a1fcee665023 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -24,6 +24,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_debugfs.h> +#include <drm/drm_eld.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> #include <drm/drm_probe_helper.h> diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index d5a3d3f4fece..83341576630d 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -20,6 +20,7 @@ #include <drm/display/drm_scdc_helper.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_debugfs.h> +#include <drm/drm_eld.h> #include <drm/drm_file.h> #include <drm/drm_panel.h> #include <drm/drm_simple_kms_helper.h> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile index ba7baa622675..d6183b3d7688 100644 --- a/drivers/gpu/drm/tests/Makefile +++ b/drivers/gpu/drm/tests/Makefile @@ -9,15 +9,16 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \ drm_connector_test.o \ drm_damage_helper_test.o \ drm_dp_mst_helper_test.o \ + drm_exec_test.o \ drm_format_helper_test.o \ drm_format_test.o \ drm_framebuffer_test.o \ + drm_gem_shmem_test.o \ drm_managed_test.o \ drm_mm_test.o \ drm_modes_test.o \ drm_plane_helper_test.o \ drm_probe_helper_test.o \ - drm_rect_test.o \ - drm_exec_test.o + drm_rect_test.o CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index 09ee6f6af896..ea2af6bd9abe 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -13,315 +13,11 @@ #include "../lib/drm_random.h" -#define TIMEOUT(name__) \ - unsigned long name__ = jiffies + MAX_SCHEDULE_TIMEOUT - -static unsigned int random_seed; - static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } -__printf(2, 3) -static bool __timeout(unsigned long timeout, const char *fmt, ...) 
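/*
 * The sprd conversions above follow the tree-wide switch to the void-returning
 * platform remove callback: errors cannot usefully be reported at remove time,
 * so .remove_new drops the return value entirely. The shape of the pattern,
 * with hypothetical example_* names:
 *
 *	static int example_probe(struct platform_device *pdev) { return 0; }
 *
 *	static void example_remove(struct platform_device *pdev)
 *	{
 *	}
 *
 *	static struct platform_driver example_driver = {
 *		.probe = example_probe,
 *		.remove_new = example_remove,
 *		.driver = { .name = "example" },
 *	};
 */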
-{ - va_list va; - - if (!signal_pending(current)) { - cond_resched(); - if (time_before(jiffies, timeout)) - return false; - } - - if (fmt) { - va_start(va, fmt); - vprintk(fmt, va); - va_end(va); - } - - return true; -} - -static void __dump_block(struct kunit *test, struct drm_buddy *mm, - struct drm_buddy_block *block, bool buddy) -{ - kunit_err(test, "block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%d buddy=%d\n", - block->header, drm_buddy_block_state(block), - drm_buddy_block_order(block), drm_buddy_block_offset(block), - drm_buddy_block_size(mm, block), !block->parent, buddy); -} - -static void dump_block(struct kunit *test, struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - struct drm_buddy_block *buddy; - - __dump_block(test, mm, block, false); - - buddy = drm_get_buddy(block); - if (buddy) - __dump_block(test, mm, buddy, true); -} - -static int check_block(struct kunit *test, struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - struct drm_buddy_block *buddy; - unsigned int block_state; - u64 block_size; - u64 offset; - int err = 0; - - block_state = drm_buddy_block_state(block); - - if (block_state != DRM_BUDDY_ALLOCATED && - block_state != DRM_BUDDY_FREE && block_state != DRM_BUDDY_SPLIT) { - kunit_err(test, "block state mismatch\n"); - err = -EINVAL; - } - - block_size = drm_buddy_block_size(mm, block); - offset = drm_buddy_block_offset(block); - - if (block_size < mm->chunk_size) { - kunit_err(test, "block size smaller than min size\n"); - err = -EINVAL; - } - - /* We can't use is_power_of_2() for a u64 on 32-bit systems. */ - if (block_size & (block_size - 1)) { - kunit_err(test, "block size not power of two\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(block_size, mm->chunk_size)) { - kunit_err(test, "block size not aligned to min size\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(offset, mm->chunk_size)) { - kunit_err(test, "block offset not aligned to min size\n"); - err = -EINVAL; - } - - if (!IS_ALIGNED(offset, block_size)) { - kunit_err(test, "block offset not aligned to block size\n"); - err = -EINVAL; - } - - buddy = drm_get_buddy(block); - - if (!buddy && block->parent) { - kunit_err(test, "buddy has gone fishing\n"); - err = -EINVAL; - } - - if (buddy) { - if (drm_buddy_block_offset(buddy) != (offset ^ block_size)) { - kunit_err(test, "buddy has wrong offset\n"); - err = -EINVAL; - } - - if (drm_buddy_block_size(mm, buddy) != block_size) { - kunit_err(test, "buddy size mismatch\n"); - err = -EINVAL; - } - - if (drm_buddy_block_state(buddy) == block_state && - block_state == DRM_BUDDY_FREE) { - kunit_err(test, "block and its buddy are free\n"); - err = -EINVAL; - } - } - - return err; -} - -static int check_blocks(struct kunit *test, struct drm_buddy *mm, - struct list_head *blocks, u64 expected_size, bool is_contiguous) -{ - struct drm_buddy_block *block; - struct drm_buddy_block *prev; - u64 total; - int err = 0; - - block = NULL; - prev = NULL; - total = 0; - - list_for_each_entry(block, blocks, link) { - err = check_block(test, mm, block); - - if (!drm_buddy_block_is_allocated(block)) { - kunit_err(test, "block not allocated\n"); - err = -EINVAL; - } - - if (is_contiguous && prev) { - u64 prev_block_size; - u64 prev_offset; - u64 offset; - - prev_offset = drm_buddy_block_offset(prev); - prev_block_size = drm_buddy_block_size(mm, prev); - offset = drm_buddy_block_offset(block); - - if (offset != (prev_offset + prev_block_size)) { - kunit_err(test, "block offset mismatch\n"); - err = -EINVAL; - } - } - - if (err) - break; - 
- total += drm_buddy_block_size(mm, block); - prev = block; - } - - if (!err) { - if (total != expected_size) { - kunit_err(test, "size mismatch, expected=%llx, found=%llx\n", - expected_size, total); - err = -EINVAL; - } - return err; - } - - if (prev) { - kunit_err(test, "prev block, dump:\n"); - dump_block(test, mm, prev); - } - - kunit_err(test, "bad block, dump:\n"); - dump_block(test, mm, block); - - return err; -} - -static int check_mm(struct kunit *test, struct drm_buddy *mm) -{ - struct drm_buddy_block *root; - struct drm_buddy_block *prev; - unsigned int i; - u64 total; - int err = 0; - - if (!mm->n_roots) { - kunit_err(test, "n_roots is zero\n"); - return -EINVAL; - } - - if (mm->n_roots != hweight64(mm->size)) { - kunit_err(test, "n_roots mismatch, n_roots=%u, expected=%lu\n", - mm->n_roots, hweight64(mm->size)); - return -EINVAL; - } - - root = NULL; - prev = NULL; - total = 0; - - for (i = 0; i < mm->n_roots; ++i) { - struct drm_buddy_block *block; - unsigned int order; - - root = mm->roots[i]; - if (!root) { - kunit_err(test, "root(%u) is NULL\n", i); - err = -EINVAL; - break; - } - - err = check_block(test, mm, root); - - if (!drm_buddy_block_is_free(root)) { - kunit_err(test, "root not free\n"); - err = -EINVAL; - } - - order = drm_buddy_block_order(root); - - if (!i) { - if (order != mm->max_order) { - kunit_err(test, "max order root missing\n"); - err = -EINVAL; - } - } - - if (prev) { - u64 prev_block_size; - u64 prev_offset; - u64 offset; - - prev_offset = drm_buddy_block_offset(prev); - prev_block_size = drm_buddy_block_size(mm, prev); - offset = drm_buddy_block_offset(root); - - if (offset != (prev_offset + prev_block_size)) { - kunit_err(test, "root offset mismatch\n"); - err = -EINVAL; - } - } - - block = list_first_entry_or_null(&mm->free_list[order], - struct drm_buddy_block, link); - if (block != root) { - kunit_err(test, "root mismatch at order=%u\n", order); - err = -EINVAL; - } - - if (err) - break; - - prev = root; - total += drm_buddy_block_size(mm, root); - } - - if (!err) { - if (total != mm->size) { - kunit_err(test, "expected mm size=%llx, found=%llx\n", - mm->size, total); - err = -EINVAL; - } - return err; - } - - if (prev) { - kunit_err(test, "prev root(%u), dump:\n", i - 1); - dump_block(test, mm, prev); - } - - if (root) { - kunit_err(test, "bad root(%u), dump:\n", i); - dump_block(test, mm, root); - } - - return err; -} - -static void mm_config(u64 *size, u64 *chunk_size) -{ - DRM_RND_STATE(prng, random_seed); - u32 s, ms; - - /* Nothing fancy, just try to get an interesting bit pattern */ - - prandom_seed_state(&prng, random_seed); - - /* Let size be a random number of pages up to 8 GB (2M pages) */ - s = 1 + drm_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); - /* Let the chunk size be a random power of 2 less than size */ - ms = BIT(drm_prandom_u32_max_state(ilog2(s), &prng)); - /* Round size down to the chunk size */ - s &= -ms; - - /* Convert from pages to bytes */ - *chunk_size = (u64)ms << 12; - *size = (u64)s << 12; -} - static void drm_test_buddy_alloc_pathological(struct kunit *test) { u64 mm_size, size, start = 0; @@ -403,96 +99,6 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) drm_buddy_fini(&mm); } -static void drm_test_buddy_alloc_smoke(struct kunit *test) -{ - u64 mm_size, chunk_size, start = 0; - unsigned long flags = 0; - struct drm_buddy mm; - int *order; - int i; - - DRM_RND_STATE(prng, random_seed); - TIMEOUT(end_time); - - mm_config(&mm_size, &chunk_size); - - KUNIT_ASSERT_FALSE_MSG(test, 
drm_buddy_init(&mm, mm_size, chunk_size), - "buddy_init failed\n"); - - order = drm_random_order(mm.max_order + 1, &prng); - KUNIT_ASSERT_TRUE(test, order); - - for (i = 0; i <= mm.max_order; ++i) { - struct drm_buddy_block *block; - int max_order = order[i]; - bool timeout = false; - LIST_HEAD(blocks); - u64 total, size; - LIST_HEAD(tmp); - int order, err; - - KUNIT_ASSERT_FALSE_MSG(test, check_mm(test, &mm), - "pre-mm check failed, abort\n"); - - order = max_order; - total = 0; - - do { -retry: - size = get_size(order, chunk_size); - err = drm_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags); - if (err) { - if (err == -ENOMEM) { - KUNIT_FAIL(test, "buddy_alloc hit -ENOMEM with order=%d\n", - order); - } else { - if (order--) { - err = 0; - goto retry; - } - - KUNIT_FAIL(test, "buddy_alloc with order=%d failed\n", - order); - } - - break; - } - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &blocks); - KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), order, - "buddy_alloc order mismatch\n"); - - total += drm_buddy_block_size(&mm, block); - - if (__timeout(end_time, NULL)) { - timeout = true; - break; - } - } while (total < mm.size); - - if (!err) - err = check_blocks(test, &mm, &blocks, total, false); - - drm_buddy_free_list(&mm, &blocks); - - if (!err) { - KUNIT_EXPECT_FALSE_MSG(test, check_mm(test, &mm), - "post-mm check failed\n"); - } - - if (err || timeout) - break; - - cond_resched(); - } - - kfree(order); - drm_buddy_fini(&mm); -} - static void drm_test_buddy_alloc_pessimistic(struct kunit *test) { u64 mm_size, size, start = 0; @@ -634,64 +240,6 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test) drm_buddy_fini(&mm); } -static void drm_test_buddy_alloc_range(struct kunit *test) -{ - unsigned long flags = DRM_BUDDY_RANGE_ALLOCATION; - u64 offset, size, rem, chunk_size, end; - unsigned long page_num; - struct drm_buddy mm; - LIST_HEAD(blocks); - - mm_config(&size, &chunk_size); - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, size, chunk_size), - "buddy_init failed"); - - KUNIT_ASSERT_FALSE_MSG(test, check_mm(test, &mm), - "pre-mm check failed, abort!"); - - rem = mm.size; - offset = 0; - - for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { - struct drm_buddy_block *block; - LIST_HEAD(tmp); - - size = min(page_num * mm.chunk_size, rem); - end = offset + size; - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, offset, end, - size, mm.chunk_size, - &tmp, flags), - "alloc_range with offset=%llx, size=%llx failed\n", offset, size); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_range has no blocks\n"); - - KUNIT_ASSERT_EQ_MSG(test, drm_buddy_block_offset(block), offset, - "alloc_range start offset mismatch, found=%llx, expected=%llx\n", - drm_buddy_block_offset(block), offset); - - KUNIT_ASSERT_FALSE(test, check_blocks(test, &mm, &tmp, size, true)); - - list_splice_tail(&tmp, &blocks); - - offset += size; - - rem -= size; - if (!rem) - break; - - cond_resched(); - } - - drm_buddy_free_list(&mm, &blocks); - - KUNIT_EXPECT_FALSE_MSG(test, check_mm(test, &mm), "post-mm check failed\n"); - - drm_buddy_fini(&mm); -} - static void drm_test_buddy_alloc_limit(struct kunit *test) { u64 size = U64_MAX, start = 0; @@ -727,29 +275,16 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } -static int 
drm_buddy_suite_init(struct kunit_suite *suite) -{ - while (!random_seed) - random_seed = get_random_u32(); - - kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", random_seed); - - return 0; -} - static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), - KUNIT_CASE(drm_test_buddy_alloc_range), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), - KUNIT_CASE(drm_test_buddy_alloc_smoke), KUNIT_CASE(drm_test_buddy_alloc_pathological), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", - .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; diff --git a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c index 545beea33e8c..d916e548fcb1 100644 --- a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c +++ b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c @@ -42,13 +42,13 @@ static const struct drm_dp_mst_calc_pbn_mode_test drm_dp_mst_calc_pbn_mode_cases .clock = 332880, .bpp = 24, .dsc = true, - .expected = 50 + .expected = 1191 }, { .clock = 324540, .bpp = 24, .dsc = true, - .expected = 49 + .expected = 1161 }, }; @@ -56,7 +56,7 @@ static void drm_test_dp_mst_calc_pbn_mode(struct kunit *test) { const struct drm_dp_mst_calc_pbn_mode_test *params = test->param_value; - KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp, params->dsc), + KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp << 4), params->expected); } @@ -68,6 +68,152 @@ static void dp_mst_calc_pbn_mode_desc(const struct drm_dp_mst_calc_pbn_mode_test KUNIT_ARRAY_PARAM(drm_dp_mst_calc_pbn_mode, drm_dp_mst_calc_pbn_mode_cases, dp_mst_calc_pbn_mode_desc); +struct drm_dp_mst_calc_pbn_div_test { + int link_rate; + int lane_count; + fixed20_12 expected; +}; + +#define fp_init(__int, __frac) { \ + .full = (__int) * (1 << 12) + \ + (__frac) * (1 << 12) / 100000 \ +} + +static const struct drm_dp_mst_calc_pbn_div_test drm_dp_mst_calc_pbn_div_dp1_4_cases[] = { + /* + * UHBR rates (DP Standard v2.1 2.7.6.3, which specifies the value + * rounded to the closest 2 decimal places): + * .expected = .link_rate * .lane_count * 0.9671 / 8 / 54 / 100 + * DP1.4 rates (DP Standard v2.1 2.6.4.2): + * .expected = .link_rate * .lane_count * 0.8000 / 8 / 54 / 100 + * + * Values below are truncated to 5 decimal places.
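+ *
+ * For example, the 2000000 kHz x 4 lane case works out as
+ * 2000000 * 4 * 0.9671 / 8 / 54 / 100 = 179.09259..., which
+ * fp_init(179, 9259) encodes as 179 + 9259/100000 in 20.12 fixed point
+ * (179 * 4096 + 9259 * 4096 / 100000 = 733563); the second macro argument
+ * is the fractional part in units of 1/100000.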
+ */ + { + .link_rate = 2000000, + .lane_count = 4, + .expected = fp_init(179, 9259), /* 179.09259 */ + }, + { + .link_rate = 2000000, + .lane_count = 2, + .expected = fp_init(89, 54629), + }, + { + .link_rate = 2000000, + .lane_count = 1, + .expected = fp_init(44, 77314), + }, + { + .link_rate = 1350000, + .lane_count = 4, + .expected = fp_init(120, 88750), + }, + { + .link_rate = 1350000, + .lane_count = 2, + .expected = fp_init(60, 44375), + }, + { + .link_rate = 1350000, + .lane_count = 1, + .expected = fp_init(30, 22187), + }, + { + .link_rate = 1000000, + .lane_count = 4, + .expected = fp_init(89, 54629), + }, + { + .link_rate = 1000000, + .lane_count = 2, + .expected = fp_init(44, 77314), + }, + { + .link_rate = 1000000, + .lane_count = 1, + .expected = fp_init(22, 38657), + }, + { + .link_rate = 810000, + .lane_count = 4, + .expected = fp_init(60, 0), + }, + { + .link_rate = 810000, + .lane_count = 2, + .expected = fp_init(30, 0), + }, + { + .link_rate = 810000, + .lane_count = 1, + .expected = fp_init(15, 0), + }, + { + .link_rate = 540000, + .lane_count = 4, + .expected = fp_init(40, 0), + }, + { + .link_rate = 540000, + .lane_count = 2, + .expected = fp_init(20, 0), + }, + { + .link_rate = 540000, + .lane_count = 1, + .expected = fp_init(10, 0), + }, + { + .link_rate = 270000, + .lane_count = 4, + .expected = fp_init(20, 0), + }, + { + .link_rate = 270000, + .lane_count = 2, + .expected = fp_init(10, 0), + }, + { + .link_rate = 270000, + .lane_count = 1, + .expected = fp_init(5, 0), + }, + { + .link_rate = 162000, + .lane_count = 4, + .expected = fp_init(12, 0), + }, + { + .link_rate = 162000, + .lane_count = 2, + .expected = fp_init(6, 0), + }, + { + .link_rate = 162000, + .lane_count = 1, + .expected = fp_init(3, 0), + }, +}; + +static void drm_test_dp_mst_calc_pbn_div(struct kunit *test) +{ + const struct drm_dp_mst_calc_pbn_div_test *params = test->param_value; + /* mgr->dev is only needed by drm_dbg_kms(), but it's not called for the test cases. 
*/ + struct drm_dp_mst_topology_mgr *mgr = test->priv; + + KUNIT_EXPECT_EQ(test, drm_dp_get_vc_payload_bw(mgr, params->link_rate, params->lane_count).full, + params->expected.full); +} + +static void dp_mst_calc_pbn_div_desc(const struct drm_dp_mst_calc_pbn_div_test *t, char *desc) +{ + sprintf(desc, "Link rate %d lane count %d", t->link_rate, t->lane_count); +} + +KUNIT_ARRAY_PARAM(drm_dp_mst_calc_pbn_div, drm_dp_mst_calc_pbn_div_dp1_4_cases, + dp_mst_calc_pbn_div_desc); + static u8 data[] = { 0xff, 0x00, 0xdd }; struct drm_dp_mst_sideband_msg_req_test { @@ -416,13 +562,27 @@ KUNIT_ARRAY_PARAM(drm_dp_mst_sideband_msg_req, drm_dp_mst_sideband_msg_req_cases static struct kunit_case drm_dp_mst_helper_tests[] = { KUNIT_CASE_PARAM(drm_test_dp_mst_calc_pbn_mode, drm_dp_mst_calc_pbn_mode_gen_params), + KUNIT_CASE_PARAM(drm_test_dp_mst_calc_pbn_div, drm_dp_mst_calc_pbn_div_gen_params), KUNIT_CASE_PARAM(drm_test_dp_mst_sideband_msg_req_decode, drm_dp_mst_sideband_msg_req_gen_params), { } }; +static int drm_dp_mst_helper_tests_init(struct kunit *test) +{ + struct drm_dp_mst_topology_mgr *mgr; + + mgr = kunit_kzalloc(test, sizeof(*mgr), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, mgr); + + test->priv = mgr; + + return 0; +} + static struct kunit_suite drm_dp_mst_helper_test_suite = { .name = "drm_dp_mst_helper", + .init = drm_dp_mst_helper_tests_init, .test_cases = drm_dp_mst_helper_tests, }; diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index 563949d777dd..81f928a429ba 100644 --- a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test) { struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_fini(&exec); KUNIT_SUCCEED(test); } @@ -60,7 +60,7 @@ static void test_lock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_prepare_obj(&exec, &gobj, 1); drm_exec_retry_on_contention(&exec); @@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE); drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 
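/*
 * Throughout these tests drm_exec_init() gains a third argument, a hint for
 * the number of GEM objects expected to be locked, with 0 selecting a
 * default. A hedged sketch of a non-test call site, where num_bos and obj
 * are assumed caller state:
 *
 *	struct drm_exec exec;
 *	int ret;
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bos);
 *	drm_exec_until_all_locked(&exec) {
 *		ret = drm_exec_lock_obj(&exec, obj);
 *		drm_exec_retry_on_contention(&exec);
 *		if (ret)
 *			break;
 *	}
 *	drm_exec_fini(&exec);
 */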
drm_exec_until_all_locked(&exec) ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 1); @@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test) { struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; } drm_exec_fini(&exec); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index f6408e56f786..08992636ec05 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -20,6 +20,10 @@ #define TEST_USE_DEFAULT_PITCH 0 +static unsigned char fmtcnv_state_mem[PAGE_SIZE]; +static struct drm_format_conv_state fmtcnv_state = + DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED(fmtcnv_state_mem, sizeof(fmtcnv_state_mem)); + struct convert_to_gray8_result { unsigned int dst_pitch; const u8 expected[TEST_BUF_SIZE]; @@ -630,8 +634,7 @@ static void drm_test_fb_xrgb8888_to_gray8(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, &params->clip); - + drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -664,7 +667,7 @@ static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -697,12 +700,14 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip, false); + drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip, + &fmtcnv_state, false); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ - drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, true); + drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, + &fmtcnv_state, true); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size); @@ -711,7 +716,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, &params->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -748,7 +754,7 @@ static void drm_test_fb_xrgb8888_to_xrgb1555(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -757,7 +763,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, &params->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -794,7 +801,7 @@ static void drm_test_fb_xrgb8888_to_argb1555(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -803,7 +810,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, &params->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -840,7 +848,7 @@ static void drm_test_fb_xrgb8888_to_rgba5551(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -849,7 +857,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, &params->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -890,7 +899,7 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ @@ -898,7 +907,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, &params->clip, + &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -933,7 +943,7 @@ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -942,7 +952,8 @@ int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, &params->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -979,7 +990,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -989,7 +1000,7 @@ int blit_result = 0; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB2101010, &src, &fb, - &params->clip); + &params->clip, &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1024,7 +1035,7 @@ static void drm_test_fb_xrgb8888_to_argb2101010(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1034,7 +1045,7 @@ int blit_result = 0; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB2101010, &src, &fb, - &params->clip); + &params->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1071,7 +1082,7 @@ static void drm_test_fb_xrgb8888_to_mono(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, &params->clip); + drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -1104,7 +1115,7 @@ static void drm_test_fb_swab(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_swab(&dst, dst_pitch, &src, &fb, &params->clip, false); + drm_fb_swab(&dst, dst_pitch, &src, &fb, &params->clip, false, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1114,7 +1125,7 @@ int blit_result; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888 | DRM_FORMAT_BIG_ENDIAN, - &src, &fb, &params->clip); + &src, &fb, &params->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1123,7 +1134,8 @@ buf = dst.vaddr; memset(buf, 0, dst_size); - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, &params->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1137,7 +1149,8 @@ mock_format.format |= DRM_FORMAT_BIG_ENDIAN; fb.format = &mock_format; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, &params->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1175,7 +1188,8 @@ static void drm_test_fb_xrgb8888_to_abgr8888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, &params->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1214,7 +1228,8 @@ static void drm_test_fb_xrgb8888_to_xbgr8888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, &params->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, &params->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1817,7 +1832,8 @@ static void drm_test_fb_memcpy(struct kunit *test) int blit_result; - blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, &params->clip); + blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, &params->clip, + &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); for (size_t i = 0; i < fb.format->num_planes; i++) { diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c new file mode 100644 index 000000000000..91202e40cde9 --- /dev/null +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit test suite for GEM objects backed by shmem buffers + * + * Copyright (C) 2023 Red Hat, Inc. + * + * Author: Marco Pagani <marpagan@redhat.com> + */ + +#include <linux/dma-buf.h> +#include <linux/iosys-map.h> +#include <linux/sizes.h> + +#include <kunit/test.h> + +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_kunit_helpers.h> + +#define TEST_SIZE SZ_1M +#define TEST_BYTE 0xae + +/* + * Wrappers to avoid an explicit type casting when passing action + * functions to kunit_add_action().
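The cast these wrappers avoid is worth one concrete line. kunit_add_action() takes a kunit_action_t * (a void (*)(void *)), so registering a typed cleanup helper such as sg_free_table() directly would need a function-pointer cast, and indirect calls through a mismatched type are flagged by -Wcast-function-type-strict and trap under kernel CFI. A sketch of the rejected form, for contrast with the wrappers that follow:

	/* Anti-pattern the wrappers below replace; do not do this: */
	kunit_add_action(test, (kunit_action_t *)sg_free_table, sgt);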
+ */ +static void kfree_wrapper(void *ptr) +{ + const void *obj = ptr; + + kfree(obj); +} + +static void sg_free_table_wrapper(void *ptr) +{ + struct sg_table *sgt = ptr; + + sg_free_table(sgt); +} + +static void drm_gem_shmem_free_wrapper(void *ptr) +{ + struct drm_gem_shmem_object *shmem = ptr; + + drm_gem_shmem_free(shmem); +} + +/* + * Test creating a shmem GEM object backed by a shmem buffer. The test + * case succeeds if the GEM object is successfully allocated with the + * shmem file node and object functions attributes set, and the size + * attribute is equal to the correct size. + */ +static void drm_gem_shmem_test_obj_create(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + KUNIT_EXPECT_EQ(test, shmem->base.size, TEST_SIZE); + KUNIT_EXPECT_NOT_NULL(test, shmem->base.filp); + KUNIT_EXPECT_NOT_NULL(test, shmem->base.funcs); + + drm_gem_shmem_free(shmem); +} + +/* + * Test creating a shmem GEM object from a scatter/gather table exported + * via a DMA-BUF. The test case succeeds if the GEM object is successfully + * created with the shmem file node attribute equal to NULL and the sgt + * attribute pointing to the scatter/gather table that has been imported. + */ +static void drm_gem_shmem_test_obj_create_private(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + struct drm_gem_object *gem_obj; + struct dma_buf buf_mock; + struct dma_buf_attachment attach_mock; + struct sg_table *sgt; + char *buf; + int ret; + + /* Create a mock scatter/gather table */ + buf = kunit_kzalloc(test, TEST_SIZE, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, buf); + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, sgt); + + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + + sg_init_one(sgt->sgl, buf, TEST_SIZE); + + /* Init a mock DMA-BUF */ + buf_mock.size = TEST_SIZE; + attach_mock.dmabuf = &buf_mock; + + gem_obj = drm_gem_shmem_prime_import_sg_table(drm_dev, &attach_mock, sgt); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gem_obj); + KUNIT_EXPECT_EQ(test, gem_obj->size, TEST_SIZE); + KUNIT_EXPECT_NULL(test, gem_obj->filp); + KUNIT_EXPECT_NOT_NULL(test, gem_obj->funcs); + + /* The scatter/gather table will be freed by drm_gem_shmem_free */ + kunit_remove_action(test, sg_free_table_wrapper, sgt); + kunit_remove_action(test, kfree_wrapper, sgt); + + shmem = to_drm_gem_shmem_obj(gem_obj); + KUNIT_EXPECT_PTR_EQ(test, shmem->sgt, sgt); + + drm_gem_shmem_free(shmem); +} + +/* + * Test pinning backing pages for a shmem GEM object. The test case + * succeeds if a suitable number of backing pages are allocated, and + * the pages table counter attribute is increased by one.
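If the counter semantics encoded by the expectations below are unfamiliar, a minimal sketch may help (assuming a shmem object created as in these tests; pins nest, and the backing pages are only released when the count returns to zero):

	static void pin_refcount_sketch(struct drm_gem_shmem_object *shmem)
	{
		if (drm_gem_shmem_pin(shmem))	/* pages allocated, count 0 -> 1 */
			return;

		if (drm_gem_shmem_pin(shmem)) {	/* nested pin, count 1 -> 2 */
			drm_gem_shmem_unpin(shmem);
			return;
		}

		drm_gem_shmem_unpin(shmem);	/* count 2 -> 1, pages kept */
		drm_gem_shmem_unpin(shmem);	/* count 1 -> 0, pages released */
	}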
+ */ +static void drm_gem_shmem_test_pin_pages(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + int i, ret; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + KUNIT_EXPECT_NULL(test, shmem->pages); + KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 0); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = drm_gem_shmem_pin(shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_NOT_NULL(test, shmem->pages); + KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 1); + + for (i = 0; i < (shmem->base.size >> PAGE_SHIFT); i++) + KUNIT_ASSERT_NOT_NULL(test, shmem->pages[i]); + + drm_gem_shmem_unpin(shmem); + KUNIT_EXPECT_NULL(test, shmem->pages); + KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 0); +} + +/* + * Test creating a virtual mapping for a shmem GEM object. The test + * case succeeds if the backing memory is mapped and the reference + * counter for virtual mapping is increased by one. Moreover, the test + * case writes and then reads a test pattern over the mapped memory. + */ +static void drm_gem_shmem_test_vmap(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + struct iosys_map map; + int ret, i; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + KUNIT_EXPECT_NULL(test, shmem->vaddr); + KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 0); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = drm_gem_shmem_vmap(shmem, &map); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_NOT_NULL(test, shmem->vaddr); + KUNIT_ASSERT_FALSE(test, iosys_map_is_null(&map)); + KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 1); + + iosys_map_memset(&map, 0, TEST_BYTE, TEST_SIZE); + for (i = 0; i < TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, iosys_map_rd(&map, i, u8), TEST_BYTE); + + drm_gem_shmem_vunmap(shmem, &map); + KUNIT_EXPECT_NULL(test, shmem->vaddr); + KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 0); +} + +/* + * Test exporting a scatter/gather table of pinned pages suitable for + * PRIME usage from a shmem GEM object. The test case succeeds if a + * scatter/gather table large enough to accommodate the backing memory + * is successfully exported. + */ +static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + struct sg_table *sgt; + struct scatterlist *sg; + unsigned int si, len = 0; + int ret; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = drm_gem_shmem_pin(shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + sgt = drm_gem_shmem_get_sg_table(shmem); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); + KUNIT_EXPECT_NULL(test, shmem->sgt); + + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + + for_each_sgtable_sg(sgt, sg, si) { + KUNIT_EXPECT_NOT_NULL(test, sg); + len += sg->length; + } + + KUNIT_EXPECT_GE(test, len, TEST_SIZE); +} + +/* + * Test pinning pages and exporting a scatter/gather table suitable for + * driver usage from a shmem GEM object. 
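The two exports tested next differ in ownership: drm_gem_shmem_get_sg_table() only builds a table describing the already-pinned pages and hands it to the caller, while drm_gem_shmem_get_pages_sgt() pins the pages, dma-maps them and caches the table in shmem->sgt. A minimal driver-side sketch of the latter, under the same assumptions as the tests:

	static int map_for_device_sketch(struct drm_gem_shmem_object *shmem)
	{
		struct sg_table *sgt;

		/* Pins the pages, dma-maps them and caches the table; both
		 * are released later by drm_gem_shmem_free(). */
		sgt = drm_gem_shmem_get_pages_sgt(shmem);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		/* From here on sgt == shmem->sgt, as the test asserts. */
		return 0;
	}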
The test case succeeds if the + * backing pages are pinned and a scatter/gather table large enough to + * accommodate the backing memory is successfully exported. + */ +static void drm_gem_shmem_test_get_sg_table(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + struct sg_table *sgt; + struct scatterlist *sg; + unsigned int si, ret, len = 0; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + /* The scatter/gather table will be freed by drm_gem_shmem_free */ + sgt = drm_gem_shmem_get_pages_sgt(shmem); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); + KUNIT_ASSERT_NOT_NULL(test, shmem->pages); + KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 1); + KUNIT_EXPECT_PTR_EQ(test, sgt, shmem->sgt); + + for_each_sgtable_sg(sgt, sg, si) { + KUNIT_EXPECT_NOT_NULL(test, sg); + len += sg->length; + } + + KUNIT_EXPECT_GE(test, len, TEST_SIZE); +} + +/* + * Test updating the madvise state of a shmem GEM object. The test + * case checks that the function for setting madv updates it only if + * its current value is greater or equal than zero and returns false + * if it has a negative value. + */ +static void drm_gem_shmem_test_madvise(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + int ret; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + KUNIT_ASSERT_EQ(test, shmem->madv, 0); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = drm_gem_shmem_madvise(shmem, 1); + KUNIT_EXPECT_TRUE(test, ret); + KUNIT_ASSERT_EQ(test, shmem->madv, 1); + + /* Set madv to a negative value */ + ret = drm_gem_shmem_madvise(shmem, -1); + KUNIT_EXPECT_FALSE(test, ret); + KUNIT_ASSERT_EQ(test, shmem->madv, -1); + + /* Check that madv cannot be set back to a positive value */ + ret = drm_gem_shmem_madvise(shmem, 0); + KUNIT_EXPECT_FALSE(test, ret); + KUNIT_ASSERT_EQ(test, shmem->madv, -1); +} + +/* + * Test purging a shmem GEM object. First, assert that a newly created + * shmem GEM object is not purgeable. Then, set madvise to a positive + * value and call drm_gem_shmem_get_pages_sgt() to pin and dma-map the + * backing pages. Finally, assert that the shmem GEM object is now + * purgeable and purge it. 
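This madvise/purge pairing mirrors how shrinker-driven users of the helper (panfrost-style drivers) reclaim memory; a sketch only, ignoring the locking a real shrinker needs:

	static bool try_purge_sketch(struct drm_gem_shmem_object *shmem)
	{
		/* Requires that the BO was marked discardable earlier with a
		 * positive madv value, and that its pages are mapped. */
		if (!drm_gem_shmem_is_purgeable(shmem))
			return false;

		/* Drops the pages and the cached sgt; madv becomes -1, so
		 * the BO can only be reallocated, never marked needed. */
		drm_gem_shmem_purge(shmem);
		return true;
	}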
+ */ +static void drm_gem_shmem_test_purge(struct kunit *test) +{ + struct drm_device *drm_dev = test->priv; + struct drm_gem_shmem_object *shmem; + struct sg_table *sgt; + int ret; + + shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem); + + ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = drm_gem_shmem_is_purgeable(shmem); + KUNIT_EXPECT_FALSE(test, ret); + + ret = drm_gem_shmem_madvise(shmem, 1); + KUNIT_EXPECT_TRUE(test, ret); + + /* The scatter/gather table will be freed by drm_gem_shmem_free */ + sgt = drm_gem_shmem_get_pages_sgt(shmem); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); + + ret = drm_gem_shmem_is_purgeable(shmem); + KUNIT_EXPECT_TRUE(test, ret); + + drm_gem_shmem_purge(shmem); + KUNIT_EXPECT_NULL(test, shmem->pages); + KUNIT_EXPECT_NULL(test, shmem->sgt); + KUNIT_EXPECT_EQ(test, shmem->madv, -1); +} + +static int drm_gem_shmem_test_init(struct kunit *test) +{ + struct device *dev; + struct drm_device *drm_dev; + + /* Allocate a parent device */ + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* + * The DRM core will automatically initialize the GEM core and create + * a DRM Memory Manager object which provides an address space pool + * for GEM objects allocation. + */ + drm_dev = __drm_kunit_helper_alloc_drm_device(test, dev, sizeof(*drm_dev), + 0, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, drm_dev); + + test->priv = drm_dev; + + return 0; +} + +static struct kunit_case drm_gem_shmem_test_cases[] = { + KUNIT_CASE(drm_gem_shmem_test_obj_create), + KUNIT_CASE(drm_gem_shmem_test_obj_create_private), + KUNIT_CASE(drm_gem_shmem_test_pin_pages), + KUNIT_CASE(drm_gem_shmem_test_vmap), + KUNIT_CASE(drm_gem_shmem_test_get_pages_sgt), + KUNIT_CASE(drm_gem_shmem_test_get_sg_table), + KUNIT_CASE(drm_gem_shmem_test_madvise), + KUNIT_CASE(drm_gem_shmem_test_purge), + {} +}; + +static struct kunit_suite drm_gem_shmem_suite = { + .name = "drm_gem_shmem", + .init = drm_gem_shmem_test_init, + .test_cases = drm_gem_shmem_test_cases +}; + +kunit_test_suite(drm_gem_shmem_suite); + +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 05d5e7af6d25..4e9247cf9977 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -17,10 +17,6 @@ #include "../lib/drm_random.h" -static unsigned int random_seed; -static unsigned int max_iterations = 8192; -static unsigned int max_prime = 128; - enum { BEST, BOTTOMUP, @@ -37,10 +33,6 @@ static const struct insert_mode { [TOPDOWN] = { "top-down", DRM_MM_INSERT_HIGH }, [EVICT] = { "evict", DRM_MM_INSERT_EVICT }, {} -}, evict_modes[] = { - { "bottom-up", DRM_MM_INSERT_LOW }, - { "top-down", DRM_MM_INSERT_HIGH }, - {} }; static bool assert_no_holes(struct kunit *test, const struct drm_mm *mm) @@ -97,57 +89,6 @@ static bool assert_one_hole(struct kunit *test, const struct drm_mm *mm, u64 sta return ok; } -static bool assert_continuous(struct kunit *test, const struct drm_mm *mm, u64 size) -{ - struct drm_mm_node *node, *check, *found; - unsigned long n; - u64 addr; - - if (!assert_no_holes(test, mm)) - return false; - - n = 0; - addr = 0; - drm_mm_for_each_node(node, mm) { - if (node->start != addr) { - KUNIT_FAIL(test, "node[%ld] list out of order, expected %llx found %llx\n", - n, addr, node->start); - return false; - } - - if (node->size != size) { - KUNIT_FAIL(test, "node[%ld].size incorrect, expected %llx, 
found %llx\n", - n, size, node->size); - return false; - } - - if (drm_mm_hole_follows(node)) { - KUNIT_FAIL(test, "node[%ld] is followed by a hole!\n", n); - return false; - } - - found = NULL; - drm_mm_for_each_node_in_range(check, mm, addr, addr + size) { - if (node != check) { - KUNIT_FAIL(test, - "lookup return wrong node, expected start %llx, found %llx\n", - node->start, check->start); - return false; - } - found = check; - } - if (!found) { - KUNIT_FAIL(test, "lookup failed for node %llx + %llx\n", addr, size); - return false; - } - - addr += size; - n++; - } - - return true; -} - static u64 misalignment(struct drm_mm_node *node, u64 alignment) { u64 rem; @@ -270,215 +211,6 @@ static void drm_test_mm_debug(struct kunit *test) nodes[0].start, nodes[0].size); } -static struct drm_mm_node *set_node(struct drm_mm_node *node, - u64 start, u64 size) -{ - node->start = start; - node->size = size; - return node; -} - -static bool expect_reserve_fail(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *node) -{ - int err; - - err = drm_mm_reserve_node(mm, node); - if (likely(err == -ENOSPC)) - return true; - - if (!err) { - KUNIT_FAIL(test, "impossible reserve succeeded, node %llu + %llu\n", - node->start, node->size); - drm_mm_remove_node(node); - } else { - KUNIT_FAIL(test, - "impossible reserve failed with wrong error %d [expected %d], node %llu + %llu\n", - err, -ENOSPC, node->start, node->size); - } - return false; -} - -static bool noinline_for_stack check_reserve_boundaries(struct kunit *test, struct drm_mm *mm, - unsigned int count, - u64 size) -{ - const struct boundary { - u64 start, size; - const char *name; - } boundaries[] = { -#define B(st, sz) { (st), (sz), "{ " #st ", " #sz "}" } - B(0, 0), - B(-size, 0), - B(size, 0), - B(size * count, 0), - B(-size, size), - B(-size, -size), - B(-size, 2 * size), - B(0, -size), - B(size, -size), - B(count * size, size), - B(count * size, -size), - B(count * size, count * size), - B(count * size, -count * size), - B(count * size, -(count + 1) * size), - B((count + 1) * size, size), - B((count + 1) * size, -size), - B((count + 1) * size, -2 * size), -#undef B - }; - struct drm_mm_node tmp = {}; - int n; - - for (n = 0; n < ARRAY_SIZE(boundaries); n++) { - if (!expect_reserve_fail(test, mm, set_node(&tmp, boundaries[n].start, - boundaries[n].size))) { - KUNIT_FAIL(test, "boundary[%d:%s] failed, count=%u, size=%lld\n", - n, boundaries[n].name, count, size); - return false; - } - } - - return true; -} - -static int __drm_test_mm_reserve(struct kunit *test, unsigned int count, u64 size) -{ - DRM_RND_STATE(prng, random_seed); - struct drm_mm mm; - struct drm_mm_node tmp, *nodes, *node, *next; - unsigned int *order, n, m, o = 0; - int ret, err; - - /* For exercising drm_mm_reserve_node(), we want to check that - * reservations outside of the drm_mm range are rejected, and to - * overlapping and otherwise already occupied ranges. Afterwards, - * the tree and nodes should be intact. 
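For readers without the drm_mm API in cache: reservation operates on caller-filled nodes, and any overlap with an existing node or the range boundary must fail with -ENOSPC, which is all the expect_reserve_fail() helper above distills. A minimal sketch (SZ_1M from linux/sizes.h):

	static int reserve_sketch(void)
	{
		struct drm_mm mm;
		struct drm_mm_node node = {}, overlap = {};
		int err;

		drm_mm_init(&mm, 0, SZ_1M);

		node.start = 0x1000;
		node.size = 0x1000;
		err = drm_mm_reserve_node(&mm, &node);	/* claims [0x1000, 0x2000) */

		overlap.start = 0x1800;			/* intersects the claim */
		overlap.size = 0x1000;
		WARN_ON(drm_mm_reserve_node(&mm, &overlap) != -ENOSPC);

		if (!err)
			drm_mm_remove_node(&node);
		drm_mm_takedown(&mm);
		return err;
	}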
- */ - - DRM_MM_BUG_ON(!count); - DRM_MM_BUG_ON(!size); - - ret = -ENOMEM; - order = drm_random_order(count, &prng); - if (!order) - goto err; - - nodes = vzalloc(array_size(count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - ret = -EINVAL; - drm_mm_init(&mm, 0, count * size); - - if (!check_reserve_boundaries(test, &mm, count, size)) - goto out; - - for (n = 0; n < count; n++) { - nodes[n].start = order[n] * size; - nodes[n].size = size; - - err = drm_mm_reserve_node(&mm, &nodes[n]); - if (err) { - KUNIT_FAIL(test, "reserve failed, step %d, start %llu\n", - n, nodes[n].start); - ret = err; - goto out; - } - - if (!drm_mm_node_allocated(&nodes[n])) { - KUNIT_FAIL(test, "reserved node not allocated! step %d, start %llu\n", - n, nodes[n].start); - goto out; - } - - if (!expect_reserve_fail(test, &mm, &nodes[n])) - goto out; - } - - /* After random insertion the nodes should be in order */ - if (!assert_continuous(test, &mm, size)) - goto out; - - /* Repeated use should then fail */ - drm_random_reorder(order, count, &prng); - for (n = 0; n < count; n++) { - if (!expect_reserve_fail(test, &mm, set_node(&tmp, order[n] * size, 1))) - goto out; - - /* Remove and reinsert should work */ - drm_mm_remove_node(&nodes[order[n]]); - err = drm_mm_reserve_node(&mm, &nodes[order[n]]); - if (err) { - KUNIT_FAIL(test, "reserve failed, step %d, start %llu\n", - n, nodes[n].start); - ret = err; - goto out; - } - } - - if (!assert_continuous(test, &mm, size)) - goto out; - - /* Overlapping use should then fail */ - for (n = 0; n < count; n++) { - if (!expect_reserve_fail(test, &mm, set_node(&tmp, 0, size * count))) - goto out; - } - for (n = 0; n < count; n++) { - if (!expect_reserve_fail(test, &mm, set_node(&tmp, size * n, size * (count - n)))) - goto out; - } - - /* Remove several, reinsert, check full */ - for_each_prime_number(n, min(max_prime, count)) { - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - drm_mm_remove_node(node); - } - - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - err = drm_mm_reserve_node(&mm, node); - if (err) { - KUNIT_FAIL(test, "reserve failed, step %d/%d, start %llu\n", - m, n, node->start); - ret = err; - goto out; - } - } - - o += n; - - if (!assert_continuous(test, &mm, size)) - goto out; - } - - ret = 0; -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - vfree(nodes); - kfree(order); -err: - return ret; -} - -static void drm_test_mm_reserve(struct kunit *test) -{ - const unsigned int count = min_t(unsigned int, BIT(10), max_iterations); - int n; - - for_each_prime_number_from(n, 1, 54) { - u64 size = BIT_ULL(n); - - KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size - 1)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size + 1)); - - cond_resched(); - } -} - static bool expect_insert(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *node, u64 size, u64 alignment, unsigned long color, const struct insert_mode *mode) @@ -503,600 +235,6 @@ static bool expect_insert(struct kunit *test, struct drm_mm *mm, return true; } -static bool expect_insert_fail(struct kunit *test, struct drm_mm *mm, u64 size) -{ - struct drm_mm_node tmp = {}; - int err; - - err = drm_mm_insert_node(mm, &tmp, size); - if (likely(err == -ENOSPC)) - return true; - - if (!err) { - KUNIT_FAIL(test, "impossible insert succeeded, node %llu + %llu\n", - tmp.start, tmp.size); - drm_mm_remove_node(&tmp); 
- } else { - KUNIT_FAIL(test, - "impossible insert failed with wrong error %d [expected %d], size %llu\n", - err, -ENOSPC, size); - } - return false; -} - -static int __drm_test_mm_insert(struct kunit *test, unsigned int count, u64 size, bool replace) -{ - DRM_RND_STATE(prng, random_seed); - const struct insert_mode *mode; - struct drm_mm mm; - struct drm_mm_node *nodes, *node, *next; - unsigned int *order, n, m, o = 0; - int ret; - - /* Fill a range with lots of nodes, check it doesn't fail too early */ - - DRM_MM_BUG_ON(!count); - DRM_MM_BUG_ON(!size); - - ret = -ENOMEM; - nodes = vmalloc(array_size(count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - order = drm_random_order(count, &prng); - if (!order) - goto err_nodes; - - ret = -EINVAL; - drm_mm_init(&mm, 0, count * size); - - for (mode = insert_modes; mode->name; mode++) { - for (n = 0; n < count; n++) { - struct drm_mm_node tmp; - - node = replace ? &tmp : &nodes[n]; - memset(node, 0, sizeof(*node)); - if (!expect_insert(test, &mm, node, size, 0, n, mode)) { - KUNIT_FAIL(test, "%s insert failed, size %llu step %d\n", - mode->name, size, n); - goto out; - } - - if (replace) { - drm_mm_replace_node(&tmp, &nodes[n]); - if (drm_mm_node_allocated(&tmp)) { - KUNIT_FAIL(test, - "replaced old-node still allocated! step %d\n", - n); - goto out; - } - - if (!assert_node(test, &nodes[n], &mm, size, 0, n)) { - KUNIT_FAIL(test, - "replaced node did not inherit parameters, size %llu step %d\n", - size, n); - goto out; - } - - if (tmp.start != nodes[n].start) { - KUNIT_FAIL(test, - "replaced node mismatch location expected [%llx + %llx], found [%llx + %llx]\n", - tmp.start, size, nodes[n].start, nodes[n].size); - goto out; - } - } - } - - /* After random insertion the nodes should be in order */ - if (!assert_continuous(test, &mm, size)) - goto out; - - /* Repeated use should then fail */ - if (!expect_insert_fail(test, &mm, size)) - goto out; - - /* Remove one and reinsert, as the only hole it should refill itself */ - for (n = 0; n < count; n++) { - u64 addr = nodes[n].start; - - drm_mm_remove_node(&nodes[n]); - if (!expect_insert(test, &mm, &nodes[n], size, 0, n, mode)) { - KUNIT_FAIL(test, "%s reinsert failed, size %llu step %d\n", - mode->name, size, n); - goto out; - } - - if (nodes[n].start != addr) { - KUNIT_FAIL(test, - "%s reinsert node moved, step %d, expected %llx, found %llx\n", - mode->name, n, addr, nodes[n].start); - goto out; - } - - if (!assert_continuous(test, &mm, size)) - goto out; - } - - /* Remove several, reinsert, check full */ - for_each_prime_number(n, min(max_prime, count)) { - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - drm_mm_remove_node(node); - } - - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - if (!expect_insert(test, &mm, node, size, 0, n, mode)) { - KUNIT_FAIL(test, - "%s multiple reinsert failed, size %llu step %d\n", - mode->name, size, n); - goto out; - } - } - - o += n; - - if (!assert_continuous(test, &mm, size)) - goto out; - - if (!expect_insert_fail(test, &mm, size)) - goto out; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - DRM_MM_BUG_ON(!drm_mm_clean(&mm)); - - cond_resched(); - } - - ret = 0; -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_nodes: - vfree(nodes); - return ret; -} - -static void drm_test_mm_insert(struct kunit *test) -{ - const unsigned int count = min_t(unsigned int, BIT(10), max_iterations); - unsigned int n; - - 
for_each_prime_number_from(n, 1, 54) { - u64 size = BIT_ULL(n); - - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size - 1, false)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size, false)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size + 1, false)); - - cond_resched(); - } -} - -static void drm_test_mm_replace(struct kunit *test) -{ - const unsigned int count = min_t(unsigned int, BIT(10), max_iterations); - unsigned int n; - - /* Reuse __drm_test_mm_insert to exercise replacement by inserting a dummy node, - * then replacing it with the intended node. We want to check that - * the tree is intact and all the information we need is carried - * across to the target node. - */ - - for_each_prime_number_from(n, 1, 54) { - u64 size = BIT_ULL(n); - - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size - 1, true)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size, true)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size + 1, true)); - - cond_resched(); - } -} - -static bool expect_insert_in_range(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *node, - u64 size, u64 alignment, unsigned long color, - u64 range_start, u64 range_end, const struct insert_mode *mode) -{ - int err; - - err = drm_mm_insert_node_in_range(mm, node, - size, alignment, color, - range_start, range_end, - mode->mode); - if (err) { - KUNIT_FAIL(test, - "insert (size=%llu, alignment=%llu, color=%lu, mode=%s) nto range [%llx, %llx] failed with err=%d\n", - size, alignment, color, mode->name, - range_start, range_end, err); - return false; - } - - if (!assert_node(test, node, mm, size, alignment, color)) { - drm_mm_remove_node(node); - return false; - } - - return true; -} - -static bool expect_insert_in_range_fail(struct kunit *test, struct drm_mm *mm, - u64 size, u64 range_start, u64 range_end) -{ - struct drm_mm_node tmp = {}; - int err; - - err = drm_mm_insert_node_in_range(mm, &tmp, size, 0, 0, range_start, range_end, - 0); - if (likely(err == -ENOSPC)) - return true; - - if (!err) { - KUNIT_FAIL(test, - "impossible insert succeeded, node %llx + %llu, range [%llx, %llx]\n", - tmp.start, tmp.size, range_start, range_end); - drm_mm_remove_node(&tmp); - } else { - KUNIT_FAIL(test, - "impossible insert failed with wrong error %d [expected %d], size %llu, range [%llx, %llx]\n", - err, -ENOSPC, size, range_start, range_end); - } - - return false; -} - -static bool assert_contiguous_in_range(struct kunit *test, struct drm_mm *mm, - u64 size, u64 start, u64 end) -{ - struct drm_mm_node *node; - unsigned int n; - - if (!expect_insert_in_range_fail(test, mm, size, start, end)) - return false; - - n = div64_u64(start + size - 1, size); - drm_mm_for_each_node(node, mm) { - if (node->start < start || node->start + node->size > end) { - KUNIT_FAIL(test, - "node %d out of range, address [%llx + %llu], range [%llx, %llx]\n", - n, node->start, node->start + node->size, start, end); - return false; - } - - if (node->start != n * size) { - KUNIT_FAIL(test, "node %d out of order, expected start %llx, found %llx\n", - n, n * size, node->start); - return false; - } - - if (node->size != size) { - KUNIT_FAIL(test, "node %d has wrong size, expected size %llx, found %llx\n", - n, size, node->size); - return false; - } - - if (drm_mm_hole_follows(node) && drm_mm_hole_node_end(node) < end) { - KUNIT_FAIL(test, "node %d is followed by a hole!\n", n); - return false; - } - - n++; - } - - if (start > 0) { - node = __drm_mm_interval_first(mm, 0, 
start - 1); - if (drm_mm_node_allocated(node)) { - KUNIT_FAIL(test, "node before start: node=%llx+%llu, start=%llx\n", - node->start, node->size, start); - return false; - } - } - - if (end < U64_MAX) { - node = __drm_mm_interval_first(mm, end, U64_MAX); - if (drm_mm_node_allocated(node)) { - KUNIT_FAIL(test, "node after end: node=%llx+%llu, end=%llx\n", - node->start, node->size, end); - return false; - } - } - - return true; -} - -static int __drm_test_mm_insert_range(struct kunit *test, unsigned int count, u64 size, - u64 start, u64 end) -{ - const struct insert_mode *mode; - struct drm_mm mm; - struct drm_mm_node *nodes, *node, *next; - unsigned int n, start_n, end_n; - int ret; - - DRM_MM_BUG_ON(!count); - DRM_MM_BUG_ON(!size); - DRM_MM_BUG_ON(end <= start); - - /* Very similar to __drm_test_mm_insert(), but now instead of populating the - * full range of the drm_mm, we try to fill a small portion of it. - */ - - ret = -ENOMEM; - nodes = vzalloc(array_size(count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - ret = -EINVAL; - drm_mm_init(&mm, 0, count * size); - - start_n = div64_u64(start + size - 1, size); - end_n = div64_u64(end - size, size); - - for (mode = insert_modes; mode->name; mode++) { - for (n = start_n; n <= end_n; n++) { - if (!expect_insert_in_range(test, &mm, &nodes[n], size, size, n, - start, end, mode)) { - KUNIT_FAIL(test, - "%s insert failed, size %llu, step %d [%d, %d], range [%llx, %llx]\n", - mode->name, size, n, start_n, end_n, start, end); - goto out; - } - } - - if (!assert_contiguous_in_range(test, &mm, size, start, end)) { - KUNIT_FAIL(test, - "%s: range [%llx, %llx] not full after initialisation, size=%llu\n", - mode->name, start, end, size); - goto out; - } - - /* Remove one and reinsert, it should refill itself */ - for (n = start_n; n <= end_n; n++) { - u64 addr = nodes[n].start; - - drm_mm_remove_node(&nodes[n]); - if (!expect_insert_in_range(test, &mm, &nodes[n], size, size, n, - start, end, mode)) { - KUNIT_FAIL(test, "%s reinsert failed, step %d\n", mode->name, n); - goto out; - } - - if (nodes[n].start != addr) { - KUNIT_FAIL(test, - "%s reinsert node moved, step %d, expected %llx, found %llx\n", - mode->name, n, addr, nodes[n].start); - goto out; - } - } - - if (!assert_contiguous_in_range(test, &mm, size, start, end)) { - KUNIT_FAIL(test, - "%s: range [%llx, %llx] not full after reinsertion, size=%llu\n", - mode->name, start, end, size); - goto out; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - DRM_MM_BUG_ON(!drm_mm_clean(&mm)); - - cond_resched(); - } - - ret = 0; -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - vfree(nodes); - return ret; -} - -static int insert_outside_range(struct kunit *test) -{ - struct drm_mm mm; - const unsigned int start = 1024; - const unsigned int end = 2048; - const unsigned int size = end - start; - - drm_mm_init(&mm, start, size); - - if (!expect_insert_in_range_fail(test, &mm, 1, 0, start)) - return -EINVAL; - - if (!expect_insert_in_range_fail(test, &mm, size, - start - size / 2, start + (size + 1) / 2)) - return -EINVAL; - - if (!expect_insert_in_range_fail(test, &mm, size, - end - (size + 1) / 2, end + size / 2)) - return -EINVAL; - - if (!expect_insert_in_range_fail(test, &mm, 1, end, end + size)) - return -EINVAL; - - drm_mm_takedown(&mm); - return 0; -} - -static void drm_test_mm_insert_range(struct kunit *test) -{ - const unsigned int count = min_t(unsigned int, BIT(13), max_iterations); - unsigned int n; - - 
/* Check that requests outside the bounds of drm_mm are rejected. */ - KUNIT_ASSERT_FALSE(test, insert_outside_range(test)); - - for_each_prime_number_from(n, 1, 50) { - const u64 size = BIT_ULL(n); - const u64 max = count * size; - - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 1, max)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max - 1)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max / 2)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, - max / 2, max)); - KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, - max / 4 + 1, 3 * max / 4 - 1)); - - cond_resched(); - } -} - -static int prepare_frag(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *nodes, - unsigned int num_insert, const struct insert_mode *mode) -{ - unsigned int size = 4096; - unsigned int i; - - for (i = 0; i < num_insert; i++) { - if (!expect_insert(test, mm, &nodes[i], size, 0, i, mode) != 0) { - KUNIT_FAIL(test, "%s insert failed\n", mode->name); - return -EINVAL; - } - } - - /* introduce fragmentation by freeing every other node */ - for (i = 0; i < num_insert; i++) { - if (i % 2 == 0) - drm_mm_remove_node(&nodes[i]); - } - - return 0; -} - -static u64 get_insert_time(struct kunit *test, struct drm_mm *mm, - unsigned int num_insert, struct drm_mm_node *nodes, - const struct insert_mode *mode) -{ - unsigned int size = 8192; - ktime_t start; - unsigned int i; - - start = ktime_get(); - for (i = 0; i < num_insert; i++) { - if (!expect_insert(test, mm, &nodes[i], size, 0, i, mode) != 0) { - KUNIT_FAIL(test, "%s insert failed\n", mode->name); - return 0; - } - } - - return ktime_to_ns(ktime_sub(ktime_get(), start)); -} - -static void drm_test_mm_frag(struct kunit *test) -{ - struct drm_mm mm; - const struct insert_mode *mode; - struct drm_mm_node *nodes, *node, *next; - unsigned int insert_size = 10000; - unsigned int scale_factor = 4; - - /* We need 4 * insert_size nodes to hold intermediate allocated - * drm_mm nodes. 
- * 1 times for prepare_frag() - * 1 times for get_insert_time() - * 2 times for get_insert_time() - */ - nodes = vzalloc(array_size(insert_size * 4, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - /* For BOTTOMUP and TOPDOWN, we first fragment the - * address space using prepare_frag() and then try to verify - * that insertions scale quadratically from 10k to 20k insertions - */ - drm_mm_init(&mm, 1, U64_MAX - 2); - for (mode = insert_modes; mode->name; mode++) { - u64 insert_time1, insert_time2; - - if (mode->mode != DRM_MM_INSERT_LOW && - mode->mode != DRM_MM_INSERT_HIGH) - continue; - - if (prepare_frag(test, &mm, nodes, insert_size, mode)) - goto err; - - insert_time1 = get_insert_time(test, &mm, insert_size, - nodes + insert_size, mode); - if (insert_time1 == 0) - goto err; - - insert_time2 = get_insert_time(test, &mm, (insert_size * 2), - nodes + insert_size * 2, mode); - if (insert_time2 == 0) - goto err; - - kunit_info(test, "%s fragmented insert of %u and %u insertions took %llu and %llu nsecs\n", - mode->name, insert_size, insert_size * 2, insert_time1, insert_time2); - - if (insert_time2 > (scale_factor * insert_time1)) { - KUNIT_FAIL(test, "%s fragmented insert took %llu nsecs more\n", - mode->name, insert_time2 - (scale_factor * insert_time1)); - goto err; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - } - -err: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - vfree(nodes); -} - -static void drm_test_mm_align(struct kunit *test) -{ - const struct insert_mode *mode; - const unsigned int max_count = min(8192u, max_prime); - struct drm_mm mm; - struct drm_mm_node *nodes, *node, *next; - unsigned int prime; - - /* For each of the possible insertion modes, we pick a few - * arbitrary alignments and check that the inserted node - * meets our requirements. 
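Alignment is a plain argument to the insertion API; each expect_insert() in this test boils down to a call of this shape (the sizes here are illustrative):

	static int aligned_insert_sketch(struct drm_mm *mm, struct drm_mm_node *node)
	{
		/* 4 KiB placed on a 64 KiB boundary; on success
		 * node->start % SZ_64K == 0, which is what assert_node()
		 * verifies via misalignment(). */
		return drm_mm_insert_node_generic(mm, node, SZ_4K, SZ_64K, 0,
						  DRM_MM_INSERT_BEST);
	}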
- */ - - nodes = vzalloc(array_size(max_count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - drm_mm_init(&mm, 1, U64_MAX - 2); - - for (mode = insert_modes; mode->name; mode++) { - unsigned int i = 0; - - for_each_prime_number_from(prime, 1, max_count) { - u64 size = next_prime_number(prime); - - if (!expect_insert(test, &mm, &nodes[i], size, prime, i, mode)) { - KUNIT_FAIL(test, "%s insert failed with alignment=%d", - mode->name, prime); - goto out; - } - - i++; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - DRM_MM_BUG_ON(!drm_mm_clean(&mm)); - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - vfree(nodes); -} - static void drm_test_mm_align_pot(struct kunit *test, int max) { struct drm_mm mm; @@ -1144,626 +282,6 @@ static void drm_test_mm_align64(struct kunit *test) drm_test_mm_align_pot(test, 64); } -static void show_scan(struct kunit *test, const struct drm_mm_scan *scan) -{ - kunit_info(test, "scan: hit [%llx, %llx], size=%lld, align=%lld, color=%ld\n", - scan->hit_start, scan->hit_end, scan->size, scan->alignment, scan->color); -} - -static void show_holes(struct kunit *test, const struct drm_mm *mm, int count) -{ - u64 hole_start, hole_end; - struct drm_mm_node *hole; - - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - struct drm_mm_node *next = list_next_entry(hole, node_list); - const char *node1 = NULL, *node2 = NULL; - - if (drm_mm_node_allocated(hole)) - node1 = kasprintf(GFP_KERNEL, "[%llx + %lld, color=%ld], ", - hole->start, hole->size, hole->color); - - if (drm_mm_node_allocated(next)) - node2 = kasprintf(GFP_KERNEL, ", [%llx + %lld, color=%ld]", - next->start, next->size, next->color); - - kunit_info(test, "%sHole [%llx - %llx, size %lld]%s\n", node1, - hole_start, hole_end, hole_end - hole_start, node2); - - kfree(node2); - kfree(node1); - - if (!--count) - break; - } -} - -struct evict_node { - struct drm_mm_node node; - struct list_head link; -}; - -static bool evict_nodes(struct kunit *test, struct drm_mm_scan *scan, - struct evict_node *nodes, unsigned int *order, unsigned int count, - bool use_color, struct list_head *evict_list) -{ - struct evict_node *e, *en; - unsigned int i; - - for (i = 0; i < count; i++) { - e = &nodes[order ? 
order[i] : i]; - list_add(&e->link, evict_list); - if (drm_mm_scan_add_block(scan, &e->node)) - break; - } - list_for_each_entry_safe(e, en, evict_list, link) { - if (!drm_mm_scan_remove_block(scan, &e->node)) - list_del(&e->link); - } - if (list_empty(evict_list)) { - KUNIT_FAIL(test, - "Failed to find eviction: size=%lld [avail=%d], align=%lld (color=%lu)\n", - scan->size, count, scan->alignment, scan->color); - return false; - } - - list_for_each_entry(e, evict_list, link) - drm_mm_remove_node(&e->node); - - if (use_color) { - struct drm_mm_node *node; - - while ((node = drm_mm_scan_color_evict(scan))) { - e = container_of(node, typeof(*e), node); - drm_mm_remove_node(&e->node); - list_add(&e->link, evict_list); - } - } else { - if (drm_mm_scan_color_evict(scan)) { - KUNIT_FAIL(test, - "drm_mm_scan_color_evict unexpectedly reported overlapping nodes!\n"); - return false; - } - } - - return true; -} - -static bool evict_nothing(struct kunit *test, struct drm_mm *mm, - unsigned int total_size, struct evict_node *nodes) -{ - struct drm_mm_scan scan; - LIST_HEAD(evict_list); - struct evict_node *e; - struct drm_mm_node *node; - unsigned int n; - - drm_mm_scan_init(&scan, mm, 1, 0, 0, 0); - for (n = 0; n < total_size; n++) { - e = &nodes[n]; - list_add(&e->link, &evict_list); - drm_mm_scan_add_block(&scan, &e->node); - } - list_for_each_entry(e, &evict_list, link) - drm_mm_scan_remove_block(&scan, &e->node); - - for (n = 0; n < total_size; n++) { - e = &nodes[n]; - - if (!drm_mm_node_allocated(&e->node)) { - KUNIT_FAIL(test, "node[%d] no longer allocated!\n", n); - return false; - } - - e->link.next = NULL; - } - - drm_mm_for_each_node(node, mm) { - e = container_of(node, typeof(*e), node); - e->link.next = &e->link; - } - - for (n = 0; n < total_size; n++) { - e = &nodes[n]; - - if (!e->link.next) { - KUNIT_FAIL(test, "node[%d] no longer connected!\n", n); - return false; - } - } - - return assert_continuous(test, mm, nodes[0].node.size); -} - -static bool evict_everything(struct kunit *test, struct drm_mm *mm, - unsigned int total_size, struct evict_node *nodes) -{ - struct drm_mm_scan scan; - LIST_HEAD(evict_list); - struct evict_node *e; - unsigned int n; - int err; - - drm_mm_scan_init(&scan, mm, total_size, 0, 0, 0); - for (n = 0; n < total_size; n++) { - e = &nodes[n]; - list_add(&e->link, &evict_list); - if (drm_mm_scan_add_block(&scan, &e->node)) - break; - } - - err = 0; - list_for_each_entry(e, &evict_list, link) { - if (!drm_mm_scan_remove_block(&scan, &e->node)) { - if (!err) { - KUNIT_FAIL(test, "Node %lld not marked for eviction!\n", - e->node.start); - err = -EINVAL; - } - } - } - if (err) - return false; - - list_for_each_entry(e, &evict_list, link) - drm_mm_remove_node(&e->node); - - if (!assert_one_hole(test, mm, 0, total_size)) - return false; - - list_for_each_entry(e, &evict_list, link) { - err = drm_mm_reserve_node(mm, &e->node); - if (err) { - KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n", - e->node.start); - return false; - } - } - - return assert_continuous(test, mm, nodes[0].node.size); -} - -static int evict_something(struct kunit *test, struct drm_mm *mm, - u64 range_start, u64 range_end, struct evict_node *nodes, - unsigned int *order, unsigned int count, unsigned int size, - unsigned int alignment, const struct insert_mode *mode) -{ - struct drm_mm_scan scan; - LIST_HEAD(evict_list); - struct evict_node *e; - struct drm_mm_node tmp; - int err; - - drm_mm_scan_init_with_range(&scan, mm, size, alignment, 0, range_start, - range_end, 
mode->mode); - if (!evict_nodes(test, &scan, nodes, order, count, false, &evict_list)) - return -EINVAL; - - memset(&tmp, 0, sizeof(tmp)); - err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, 0, - DRM_MM_INSERT_EVICT); - if (err) { - KUNIT_FAIL(test, "Failed to insert into eviction hole: size=%d, align=%d\n", - size, alignment); - show_scan(test, &scan); - show_holes(test, mm, 3); - return err; - } - - if (tmp.start < range_start || tmp.start + tmp.size > range_end) { - KUNIT_FAIL(test, - "Inserted [address=%llu + %llu] did not fit into the request range [%llu, %llu]\n", - tmp.start, tmp.size, range_start, range_end); - err = -EINVAL; - } - - if (!assert_node(test, &tmp, mm, size, alignment, 0) || - drm_mm_hole_follows(&tmp)) { - KUNIT_FAIL(test, - "Inserted did not fill the eviction hole: size=%lld [%d], align=%d [rem=%lld], start=%llx, hole-follows?=%d\n", - tmp.size, size, alignment, misalignment(&tmp, alignment), - tmp.start, drm_mm_hole_follows(&tmp)); - err = -EINVAL; - } - - drm_mm_remove_node(&tmp); - if (err) - return err; - - list_for_each_entry(e, &evict_list, link) { - err = drm_mm_reserve_node(mm, &e->node); - if (err) { - KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n", - e->node.start); - return err; - } - } - - if (!assert_continuous(test, mm, nodes[0].node.size)) { - KUNIT_FAIL(test, "range is no longer continuous\n"); - return -EINVAL; - } - - return 0; -} - -static void drm_test_mm_evict(struct kunit *test) -{ - DRM_RND_STATE(prng, random_seed); - const unsigned int size = 8192; - const struct insert_mode *mode; - struct drm_mm mm; - struct evict_node *nodes; - struct drm_mm_node *node, *next; - unsigned int *order, n; - - /* Here we populate a full drm_mm and then try and insert a new node - * by evicting other nodes in a random order. The drm_mm_scan should - * pick the first matching hole it finds from the random list. We - * repeat that for different allocation strategies, alignments and - * sizes to try and stress the hole finder. 
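The scan API behind evict_nodes() follows a strict protocol: every block added to a scan must be removed from it again, in reverse order, before anything may actually be evicted. A sketch reusing the evict_node bookkeeping above, with a hypothetical driver-side lru list standing in for the random order the test generates:

	struct drm_mm_scan scan;
	struct evict_node *e, *tmp;
	LIST_HEAD(evict_list);

	drm_mm_scan_init(&scan, mm, size, alignment, 0, DRM_MM_INSERT_EVICT);

	list_for_each_entry_safe(e, tmp, &lru, link) {
		/* list_move() prepends, so evict_list ends up in the
		 * reverse (LIFO) order the remove step requires. */
		list_move(&e->link, &evict_list);
		if (drm_mm_scan_add_block(&scan, &e->node))
			break;			/* a big-enough hole exists */
	}

	list_for_each_entry_safe(e, tmp, &evict_list, link)
		if (!drm_mm_scan_remove_block(&scan, &e->node))
			list_move(&e->link, &lru);	/* not needed after all */

	/* Whatever remains on evict_list must now be removed from the mm,
	 * then the new node inserted with DRM_MM_INSERT_EVICT. */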
- */ - - nodes = vzalloc(array_size(size, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - order = drm_random_order(size, &prng); - if (!order) - goto err_nodes; - - drm_mm_init(&mm, 0, size); - for (n = 0; n < size; n++) { - if (drm_mm_insert_node(&mm, &nodes[n].node, 1)) { - KUNIT_FAIL(test, "insert failed, step %d\n", n); - goto out; - } - } - - /* First check that using the scanner doesn't break the mm */ - if (!evict_nothing(test, &mm, size, nodes)) { - KUNIT_FAIL(test, "evict_nothing() failed\n"); - goto out; - } - if (!evict_everything(test, &mm, size, nodes)) { - KUNIT_FAIL(test, "evict_everything() failed\n"); - goto out; - } - - for (mode = evict_modes; mode->name; mode++) { - for (n = 1; n <= size; n <<= 1) { - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size, n, 1, - mode)) { - KUNIT_FAIL(test, "%s evict_something(size=%u) failed\n", - mode->name, n); - goto out; - } - } - - for (n = 1; n < size; n <<= 1) { - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size, - size / 2, n, mode)) { - KUNIT_FAIL(test, - "%s evict_something(size=%u, alignment=%u) failed\n", - mode->name, size / 2, n); - goto out; - } - } - - for_each_prime_number_from(n, 1, min(size, max_prime)) { - unsigned int nsize = (size - n + 1) / 2; - - DRM_MM_BUG_ON(!nsize); - - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size, - nsize, n, mode)) { - KUNIT_FAIL(test, - "%s evict_something(size=%u, alignment=%u) failed\n", - mode->name, nsize, n); - goto out; - } - } - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_nodes: - vfree(nodes); -} - -static void drm_test_mm_evict_range(struct kunit *test) -{ - DRM_RND_STATE(prng, random_seed); - const unsigned int size = 8192; - const unsigned int range_size = size / 2; - const unsigned int range_start = size / 4; - const unsigned int range_end = range_start + range_size; - const struct insert_mode *mode; - struct drm_mm mm; - struct evict_node *nodes; - struct drm_mm_node *node, *next; - unsigned int *order, n; - - /* Like drm_test_mm_evict() but now we are limiting the search to a - * small portion of the full drm_mm. 
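The only delta for the range-restricted variant is the scan initializer; drm_mm_scan_init() is simply the whole-range shorthand, so confining eviction to a window is one call:

	drm_mm_scan_init_with_range(&scan, mm, size, alignment, 0,
				    range_start, range_end, DRM_MM_INSERT_EVICT);
	/* Nodes wholly outside [range_start, range_end) are never asked to
	 * move; everything else proceeds as in the sketch above. */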
- */ - - nodes = vzalloc(array_size(size, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - order = drm_random_order(size, &prng); - if (!order) - goto err_nodes; - - drm_mm_init(&mm, 0, size); - for (n = 0; n < size; n++) { - if (drm_mm_insert_node(&mm, &nodes[n].node, 1)) { - KUNIT_FAIL(test, "insert failed, step %d\n", n); - goto out; - } - } - - for (mode = evict_modes; mode->name; mode++) { - for (n = 1; n <= range_size; n <<= 1) { - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, range_start, range_end, nodes, - order, size, n, 1, mode)) { - KUNIT_FAIL(test, - "%s evict_something(size=%u) failed with range [%u, %u]\n", - mode->name, n, range_start, range_end); - goto out; - } - } - - for (n = 1; n <= range_size; n <<= 1) { - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, range_start, range_end, nodes, - order, size, range_size / 2, n, mode)) { - KUNIT_FAIL(test, - "%s evict_something(size=%u, alignment=%u) failed with range [%u, %u]\n", - mode->name, range_size / 2, n, range_start, range_end); - goto out; - } - } - - for_each_prime_number_from(n, 1, min(range_size, max_prime)) { - unsigned int nsize = (range_size - n + 1) / 2; - - DRM_MM_BUG_ON(!nsize); - - drm_random_reorder(order, size, &prng); - if (evict_something(test, &mm, range_start, range_end, nodes, - order, size, nsize, n, mode)) { - KUNIT_FAIL(test, - "%s evict_something(size=%u, alignment=%u) failed with range [%u, %u]\n", - mode->name, nsize, n, range_start, range_end); - goto out; - } - } - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_nodes: - vfree(nodes); -} - -static unsigned int node_index(const struct drm_mm_node *node) -{ - return div64_u64(node->start, node->size); -} - -static void drm_test_mm_topdown(struct kunit *test) -{ - const struct insert_mode *topdown = &insert_modes[TOPDOWN]; - - DRM_RND_STATE(prng, random_seed); - const unsigned int count = 8192; - unsigned int size; - unsigned long *bitmap; - struct drm_mm mm; - struct drm_mm_node *nodes, *node, *next; - unsigned int *order, n, m, o = 0; - - /* When allocating top-down, we expect to be returned a node - * from a suitable hole at the top of the drm_mm. We check that - * the returned node does match the highest available slot. 
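The slot arithmetic behind that check is compact enough to model in isolation. A standalone sketch (plain userspace C, illustration only; the kernel test uses div64_u64() and find_last_bit() instead of the helpers below):

#include <assert.h>
#include <stdbool.h>

#define COUNT 8U

/* With equally sized nodes, slot index = start / size. */
static unsigned int node_index(unsigned long long start, unsigned long long size)
{
	return (unsigned int)(start / size);
}

/* Userspace stand-in for find_last_bit(): index of the highest free slot. */
static unsigned int find_last_free(const bool *freed, unsigned int count)
{
	unsigned int i, last = count;	/* count means "none free" */

	for (i = 0; i < count; i++)
		if (freed[i])
			last = i;
	return last;
}

int main(void)
{
	bool freed[COUNT] = { false };
	const unsigned long long size = 64;

	freed[2] = freed[5] = true;	/* two nodes removed at random */

	/* A top-down insert of one slot must reuse slot 5, the highest hole. */
	assert(find_last_free(freed, COUNT) == 5);
	assert(node_index(5 * size, size) == 5);
	return 0;
}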
- */ - - nodes = vzalloc(array_size(count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - bitmap = bitmap_zalloc(count, GFP_KERNEL); - if (!bitmap) - goto err_nodes; - - order = drm_random_order(count, &prng); - if (!order) - goto err_bitmap; - - for (size = 1; size <= 64; size <<= 1) { - drm_mm_init(&mm, 0, size * count); - for (n = 0; n < count; n++) { - if (!expect_insert(test, &mm, &nodes[n], size, 0, n, topdown)) { - KUNIT_FAIL(test, "insert failed, size %u step %d\n", size, n); - goto out; - } - - if (drm_mm_hole_follows(&nodes[n])) { - KUNIT_FAIL(test, - "hole after topdown insert %d, start=%llx\n, size=%u", - n, nodes[n].start, size); - goto out; - } - - if (!assert_one_hole(test, &mm, 0, size * (count - n - 1))) - goto out; - } - - if (!assert_continuous(test, &mm, size)) - goto out; - - drm_random_reorder(order, count, &prng); - for_each_prime_number_from(n, 1, min(count, max_prime)) { - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - drm_mm_remove_node(node); - __set_bit(node_index(node), bitmap); - } - - for (m = 0; m < n; m++) { - unsigned int last; - - node = &nodes[order[(o + m) % count]]; - if (!expect_insert(test, &mm, node, size, 0, 0, topdown)) { - KUNIT_FAIL(test, "insert failed, step %d/%d\n", m, n); - goto out; - } - - if (drm_mm_hole_follows(node)) { - KUNIT_FAIL(test, - "hole after topdown insert %d/%d, start=%llx\n", - m, n, node->start); - goto out; - } - - last = find_last_bit(bitmap, count); - if (node_index(node) != last) { - KUNIT_FAIL(test, - "node %d/%d, size %d, not inserted into upmost hole, expected %d, found %d\n", - m, n, size, last, node_index(node)); - goto out; - } - - __clear_bit(last, bitmap); - } - - DRM_MM_BUG_ON(find_first_bit(bitmap, count) != count); - - o += n; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - DRM_MM_BUG_ON(!drm_mm_clean(&mm)); - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_bitmap: - bitmap_free(bitmap); -err_nodes: - vfree(nodes); -} - -static void drm_test_mm_bottomup(struct kunit *test) -{ - const struct insert_mode *bottomup = &insert_modes[BOTTOMUP]; - - DRM_RND_STATE(prng, random_seed); - const unsigned int count = 8192; - unsigned int size; - unsigned long *bitmap; - struct drm_mm mm; - struct drm_mm_node *nodes, *node, *next; - unsigned int *order, n, m, o = 0; - - /* Like drm_test_mm_topdown, but instead of searching for the last hole, - * we search for the first. 
- */ - - nodes = vzalloc(array_size(count, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - bitmap = bitmap_zalloc(count, GFP_KERNEL); - if (!bitmap) - goto err_nodes; - - order = drm_random_order(count, &prng); - if (!order) - goto err_bitmap; - - for (size = 1; size <= 64; size <<= 1) { - drm_mm_init(&mm, 0, size * count); - for (n = 0; n < count; n++) { - if (!expect_insert(test, &mm, &nodes[n], size, 0, n, bottomup)) { - KUNIT_FAIL(test, - "bottomup insert failed, size %u step %d\n", size, n); - goto out; - } - - if (!assert_one_hole(test, &mm, size * (n + 1), size * count)) - goto out; - } - - if (!assert_continuous(test, &mm, size)) - goto out; - - drm_random_reorder(order, count, &prng); - for_each_prime_number_from(n, 1, min(count, max_prime)) { - for (m = 0; m < n; m++) { - node = &nodes[order[(o + m) % count]]; - drm_mm_remove_node(node); - __set_bit(node_index(node), bitmap); - } - - for (m = 0; m < n; m++) { - unsigned int first; - - node = &nodes[order[(o + m) % count]]; - if (!expect_insert(test, &mm, node, size, 0, 0, bottomup)) { - KUNIT_FAIL(test, "insert failed, step %d/%d\n", m, n); - goto out; - } - - first = find_first_bit(bitmap, count); - if (node_index(node) != first) { - KUNIT_FAIL(test, - "node %d/%d not inserted into bottom hole, expected %d, found %d\n", - m, n, first, node_index(node)); - goto out; - } - __clear_bit(first, bitmap); - } - - DRM_MM_BUG_ON(find_first_bit(bitmap, count) != count); - - o += n; - } - - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - DRM_MM_BUG_ON(!drm_mm_clean(&mm)); - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_bitmap: - bitmap_free(bitmap); -err_nodes: - vfree(nodes); -} - static void drm_test_mm_once(struct kunit *test, unsigned int mode) { struct drm_mm mm; @@ -1817,440 +335,18 @@ static void drm_test_mm_highest(struct kunit *test) drm_test_mm_once(test, DRM_MM_INSERT_HIGH); } -static void separate_adjacent_colors(const struct drm_mm_node *node, - unsigned long color, u64 *start, u64 *end) -{ - if (drm_mm_node_allocated(node) && node->color != color) - ++*start; - - node = list_next_entry(node, node_list); - if (drm_mm_node_allocated(node) && node->color != color) - --*end; -} - -static bool colors_abutt(struct kunit *test, const struct drm_mm_node *node) -{ - if (!drm_mm_hole_follows(node) && - drm_mm_node_allocated(list_next_entry(node, node_list))) { - KUNIT_FAIL(test, "colors abutt; %ld [%llx + %llx] is next to %ld [%llx + %llx]!\n", - node->color, node->start, node->size, - list_next_entry(node, node_list)->color, - list_next_entry(node, node_list)->start, - list_next_entry(node, node_list)->size); - return true; - } - - return false; -} - -static void drm_test_mm_color(struct kunit *test) -{ - const unsigned int count = min(4096u, max_iterations); - const struct insert_mode *mode; - struct drm_mm mm; - struct drm_mm_node *node, *nn; - unsigned int n; - - /* Color adjustment complicates everything. First we just check - * that when we insert a node we apply any color_adjustment callback. - * The callback we use should ensure that there is a gap between - * any two nodes, and so after each insertion we check that those - * holes are inserted and that they are preserved. 
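separate_adjacent_colors() above expresses that policy through ++*start / --*end on the hole bounds. A standalone model (userspace C, illustration only) of the same trimming rule, abstracted to the colors of the two neighbours:

#include <assert.h>

/* Trim one guard unit off each end of a hole whose neighbour's color differs. */
static void color_trim(unsigned long prev_color, unsigned long next_color,
		       unsigned long color,
		       unsigned long long *start, unsigned long long *end)
{
	if (prev_color != color)
		++*start;	/* guard unit against the previous node */
	if (next_color != color)
		--*end;		/* guard unit against the next node */
}

int main(void)
{
	unsigned long long start = 0, end = 16;

	color_trim(1, 2, 2, &start, &end);	/* prev differs, next matches */
	assert(start == 1 && end == 16);

	color_trim(2, 1, 2, &start, &end);	/* now the next node differs */
	assert(start == 1 && end == 15);
	return 0;
}

This is why two differently colored nodes can never abut: any hole between them has already been shrunk before the allocator tests whether a request fits.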
- */ - - drm_mm_init(&mm, 0, U64_MAX); - - for (n = 1; n <= count; n++) { - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - goto out; - - if (!expect_insert(test, &mm, node, n, 0, n, &insert_modes[0])) { - KUNIT_FAIL(test, "insert failed, step %d\n", n); - kfree(node); - goto out; - } - } - - drm_mm_for_each_node_safe(node, nn, &mm) { - if (node->color != node->size) { - KUNIT_FAIL(test, "invalid color stored: expected %lld, found %ld\n", - node->size, node->color); - - goto out; - } - - drm_mm_remove_node(node); - kfree(node); - } - - /* Now, let's start experimenting with applying a color callback */ - mm.color_adjust = separate_adjacent_colors; - for (mode = insert_modes; mode->name; mode++) { - u64 last; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - goto out; - - node->size = 1 + 2 * count; - node->color = node->size; - - if (drm_mm_reserve_node(&mm, node)) { - KUNIT_FAIL(test, "initial reserve failed!\n"); - goto out; - } - - last = node->start + node->size; - - for (n = 1; n <= count; n++) { - int rem; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - goto out; - - node->start = last; - node->size = n + count; - node->color = node->size; - - if (drm_mm_reserve_node(&mm, node) != -ENOSPC) { - KUNIT_FAIL(test, "reserve %d did not report color overlap!", n); - goto out; - } - - node->start += n + 1; - rem = misalignment(node, n + count); - node->start += n + count - rem; - - if (drm_mm_reserve_node(&mm, node)) { - KUNIT_FAIL(test, "reserve %d failed", n); - goto out; - } - - last = node->start + node->size; - } - - for (n = 1; n <= count; n++) { - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - goto out; - - if (!expect_insert(test, &mm, node, n, n, n, mode)) { - KUNIT_FAIL(test, "%s insert failed, step %d\n", mode->name, n); - kfree(node); - goto out; - } - } - - drm_mm_for_each_node_safe(node, nn, &mm) { - u64 rem; - - if (node->color != node->size) { - KUNIT_FAIL(test, - "%s invalid color stored: expected %lld, found %ld\n", - mode->name, node->size, node->color); - - goto out; - } - - if (colors_abutt(test, node)) - goto out; - - div64_u64_rem(node->start, node->size, &rem); - if (rem) { - KUNIT_FAIL(test, - "%s colored node misaligned, start=%llx expected alignment=%lld [rem=%lld]\n", - mode->name, node->start, node->size, rem); - goto out; - } - - drm_mm_remove_node(node); - kfree(node); - } - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, nn, &mm) { - drm_mm_remove_node(node); - kfree(node); - } - drm_mm_takedown(&mm); -} - -static int evict_color(struct kunit *test, struct drm_mm *mm, u64 range_start, - u64 range_end, struct evict_node *nodes, unsigned int *order, - unsigned int count, unsigned int size, unsigned int alignment, - unsigned long color, const struct insert_mode *mode) -{ - struct drm_mm_scan scan; - LIST_HEAD(evict_list); - struct evict_node *e; - struct drm_mm_node tmp; - int err; - - drm_mm_scan_init_with_range(&scan, mm, size, alignment, color, range_start, - range_end, mode->mode); - if (!evict_nodes(test, &scan, nodes, order, count, true, &evict_list)) - return -EINVAL; - - memset(&tmp, 0, sizeof(tmp)); - err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, color, - DRM_MM_INSERT_EVICT); - if (err) { - KUNIT_FAIL(test, - "Failed to insert into eviction hole: size=%d, align=%d, color=%lu, err=%d\n", - size, alignment, color, err); - show_scan(test, &scan); - show_holes(test, mm, 3); - return err; - } - - if (tmp.start < range_start || tmp.start + tmp.size > range_end) { - KUNIT_FAIL(test, - 
"Inserted [address=%llu + %llu] did not fit into the request range [%llu, %llu]\n", - tmp.start, tmp.size, range_start, range_end); - err = -EINVAL; - } - - if (colors_abutt(test, &tmp)) - err = -EINVAL; - - if (!assert_node(test, &tmp, mm, size, alignment, color)) { - KUNIT_FAIL(test, - "Inserted did not fit the eviction hole: size=%lld [%d], align=%d [rem=%lld], start=%llx\n", - tmp.size, size, alignment, misalignment(&tmp, alignment), tmp.start); - err = -EINVAL; - } - - drm_mm_remove_node(&tmp); - if (err) - return err; - - list_for_each_entry(e, &evict_list, link) { - err = drm_mm_reserve_node(mm, &e->node); - if (err) { - KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n", - e->node.start); - return err; - } - } - - cond_resched(); - return 0; -} - -static void drm_test_mm_color_evict(struct kunit *test) -{ - DRM_RND_STATE(prng, random_seed); - const unsigned int total_size = min(8192u, max_iterations); - const struct insert_mode *mode; - unsigned long color = 0; - struct drm_mm mm; - struct evict_node *nodes; - struct drm_mm_node *node, *next; - unsigned int *order, n; - - /* Check that the drm_mm_scan also honours color adjustment when - * choosing its victims to create a hole. Our color_adjust does not - * allow two nodes to be placed together without an intervening hole - * enlarging the set of victims that must be evicted. - */ - - nodes = vzalloc(array_size(total_size, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - order = drm_random_order(total_size, &prng); - if (!order) - goto err_nodes; - - drm_mm_init(&mm, 0, 2 * total_size - 1); - mm.color_adjust = separate_adjacent_colors; - for (n = 0; n < total_size; n++) { - if (!expect_insert(test, &mm, &nodes[n].node, - 1, 0, color++, - &insert_modes[0])) { - KUNIT_FAIL(test, "insert failed, step %d\n", n); - goto out; - } - } - - for (mode = evict_modes; mode->name; mode++) { - for (n = 1; n <= total_size; n <<= 1) { - drm_random_reorder(order, total_size, &prng); - if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size, - n, 1, color++, mode)) { - KUNIT_FAIL(test, "%s evict_color(size=%u) failed\n", mode->name, n); - goto out; - } - } - - for (n = 1; n < total_size; n <<= 1) { - drm_random_reorder(order, total_size, &prng); - if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size, - total_size / 2, n, color++, mode)) { - KUNIT_FAIL(test, "%s evict_color(size=%u, alignment=%u) failed\n", - mode->name, total_size / 2, n); - goto out; - } - } - - for_each_prime_number_from(n, 1, min(total_size, max_prime)) { - unsigned int nsize = (total_size - n + 1) / 2; - - DRM_MM_BUG_ON(!nsize); - - drm_random_reorder(order, total_size, &prng); - if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size, - nsize, n, color++, mode)) { - KUNIT_FAIL(test, "%s evict_color(size=%u, alignment=%u) failed\n", - mode->name, nsize, n); - goto out; - } - } - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_nodes: - vfree(nodes); -} - -static void drm_test_mm_color_evict_range(struct kunit *test) -{ - DRM_RND_STATE(prng, random_seed); - const unsigned int total_size = 8192; - const unsigned int range_size = total_size / 2; - const unsigned int range_start = total_size / 4; - const unsigned int range_end = range_start + range_size; - const struct insert_mode *mode; - unsigned long color = 0; - struct drm_mm mm; - struct evict_node *nodes; - struct drm_mm_node *node, *next; - unsigned int *order, n; - - /* 
Like drm_test_mm_color_evict(), but limited to small portion of the full - * drm_mm range. - */ - - nodes = vzalloc(array_size(total_size, sizeof(*nodes))); - KUNIT_ASSERT_TRUE(test, nodes); - - order = drm_random_order(total_size, &prng); - if (!order) - goto err_nodes; - - drm_mm_init(&mm, 0, 2 * total_size - 1); - mm.color_adjust = separate_adjacent_colors; - for (n = 0; n < total_size; n++) { - if (!expect_insert(test, &mm, &nodes[n].node, - 1, 0, color++, - &insert_modes[0])) { - KUNIT_FAIL(test, "insert failed, step %d\n", n); - goto out; - } - } - - for (mode = evict_modes; mode->name; mode++) { - for (n = 1; n <= range_size; n <<= 1) { - drm_random_reorder(order, range_size, &prng); - if (evict_color(test, &mm, range_start, range_end, nodes, order, - total_size, n, 1, color++, mode)) { - KUNIT_FAIL(test, - "%s evict_color(size=%u) failed for range [%x, %x]\n", - mode->name, n, range_start, range_end); - goto out; - } - } - - for (n = 1; n < range_size; n <<= 1) { - drm_random_reorder(order, total_size, &prng); - if (evict_color(test, &mm, range_start, range_end, nodes, order, - total_size, range_size / 2, n, color++, mode)) { - KUNIT_FAIL(test, - "%s evict_color(size=%u, alignment=%u) failed for range [%x, %x]\n", - mode->name, total_size / 2, n, range_start, range_end); - goto out; - } - } - - for_each_prime_number_from(n, 1, min(range_size, max_prime)) { - unsigned int nsize = (range_size - n + 1) / 2; - - DRM_MM_BUG_ON(!nsize); - - drm_random_reorder(order, total_size, &prng); - if (evict_color(test, &mm, range_start, range_end, nodes, order, - total_size, nsize, n, color++, mode)) { - KUNIT_FAIL(test, - "%s evict_color(size=%u, alignment=%u) failed for range [%x, %x]\n", - mode->name, nsize, n, range_start, range_end); - goto out; - } - } - - cond_resched(); - } - -out: - drm_mm_for_each_node_safe(node, next, &mm) - drm_mm_remove_node(node); - drm_mm_takedown(&mm); - kfree(order); -err_nodes: - vfree(nodes); -} - -static int drm_mm_suite_init(struct kunit_suite *suite) -{ - while (!random_seed) - random_seed = get_random_u32(); - - kunit_info(suite, - "Testing DRM range manager, with random_seed=0x%x max_iterations=%u max_prime=%u\n", - random_seed, max_iterations, max_prime); - - return 0; -} - -module_param(random_seed, uint, 0400); -module_param(max_iterations, uint, 0400); -module_param(max_prime, uint, 0400); - static struct kunit_case drm_mm_tests[] = { KUNIT_CASE(drm_test_mm_init), KUNIT_CASE(drm_test_mm_debug), - KUNIT_CASE(drm_test_mm_reserve), - KUNIT_CASE(drm_test_mm_insert), - KUNIT_CASE(drm_test_mm_replace), - KUNIT_CASE(drm_test_mm_insert_range), - KUNIT_CASE(drm_test_mm_frag), - KUNIT_CASE(drm_test_mm_align), KUNIT_CASE(drm_test_mm_align32), KUNIT_CASE(drm_test_mm_align64), - KUNIT_CASE(drm_test_mm_evict), - KUNIT_CASE(drm_test_mm_evict_range), - KUNIT_CASE(drm_test_mm_topdown), - KUNIT_CASE(drm_test_mm_bottomup), KUNIT_CASE(drm_test_mm_lowest), KUNIT_CASE(drm_test_mm_highest), - KUNIT_CASE(drm_test_mm_color), - KUNIT_CASE(drm_test_mm_color_evict), - KUNIT_CASE(drm_test_mm_color_evict_range), {} }; static struct kunit_suite drm_mm_test_suite = { .name = "drm_mm", - .suite_init = drm_mm_suite_init, .test_cases = drm_mm_tests, }; diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index 5e5e466f35d1..5f838980c7a1 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -169,14 +169,10 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc, struct tidss_device *tidss = to_tidss(ddev); unsigned 
long flags; - dev_dbg(ddev->dev, - "%s: %s enabled %d, needs modeset %d, event %p\n", __func__, - crtc->name, drm_atomic_crtc_needs_modeset(crtc->state), - crtc->state->enable, crtc->state->event); - - /* There is nothing to do if CRTC is not going to be enabled. */ - if (!crtc->state->enable) - return; + dev_dbg(ddev->dev, "%s: %s is %sactive, %s modeset, event %p\n", + __func__, crtc->name, crtc->state->active ? "" : "not ", + drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need", + crtc->state->event); /* * Flush CRTC changes with go bit only if new modeset is not diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index 9d9dee7abaef..1ad711f8d2a8 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -322,6 +322,60 @@ const struct dispc_features dispc_am625_feats = { .vid_order = { 1, 0 }, }; +const struct dispc_features dispc_am62a7_feats = { + /* + * if the code reaches dispc_mode_valid with VP1, + * it should return MODE_BAD. + */ + .max_pclk_khz = { + [DISPC_VP_TIED_OFF] = 0, + [DISPC_VP_DPI] = 165000, + }, + + .scaling = { + .in_width_max_5tap_rgb = 1280, + .in_width_max_3tap_rgb = 2560, + .in_width_max_5tap_yuv = 2560, + .in_width_max_3tap_yuv = 4096, + .upscale_limit = 16, + .downscale_limit_5tap = 4, + .downscale_limit_3tap = 2, + /* + * The max supported pixel inc value is 255. The value + * of pixel inc is calculated like this: 1+(xinc-1)*bpp. + * The maximum bpp of all formats supported by the HW + * is 8. So the maximum supported xinc value is 32, + * because 1+(32-1)*8 < 255 < 1+(33-1)*8. + */ + .xinc_max = 32, + }, + + .subrev = DISPC_AM62A7, + + .common = "common", + .common_regs = tidss_am65x_common_regs, + + .num_vps = 2, + .vp_name = { "vp1", "vp2" }, + .ovr_name = { "ovr1", "ovr2" }, + .vpclk_name = { "vp1", "vp2" }, + /* VP1 of the DSS in AM62A7 SoC is tied off internally */ + .vp_bus_type = { DISPC_VP_TIED_OFF, DISPC_VP_DPI }, + + .vp_feat = { .color = { + .has_ctm = true, + .gamma_size = 256, + .gamma_type = TIDSS_GAMMA_8BIT, + }, + }, + + .num_planes = 2, + /* note: vid is plane_id 0 and vidl1 is plane_id 1 */ + .vid_name = { "vid", "vidl1" }, + .vid_lite = { false, true, }, + .vid_order = { 1, 0 }, +}; + static const u16 *dispc_common_regmap; struct dss_vp_data { @@ -824,6 +878,7 @@ dispc_irq_t dispc_read_and_clear_irqstatus(struct dispc_device *dispc) case DISPC_K2G: return dispc_k2g_read_and_clear_irqstatus(dispc); case DISPC_AM625: + case DISPC_AM62A7: case DISPC_AM65X: case DISPC_J721E: return dispc_k3_read_and_clear_irqstatus(dispc); @@ -840,6 +895,7 @@ void dispc_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask) dispc_k2g_set_irqenable(dispc, mask); break; case DISPC_AM625: + case DISPC_AM62A7: case DISPC_AM65X: case DISPC_J721E: dispc_k3_set_irqenable(dispc, mask); @@ -1331,6 +1387,7 @@ void dispc_ovr_set_plane(struct dispc_device *dispc, u32 hw_plane, x, y, layer); break; case DISPC_AM625: + case DISPC_AM62A7: case DISPC_AM65X: dispc_am65x_ovr_set_plane(dispc, hw_plane, hw_videoport, x, y, layer); @@ -2250,6 +2307,7 @@ static void dispc_plane_init(struct dispc_device *dispc) dispc_k2g_plane_init(dispc); break; case DISPC_AM625: + case DISPC_AM62A7: case DISPC_AM65X: case DISPC_J721E: dispc_k3_plane_init(dispc); @@ -2357,6 +2415,7 @@ static void dispc_vp_write_gamma_table(struct dispc_device *dispc, dispc_k2g_vp_write_gamma_table(dispc, hw_videoport); break; case DISPC_AM625: + case DISPC_AM62A7: case DISPC_AM65X: dispc_am65x_vp_write_gamma_table(dispc,
hw_videoport); break; @@ -2702,18 +2761,83 @@ static void dispc_init_errata(struct dispc_device *dispc) } } -static void dispc_softreset(struct dispc_device *dispc) +/* + * K2G display controller does not support soft reset, so we do a basic manual + * reset here: make sure the IRQs are masked and VPs are disabled. + */ +static void dispc_softreset_k2g(struct dispc_device *dispc) +{ + dispc_set_irqenable(dispc, 0); + dispc_read_and_clear_irqstatus(dispc); + + for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx) + VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0); +} + +static int dispc_softreset(struct dispc_device *dispc) { u32 val; - int ret = 0; + int ret; + + if (dispc->feat->subrev == DISPC_K2G) { + dispc_softreset_k2g(dispc); + return 0; + } /* Soft reset */ REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1); /* Wait for reset to complete */ ret = readl_poll_timeout(dispc->base_common + DSS_SYSSTATUS, val, val & 1, 100, 5000); + if (ret) { + dev_err(dispc->dev, "failed to reset dispc\n"); + return ret; + } + + return 0; +} + +static int dispc_init_hw(struct dispc_device *dispc) +{ + struct device *dev = dispc->dev; + int ret; + + ret = pm_runtime_set_active(dev); + if (ret) { + dev_err(dev, "Failed to set DSS PM to active\n"); + return ret; + } + + ret = clk_prepare_enable(dispc->fclk); + if (ret) { + dev_err(dev, "Failed to enable DSS fclk\n"); + goto err_runtime_suspend; + } + + ret = dispc_softreset(dispc); if (ret) - dev_warn(dispc->dev, "failed to reset dispc\n"); + goto err_clk_disable; + + clk_disable_unprepare(dispc->fclk); + ret = pm_runtime_set_suspended(dev); + if (ret) { + dev_err(dev, "Failed to set DSS PM to suspended\n"); + return ret; + } + + return 0; + +err_clk_disable: + clk_disable_unprepare(dispc->fclk); + +err_runtime_suspend: + ret = pm_runtime_set_suspended(dev); + if (ret) { + dev_err(dev, "Failed to set DSS PM to suspended\n"); + return ret; + } + + return ret; } int dispc_init(struct tidss_device *tidss) @@ -2777,10 +2901,6 @@ int dispc_init(struct tidss_device *tidss) return r; } - /* K2G display controller does not support soft reset */ - if (feat->subrev != DISPC_K2G) - dispc_softreset(dispc); - for (i = 0; i < dispc->feat->num_vps; i++) { u32 gamma_size = dispc->feat->vp_feat.color.gamma_size; u32 *gamma_table; @@ -2829,6 +2949,10 @@ int dispc_init(struct tidss_device *tidss) of_property_read_u32(dispc->dev->of_node, "max-memory-bandwidth", &dispc->memory_bandwidth_limit); + r = dispc_init_hw(dispc); + if (r) + return r; + tidss->dispc = dispc; return 0; diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h b/drivers/gpu/drm/tidss/tidss_dispc.h index 33ac5ad7a423..086327d51a90 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.h +++ b/drivers/gpu/drm/tidss/tidss_dispc.h @@ -54,12 +54,14 @@ enum dispc_vp_bus_type { DISPC_VP_DPI, /* DPI output */ DISPC_VP_OLDI, /* OLDI (LVDS) output */ DISPC_VP_INTERNAL, /* SoC internal routing */ + DISPC_VP_TIED_OFF, /* Tied off / Unavailable */ DISPC_VP_MAX_BUS_TYPE, }; enum dispc_dss_subrevision { DISPC_K2G, DISPC_AM625, + DISPC_AM62A7, DISPC_AM65X, DISPC_J721E, }; @@ -88,6 +90,7 @@ struct dispc_features { extern const struct dispc_features dispc_k2g_feats; extern const struct dispc_features dispc_am625_feats; +extern const struct dispc_features dispc_am62a7_feats; extern const struct dispc_features dispc_am65x_feats; extern const struct dispc_features dispc_j721e_feats; diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c index 4d063eb9cd0b..d15f836dca95 100644 --- 
a/drivers/gpu/drm/tidss/tidss_drv.c +++ b/drivers/gpu/drm/tidss/tidss_drv.c @@ -32,9 +32,9 @@ int tidss_runtime_get(struct tidss_device *tidss) dev_dbg(tidss->dev, "%s\n", __func__); - r = pm_runtime_get_sync(tidss->dev); + r = pm_runtime_resume_and_get(tidss->dev); WARN_ON(r < 0); - return r < 0 ? r : 0; + return r; } void tidss_runtime_put(struct tidss_device *tidss) @@ -43,7 +43,9 @@ void tidss_runtime_put(struct tidss_device *tidss) dev_dbg(tidss->dev, "%s\n", __func__); - r = pm_runtime_put_sync(tidss->dev); + pm_runtime_mark_last_busy(tidss->dev); + + r = pm_runtime_put_autosuspend(tidss->dev); WARN_ON(r < 0); } @@ -136,6 +138,8 @@ static int tidss_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tidss); + spin_lock_init(&tidss->wait_lock); + ret = dispc_init(tidss); if (ret) { dev_err(dev, "failed to initialize dispc: %d\n", ret); @@ -144,6 +148,9 @@ static int tidss_probe(struct platform_device *pdev) pm_runtime_enable(dev); + pm_runtime_set_autosuspend_delay(dev, 1000); + pm_runtime_use_autosuspend(dev); + #ifndef CONFIG_PM /* If we don't have PM, we need to call resume manually */ dispc_runtime_resume(tidss->dispc); @@ -192,6 +199,7 @@ err_runtime_suspend: #ifndef CONFIG_PM dispc_runtime_suspend(tidss->dispc); #endif + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; @@ -215,6 +223,7 @@ static void tidss_remove(struct platform_device *pdev) /* If we don't have PM, we need to call suspend manually */ dispc_runtime_suspend(tidss->dispc); #endif + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); /* devm allocated dispc goes away with the dev so mark it NULL */ @@ -231,6 +240,7 @@ static void tidss_shutdown(struct platform_device *pdev) static const struct of_device_id tidss_of_table[] = { { .compatible = "ti,k2g-dss", .data = &dispc_k2g_feats, }, { .compatible = "ti,am625-dss", .data = &dispc_am625_feats, }, + { .compatible = "ti,am62a7-dss", .data = &dispc_am62a7_feats, }, { .compatible = "ti,am65x-dss", .data = &dispc_am65x_feats, }, { .compatible = "ti,j721e-dss", .data = &dispc_j721e_feats, }, { } diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c index 0c681c7600bc..604334ef526a 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.c +++ b/drivers/gpu/drm/tidss/tidss_irq.c @@ -93,33 +93,21 @@ void tidss_irq_resume(struct tidss_device *tidss) spin_unlock_irqrestore(&tidss->wait_lock, flags); } -static void tidss_irq_preinstall(struct drm_device *ddev) -{ - struct tidss_device *tidss = to_tidss(ddev); - - spin_lock_init(&tidss->wait_lock); - - tidss_runtime_get(tidss); - - dispc_set_irqenable(tidss->dispc, 0); - dispc_read_and_clear_irqstatus(tidss->dispc); - - tidss_runtime_put(tidss); -} - -static void tidss_irq_postinstall(struct drm_device *ddev) +int tidss_irq_install(struct drm_device *ddev, unsigned int irq) { struct tidss_device *tidss = to_tidss(ddev); - unsigned long flags; - unsigned int i; + int ret; - tidss_runtime_get(tidss); + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; - spin_lock_irqsave(&tidss->wait_lock, flags); + ret = request_irq(irq, tidss_irq_handler, 0, ddev->driver->name, ddev); + if (ret) + return ret; tidss->irq_mask = DSS_IRQ_DEVICE_OCP_ERR; - for (i = 0; i < tidss->num_crtcs; ++i) { + for (unsigned int i = 0; i < tidss->num_crtcs; ++i) { struct tidss_crtc *tcrtc = to_tidss_crtc(tidss->crtcs[i]); tidss->irq_mask |= DSS_IRQ_VP_SYNC_LOST(tcrtc->hw_videoport); @@ -127,28 +115,6 @@ static void tidss_irq_postinstall(struct drm_device *ddev) tidss->irq_mask |= 
DSS_IRQ_VP_FRAME_DONE(tcrtc->hw_videoport); } - tidss_irq_update(tidss); - - spin_unlock_irqrestore(&tidss->wait_lock, flags); - - tidss_runtime_put(tidss); -} - -int tidss_irq_install(struct drm_device *ddev, unsigned int irq) -{ - int ret; - - if (irq == IRQ_NOTCONNECTED) - return -ENOTCONN; - - tidss_irq_preinstall(ddev); - - ret = request_irq(irq, tidss_irq_handler, 0, ddev->driver->name, ddev); - if (ret) - return ret; - - tidss_irq_postinstall(ddev); - return 0; } @@ -156,9 +122,5 @@ void tidss_irq_uninstall(struct drm_device *ddev) { struct tidss_device *tidss = to_tidss(ddev); - tidss_runtime_get(tidss); - dispc_set_irqenable(tidss->dispc, 0); - tidss_runtime_put(tidss); - free_irq(tidss->irq, ddev); } diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index c979ad1af236..a0e494c806a9 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -4,8 +4,6 @@ * Author: Tomi Valkeinen <tomi.valkeinen@ti.com> */ -#include <linux/dma-fence.h> - #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> @@ -25,18 +23,16 @@ static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state) { struct drm_device *ddev = old_state->dev; struct tidss_device *tidss = to_tidss(ddev); - bool fence_cookie = dma_fence_begin_signalling(); dev_dbg(ddev->dev, "%s\n", __func__); tidss_runtime_get(tidss); drm_atomic_helper_commit_modeset_disables(ddev, old_state); - drm_atomic_helper_commit_planes(ddev, old_state, 0); + drm_atomic_helper_commit_planes(ddev, old_state, DRM_PLANE_COMMIT_ACTIVE_ONLY); drm_atomic_helper_commit_modeset_enables(ddev, old_state); drm_atomic_helper_commit_hw_done(old_state); - dma_fence_end_signalling(fence_cookie); drm_atomic_helper_wait_for_flip_done(ddev, old_state); drm_atomic_helper_cleanup_planes(ddev, old_state); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 8ebd7134ee21..23bf16f596f6 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -138,7 +138,7 @@ static int tilcdc_irq_install(struct drm_device *dev, unsigned int irq) if (ret) return ret; - priv->irq_enabled = false; + priv->irq_enabled = true; return 0; } @@ -570,19 +570,18 @@ static int tilcdc_pdev_probe(struct platform_device *pdev) match); } -static int tilcdc_pdev_remove(struct platform_device *pdev) +static void tilcdc_pdev_remove(struct platform_device *pdev) { int ret; ret = tilcdc_get_external_components(&pdev->dev, NULL); if (ret < 0) - return ret; + dev_err(&pdev->dev, "tilcdc_get_external_components() failed (%pe)\n", + ERR_PTR(ret)); else if (ret == 0) tilcdc_fini(platform_get_drvdata(pdev)); else component_master_del(&pdev->dev, &tilcdc_comp_ops); - - return 0; } static void tilcdc_pdev_shutdown(struct platform_device *pdev) @@ -599,7 +598,7 @@ MODULE_DEVICE_TABLE(of, tilcdc_of_match); static struct platform_driver tilcdc_platform_driver = { .probe = tilcdc_pdev_probe, - .remove = tilcdc_pdev_remove, + .remove_new = tilcdc_pdev_remove, .shutdown = tilcdc_pdev_shutdown, .driver = { .name = "tilcdc", diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c index e5b10e41554a..4f8f3172379e 100644 --- a/drivers/gpu/drm/tiny/arcpgu.c +++ b/drivers/gpu/drm/tiny/arcpgu.c @@ -404,14 +404,12 @@ err_unload: return ret; } -static int arcpgu_remove(struct platform_device *pdev) +static void arcpgu_remove(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); drm_dev_unregister(drm); 
arcpgu_unload(drm); - - return 0; } static const struct of_device_id arcpgu_of_table[] = { @@ -423,7 +421,7 @@ MODULE_DEVICE_TABLE(of, arcpgu_of_table); static struct platform_driver arcpgu_platform_driver = { .probe = arcpgu_probe, - .remove = arcpgu_remove, + .remove_new = arcpgu_remove, .driver = { .name = "arcpgu", .of_match_table = arcpgu_of_table, diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c index c5c34cd2edc1..4e3a152f897a 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus.c @@ -411,7 +411,8 @@ static void cirrus_primary_plane_helper_atomic_update(struct drm_plane *plane, unsigned int offset = drm_fb_clip_offset(pitch, format, &damage); struct iosys_map dst = IOSYS_MAP_INIT_OFFSET(&vaddr, offset); - drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb, &damage); + drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb, + &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 4ceb68ffac4b..dd8b0a181be9 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -78,7 +78,7 @@ static inline int ili9225_command(struct mipi_dbi *dbi, u8 cmd, u16 data) } static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; @@ -98,7 +98,7 @@ static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, if (!dbi->dc || !full || swap || fb->format->format == DRM_FORMAT_XRGB8888) { tr = dbidev->tx_buf; - ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap); + ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state); if (ret) goto err_msg; } else { @@ -171,7 +171,8 @@ static void ili9225_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -281,7 +282,8 @@ static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, ili9225_command(dbi, ILI9225_DISPLAY_CONTROL_1, 0x1017); - ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); out_exit: drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c index 2d999a0facde..ab89b7fc7bf6 100644 --- a/drivers/gpu/drm/tiny/ofdrm.c +++ b/drivers/gpu/drm/tiny/ofdrm.c @@ -19,7 +19,6 @@ #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> @@ -758,7 +757,11 @@ static const uint64_t ofdrm_primary_plane_format_modifiers[] = { static int ofdrm_primary_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *new_state) { + struct drm_device *dev = plane->dev; + struct ofdrm_device *odev = ofdrm_device_of_dev(dev); struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(new_state, plane); + struct drm_shadow_plane_state *new_shadow_plane_state = + to_drm_shadow_plane_state(new_plane_state); struct drm_framebuffer *new_fb = new_plane_state->fb; struct 
drm_crtc *new_crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state = NULL; @@ -777,6 +780,16 @@ static int ofdrm_primary_plane_helper_atomic_check(struct drm_plane *plane, else if (!new_plane_state->visible) return 0; + if (new_fb->format != odev->format) { + void *buf; + + /* format conversion necessary; reserve buffer */ + buf = drm_format_conv_state_reserve(&new_shadow_plane_state->fmtcnv_state, + odev->pitch, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); new_ofdrm_crtc_state = to_ofdrm_crtc_state(new_crtc_state); @@ -817,7 +830,7 @@ static void ofdrm_primary_plane_helper_atomic_update(struct drm_plane *plane, iosys_map_incr(&dst, drm_fb_clip_offset(dst_pitch, dst_format, &dst_clip)); drm_fb_blit(&dst, &dst_pitch, dst_format->format, shadow_plane_state->data, fb, - &damage); + &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 73dd4f4289c2..8fd6758f5725 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -509,7 +509,8 @@ static void repaper_get_temperature(struct repaper_epd *epd) epd->factored_stage_time = epd->stage_time * factor10x / 10; } -static int repaper_fb_dirty(struct drm_framebuffer *fb) +static int repaper_fb_dirty(struct drm_framebuffer *fb, + struct drm_format_conv_state *fmtcnv_state) { struct drm_gem_dma_object *dma_obj = drm_fb_dma_get_gem_obj(fb, 0); struct repaper_epd *epd = drm_to_epd(fb->dev); @@ -545,7 +546,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb) iosys_map_set_vaddr(&dst, buf); iosys_map_set_vaddr(&vmap, dma_obj->vaddr); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip); + drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -830,13 +831,16 @@ static void repaper_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state) { struct drm_plane_state *state = pipe->plane.state; + struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; struct drm_rect rect; if (!pipe->crtc.state->active) return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - repaper_fb_dirty(state->fb); + repaper_fb_dirty(state->fb, &fmtcnv_state); + + drm_format_conv_state_release(&fmtcnv_state); } static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 5fefc895bca2..7ce1c4617675 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -19,12 +19,12 @@ #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_format_helper.h> +#include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #define DRIVER_NAME "simpledrm" @@ -579,6 +579,44 @@ static const uint64_t simpledrm_primary_plane_format_modifiers[] = { DRM_FORMAT_MOD_INVALID }; +static int simpledrm_primary_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_shadow_plane_state *new_shadow_plane_state = + 
to_drm_shadow_plane_state(new_plane_state); + struct drm_framebuffer *new_fb = new_plane_state->fb; + struct drm_crtc *new_crtc = new_plane_state->crtc; + struct drm_crtc_state *new_crtc_state = NULL; + struct drm_device *dev = plane->dev; + struct simpledrm_device *sdev = simpledrm_device_of_dev(dev); + int ret; + + if (new_crtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + + ret = drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + false, false); + if (ret) + return ret; + else if (!new_plane_state->visible) + return 0; + + if (new_fb->format != sdev->format) { + void *buf; + + /* format conversion necessary; reserve buffer */ + buf = drm_format_conv_state_reserve(&new_shadow_plane_state->fmtcnv_state, + sdev->pitch, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + return 0; +} + static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -609,7 +647,7 @@ static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane iosys_map_incr(&dst, drm_fb_clip_offset(sdev->pitch, sdev->format, &dst_clip)); drm_fb_blit(&dst, &sdev->pitch, sdev->format->format, shadow_plane_state->data, - fb, &damage); + fb, &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); @@ -635,7 +673,7 @@ static void simpledrm_primary_plane_helper_atomic_disable(struct drm_plane *plan static const struct drm_plane_helper_funcs simpledrm_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, - .atomic_check = drm_plane_helper_atomic_check, + .atomic_check = simpledrm_primary_plane_helper_atomic_check, .atomic_update = simpledrm_primary_plane_helper_atomic_update, .atomic_disable = simpledrm_primary_plane_helper_atomic_disable, }; diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 3cf4eec16a81..7336fa1ddaed 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -64,7 +64,8 @@ static const u8 st7586_lookup[] = { 0x7, 0x4, 0x2, 0x0 }; static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip) + struct drm_rect *clip, + struct drm_format_conv_state *fmtcnv_state) { size_t len = (clip->x2 - clip->x1) * (clip->y2 - clip->y1); unsigned int x, y; @@ -77,7 +78,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, iosys_map_set_vaddr(&dst_map, buf); iosys_map_set_vaddr(&vmap, vaddr); - drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip); + drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip, fmtcnv_state); src = buf; for (y = clip->y1; y < clip->y2; y++) { @@ -93,7 +94,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, } static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *clip) + struct drm_rect *clip, struct drm_format_conv_state *fmtcnv_state) { int ret; @@ -101,7 +102,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf if (ret) return ret; - st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip); + st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -109,7 +110,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf } static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev 
*dbidev = drm_to_mipi_dbi_dev(fb->dev); struct mipi_dbi *dbi = &dbidev->dbi; @@ -121,7 +122,7 @@ static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, DRM_DEBUG_KMS("Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); - ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect); + ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect, fmtcnv_state); if (ret) goto err_msg; @@ -160,7 +161,8 @@ static void st7586_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -238,7 +240,8 @@ static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, msleep(100); - st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); out_exit: diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e58b7e249816..edf10618fe2b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -370,7 +370,13 @@ static void ttm_bo_release(struct kref *kref) spin_unlock(&bo->bdev->lru_lock); INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete); - queue_work(bdev->wq, &bo->delayed_delete); + + /* Schedule the worker on the closest NUMA node. This + * improves performance since system memory might be + * cleared on free and that is best done on a CPU core + * close to it. + */ + queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete); return; } diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index d48b39132b32..f5187b384ae9 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -204,7 +204,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func if (ret) return ret; - bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16); + bdev->wq = alloc_workqueue("ttm", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16); if (!bdev->wq) { ttm_global_release(); return -ENOMEM; @@ -213,7 +214,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bdev->funcs = funcs; ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, use_dma_alloc, use_dma32); + + ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 40876bcdd79a..7702359c90c2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -21,7 +21,6 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -261,6 +260,22 @@ static const uint64_t udl_primary_plane_fmtmods[] = { DRM_FORMAT_MOD_INVALID }; +static int udl_primary_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *new_crtc = new_plane_state->crtc; + struct drm_crtc_state *new_crtc_state = NULL; + + if (new_crtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + + 
return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + false, false); +} + static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -296,7 +311,7 @@ out_drm_gem_fb_end_cpu_access: static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, - .atomic_check = drm_plane_helper_atomic_check, + .atomic_check = udl_primary_plane_helper_atomic_check, .atomic_update = udl_primary_plane_helper_atomic_update, }; diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index e8b314137020..b7d673f1153b 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -11,7 +11,9 @@ v3d-y := \ v3d_mmu.o \ v3d_perfmon.o \ v3d_trace_points.o \ - v3d_sched.o + v3d_sched.o \ + v3d_sysfs.o \ + v3d_submit.o v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 8b3229a37c6d..1bdfac8beafd 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -33,6 +33,9 @@ void v3d_free_object(struct drm_gem_object *obj) struct v3d_dev *v3d = to_v3d_dev(obj->dev); struct v3d_bo *bo = to_v3d_bo(obj); + if (bo->vaddr) + v3d_put_bo_vaddr(bo); + v3d_mmu_remove_ptes(bo); mutex_lock(&v3d->bo_lock); @@ -134,6 +137,7 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); + bo->vaddr = NULL; ret = v3d_bo_create_finish(&shmem_obj->base); if (ret) @@ -167,6 +171,20 @@ v3d_prime_import_sg_table(struct drm_device *dev, return obj; } +void v3d_get_bo_vaddr(struct v3d_bo *bo) +{ + struct drm_gem_shmem_object *obj = &bo->base; + + bo->vaddr = vmap(obj->pages, obj->base.size >> PAGE_SHIFT, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); +} + +void v3d_put_bo_vaddr(struct v3d_bo *bo) +{ + vunmap(bo->vaddr); + bo->vaddr = NULL; +} + int v3d_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -233,3 +251,36 @@ int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, drm_gem_object_put(gem_obj); return 0; } + +int +v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + int ret; + struct drm_v3d_wait_bo *args = data; + ktime_t start = ktime_get(); + u64 delta_ns; + unsigned long timeout_jiffies = + nsecs_to_jiffies_timeout(args->timeout_ns); + + if (args->pad != 0) + return -EINVAL; + + ret = drm_gem_dma_resv_wait(file_priv, args->handle, + true, timeout_jiffies); + + /* Decrement the user's timeout, in case we got interrupted + * such that the ioctl will be restarted. + */ + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); + if (delta_ns < args->timeout_ns) + args->timeout_ns -= delta_ns; + else + args->timeout_ns = 0; + + /* Asked to wait beyond the jiffie/scheduler precision? 
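The timeout bookkeeping in v3d_wait_bo_ioctl() above is easiest to verify in isolation. A standalone sketch (userspace C, illustration only) of the first rule: hand back the unconsumed remainder so a restarted ioctl never waits the full budget twice; the kernel code then applies the second rule just below, turning -ETIME into -EAGAIN when only jiffy rounding exhausted a still-positive remainder.

#include <assert.h>
#include <stdint.h>

/* Illustrative model of the ioctl's timeout accounting. */
static uint64_t remaining_timeout_ns(uint64_t timeout_ns, uint64_t start_ns,
				     uint64_t now_ns)
{
	uint64_t delta_ns = now_ns - start_ns;

	return delta_ns < timeout_ns ? timeout_ns - delta_ns : 0;
}

int main(void)
{
	/* 1 ms budget, 0.4 ms consumed: 0.6 ms handed back on restart. */
	assert(remaining_timeout_ns(1000000, 100, 400100) == 600000);
	/* Budget overrun: the restarted call polls with a zero timeout. */
	assert(remaining_timeout_ns(1000000, 100, 2000100) == 0);
	return 0;
}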
*/ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; + + return ret; +} diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 330669f51fa7..f843a50d5dce 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -12,69 +12,83 @@ #include "v3d_drv.h" #include "v3d_regs.h" -#define REGDEF(reg) { reg, #reg } +#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg } struct v3d_reg_def { + u32 min_ver; + u32 max_ver; u32 reg; const char *name; }; static const struct v3d_reg_def v3d_hub_reg_defs[] = { - REGDEF(V3D_HUB_AXICFG), - REGDEF(V3D_HUB_UIFCFG), - REGDEF(V3D_HUB_IDENT0), - REGDEF(V3D_HUB_IDENT1), - REGDEF(V3D_HUB_IDENT2), - REGDEF(V3D_HUB_IDENT3), - REGDEF(V3D_HUB_INT_STS), - REGDEF(V3D_HUB_INT_MSK_STS), - - REGDEF(V3D_MMU_CTL), - REGDEF(V3D_MMU_VIO_ADDR), - REGDEF(V3D_MMU_VIO_ID), - REGDEF(V3D_MMU_DEBUG_INFO), + REGDEF(33, 42, V3D_HUB_AXICFG), + REGDEF(33, 71, V3D_HUB_UIFCFG), + REGDEF(33, 71, V3D_HUB_IDENT0), + REGDEF(33, 71, V3D_HUB_IDENT1), + REGDEF(33, 71, V3D_HUB_IDENT2), + REGDEF(33, 71, V3D_HUB_IDENT3), + REGDEF(33, 71, V3D_HUB_INT_STS), + REGDEF(33, 71, V3D_HUB_INT_MSK_STS), + + REGDEF(33, 71, V3D_MMU_CTL), + REGDEF(33, 71, V3D_MMU_VIO_ADDR), + REGDEF(33, 71, V3D_MMU_VIO_ID), + REGDEF(33, 71, V3D_MMU_DEBUG_INFO), + + REGDEF(71, 71, V3D_GMP_STATUS(71)), + REGDEF(71, 71, V3D_GMP_CFG(71)), + REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)), }; static const struct v3d_reg_def v3d_gca_reg_defs[] = { - REGDEF(V3D_GCA_SAFE_SHUTDOWN), - REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK), + REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN), + REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK), }; static const struct v3d_reg_def v3d_core_reg_defs[] = { - REGDEF(V3D_CTL_IDENT0), - REGDEF(V3D_CTL_IDENT1), - REGDEF(V3D_CTL_IDENT2), - REGDEF(V3D_CTL_MISCCFG), - REGDEF(V3D_CTL_INT_STS), - REGDEF(V3D_CTL_INT_MSK_STS), - REGDEF(V3D_CLE_CT0CS), - REGDEF(V3D_CLE_CT0CA), - REGDEF(V3D_CLE_CT0EA), - REGDEF(V3D_CLE_CT1CS), - REGDEF(V3D_CLE_CT1CA), - REGDEF(V3D_CLE_CT1EA), - - REGDEF(V3D_PTB_BPCA), - REGDEF(V3D_PTB_BPCS), - - REGDEF(V3D_GMP_STATUS), - REGDEF(V3D_GMP_CFG), - REGDEF(V3D_GMP_VIO_ADDR), - - REGDEF(V3D_ERR_FDBGO), - REGDEF(V3D_ERR_FDBGB), - REGDEF(V3D_ERR_FDBGS), - REGDEF(V3D_ERR_STAT), + REGDEF(33, 71, V3D_CTL_IDENT0), + REGDEF(33, 71, V3D_CTL_IDENT1), + REGDEF(33, 71, V3D_CTL_IDENT2), + REGDEF(33, 71, V3D_CTL_MISCCFG), + REGDEF(33, 71, V3D_CTL_INT_STS), + REGDEF(33, 71, V3D_CTL_INT_MSK_STS), + REGDEF(33, 71, V3D_CLE_CT0CS), + REGDEF(33, 71, V3D_CLE_CT0CA), + REGDEF(33, 71, V3D_CLE_CT0EA), + REGDEF(33, 71, V3D_CLE_CT1CS), + REGDEF(33, 71, V3D_CLE_CT1CA), + REGDEF(33, 71, V3D_CLE_CT1EA), + + REGDEF(33, 71, V3D_PTB_BPCA), + REGDEF(33, 71, V3D_PTB_BPCS), + + REGDEF(33, 41, V3D_GMP_STATUS(33)), + REGDEF(33, 41, V3D_GMP_CFG(33)), + REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)), + + REGDEF(33, 71, V3D_ERR_FDBGO), + REGDEF(33, 71, V3D_ERR_FDBGB), + REGDEF(33, 71, V3D_ERR_FDBGS), + REGDEF(33, 71, V3D_ERR_STAT), }; static const struct v3d_reg_def v3d_csd_reg_defs[] = { - REGDEF(V3D_CSD_STATUS), - REGDEF(V3D_CSD_CURRENT_CFG0), - REGDEF(V3D_CSD_CURRENT_CFG1), - REGDEF(V3D_CSD_CURRENT_CFG2), - REGDEF(V3D_CSD_CURRENT_CFG3), - REGDEF(V3D_CSD_CURRENT_CFG4), - REGDEF(V3D_CSD_CURRENT_CFG5), - REGDEF(V3D_CSD_CURRENT_CFG6), + REGDEF(41, 71, V3D_CSD_STATUS), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)), + 
REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)), + REGDEF(41, 41, V3D_CSD_CURRENT_CFG6(41)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG3(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG4(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG5(71)), + REGDEF(71, 71, V3D_CSD_CURRENT_CFG6(71)), + REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7), }; static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) @@ -85,38 +99,41 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) int i, core; for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { - seq_printf(m, "%s (0x%04x): 0x%08x\n", - v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg, - V3D_READ(v3d_hub_reg_defs[i].reg)); + const struct v3d_reg_def *def = &v3d_hub_reg_defs[i]; + + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + def->name, def->reg, V3D_READ(def->reg)); + } } - if (v3d->ver < 41) { - for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { + const struct v3d_reg_def *def = &v3d_gca_reg_defs[i]; + + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) { seq_printf(m, "%s (0x%04x): 0x%08x\n", - v3d_gca_reg_defs[i].name, - v3d_gca_reg_defs[i].reg, - V3D_GCA_READ(v3d_gca_reg_defs[i].reg)); + def->name, def->reg, V3D_GCA_READ(def->reg)); } } for (core = 0; core < v3d->cores; core++) { for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) { - seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", - core, - v3d_core_reg_defs[i].name, - v3d_core_reg_defs[i].reg, - V3D_CORE_READ(core, - v3d_core_reg_defs[i].reg)); + const struct v3d_reg_def *def = &v3d_core_reg_defs[i]; + + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) { + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", + core, def->name, def->reg, + V3D_CORE_READ(core, def->reg)); + } } - if (v3d_has_csd(v3d)) { - for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) { + for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) { + const struct v3d_reg_def *def = &v3d_csd_reg_defs[i]; + + if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) { seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", - core, - v3d_csd_reg_defs[i].name, - v3d_csd_reg_defs[i].reg, - V3D_CORE_READ(core, - v3d_csd_reg_defs[i].reg)); + core, def->name, def->reg, + V3D_CORE_READ(core, def->reg)); } } } @@ -147,8 +164,10 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) str_yes_no(ident2 & V3D_HUB_IDENT2_WITH_MMU)); seq_printf(m, "TFU: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TFU)); - seq_printf(m, "TSY: %s\n", - str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY)); + if (v3d->ver <= 42) { + seq_printf(m, "TSY: %s\n", + str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY)); + } seq_printf(m, "MSO: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_MSO)); seq_printf(m, "L3C: %s (%dkb)\n", @@ -177,10 +196,14 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) seq_printf(m, " QPUs: %d\n", nslc * qups); seq_printf(m, " Semaphores: %d\n", V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); - seq_printf(m, " BCG int: %d\n", - (ident2 & V3D_IDENT2_BCG_INT) != 0); - seq_printf(m, " Override TMU: %d\n", - (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); + if (v3d->ver <= 42) { + seq_printf(m, " BCG int: %d\n", + (ident2 & V3D_IDENT2_BCG_INT) != 0); + } + if (v3d->ver < 40) { + seq_printf(m, " Override TMU: %d\n", + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); + } } return 0; @@ -212,14 +235,15 @@ static int 
v3d_measure_clock(struct seq_file *m, void *unused) int measure_ms = 1000; if (v3d->ver >= 40) { + int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT, + V3D_SET_FIELD(cycle_count_reg, V3D_PCTR_S0)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { V3D_CORE_WRITE(core, V3D_V3_PCTR_0_PCTRS0, - V3D_PCTR_CYCLE_COUNT); + V3D_PCTR_CYCLE_COUNT(v3d->ver)); V3D_CORE_WRITE(core, V3D_V3_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V3_PCTR_0_EN, V3D_V3_PCTR_0_EN_ENABLE | diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index ffbbe9d527d3..3debf37e7d9b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/sched/clock.h> #include <linux/reset.h> #include <drm/drm_drv.h> @@ -90,6 +91,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: args->value = 1; return 0; + case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE: + args->value = 1; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; @@ -111,6 +115,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file) v3d_priv->v3d = v3d; for (i = 0; i < V3D_MAX_QUEUES; i++) { + v3d_priv->enabled_ns[i] = 0; + v3d_priv->start_ns[i] = 0; + v3d_priv->jobs_sent[i] = 0; + sched = &v3d->queue[i].sched; drm_sched_entity_init(&v3d_priv->sched_entity[i], DRM_SCHED_PRIORITY_NORMAL, &sched, @@ -136,7 +144,35 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) kfree(v3d_priv); } -DEFINE_DRM_GEM_FOPS(v3d_drm_fops); +static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + struct v3d_file_priv *file_priv = file->driver_priv; + u64 timestamp = local_clock(); + enum v3d_queue queue; + + for (queue = 0; queue < V3D_MAX_QUEUES; queue++) { + /* Note that, in case of a GPU reset, the time spent during an + * attempt of executing the job is not computed in the runtime. + */ + drm_printf(p, "drm-engine-%s: \t%llu ns\n", + v3d_queue_to_string(queue), + file_priv->start_ns[queue] ? file_priv->enabled_ns[queue] + + timestamp - file_priv->start_ns[queue] + : file_priv->enabled_ns[queue]); + + /* Note that we only count jobs that completed. Therefore, jobs + * that were resubmitted due to a GPU reset are not computed. + */ + drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n", + v3d_queue_to_string(queue), file_priv->jobs_sent[queue]); + } +} + +static const struct file_operations v3d_drm_fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = drm_show_fdinfo, +}; /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP * protection between clients. 
Note that render nodes would be @@ -156,6 +192,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), }; static const struct drm_driver v3d_drm_driver = { @@ -176,6 +213,7 @@ static const struct drm_driver v3d_drm_driver = { .ioctls = v3d_drm_ioctls, .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), .fops = &v3d_drm_fops, + .show_fdinfo = v3d_show_fdinfo, .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -187,6 +225,7 @@ static const struct drm_driver v3d_drm_driver = { static const struct of_device_id v3d_of_match[] = { { .compatible = "brcm,2711-v3d" }, + { .compatible = "brcm,2712-v3d" }, { .compatible = "brcm,7268-v3d" }, { .compatible = "brcm,7278-v3d" }, {}, @@ -281,8 +320,14 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) if (ret) goto irq_disable; + ret = v3d_sysfs_init(dev); + if (ret) + goto drm_unregister; + return 0; +drm_unregister: + drm_dev_unregister(drm); irq_disable: v3d_irq_disable(v3d); gem_destroy: @@ -296,6 +341,9 @@ static void v3d_platform_drm_remove(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); struct v3d_dev *v3d = to_v3d_dev(drm); + struct device *dev = &pdev->dev; + + v3d_sysfs_destroy(dev); drm_dev_unregister(drm); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 106454f28956..3c7d58866570 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -19,13 +19,30 @@ struct reset_control; #define GMP_GRANULARITY (128 * 1024) -#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) +#define V3D_MAX_QUEUES (V3D_CPU + 1) + +static inline char *v3d_queue_to_string(enum v3d_queue queue) +{ + switch (queue) { + case V3D_BIN: return "bin"; + case V3D_RENDER: return "render"; + case V3D_TFU: return "tfu"; + case V3D_CSD: return "csd"; + case V3D_CACHE_CLEAN: return "cache_clean"; + case V3D_CPU: return "cpu"; + } + return "UNKNOWN"; +} struct v3d_queue_state { struct drm_gpu_scheduler sched; u64 fence_context; u64 emit_seqno; + + u64 start_ns; + u64 enabled_ns; + u64 jobs_sent; }; /* Performance monitor object. The perform lifetime is controlled by userspace @@ -106,6 +123,7 @@ struct v3d_dev { struct v3d_render_job *render_job; struct v3d_tfu_job *tfu_job; struct v3d_csd_job *csd_job; + struct v3d_cpu_job *cpu_job; struct v3d_queue_state queue[V3D_MAX_QUEUES]; @@ -167,6 +185,12 @@ struct v3d_file_priv { } perfmon; struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; + + u64 start_ns[V3D_MAX_QUEUES]; + + u64 enabled_ns[V3D_MAX_QUEUES]; + + u64 jobs_sent[V3D_MAX_QUEUES]; }; struct v3d_bo { @@ -178,6 +202,8 @@ struct v3d_bo { * v3d_render_job->unref_list */ struct list_head unref_head; + + void *vaddr; }; static inline struct v3d_bo * @@ -238,6 +264,11 @@ struct v3d_job { */ struct v3d_perfmon *perfmon; + /* File descriptor of the process that submitted the job that could be used + * for collecting stats by process of GPU usage. + */ + struct drm_file *file; + /* Callback for the freeing of the job on refcount going to 0. 
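A note on the fdinfo accounting introduced above: each queue keeps two counters, start_ns (non-zero only while a job is in flight) and enabled_ns (total runtime of completed jobs, accumulated exactly once per job). The busy-time computation in v3d_show_fdinfo() reduces to the sketch below; v3d_busy_ns() is a hypothetical helper name, not a driver function.

static u64 v3d_busy_ns(u64 enabled_ns, u64 start_ns, u64 now)
{
	/* A job in flight contributes its elapsed time on top of the
	 * completed total; otherwise only completed time is reported.
	 */
	return start_ns ? enabled_ns + now - start_ns : enabled_ns;
}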
*/ void (*free)(struct kref *ref); }; @@ -285,6 +316,112 @@ struct v3d_csd_job { struct drm_v3d_submit_csd args; }; +enum v3d_cpu_job_type { + V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1, + V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY, + V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY, + V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY, + V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY, + V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY, +}; + +struct v3d_timestamp_query { + /* Offset of this query in the timestamp BO for its value. */ + u32 offset; + + /* Syncobj that indicates the timestamp availability */ + struct drm_syncobj *syncobj; +}; + +/* Number of perfmons required to handle all supported performance counters */ +#define V3D_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \ + DRM_V3D_MAX_PERF_COUNTERS) + +struct v3d_performance_query { + /* Performance monitor IDs for this query */ + u32 kperfmon_ids[V3D_MAX_PERFMONS]; + + /* Syncobj that indicates the query availability */ + struct drm_syncobj *syncobj; +}; + +struct v3d_indirect_csd_info { + /* Indirect CSD */ + struct v3d_csd_job *job; + + /* Clean cache job associated to the Indirect CSD job */ + struct v3d_job *clean_job; + + /* Offset within the BO where the workgroup counts are stored */ + u32 offset; + + /* Workgroups size */ + u32 wg_size; + + /* Indices of the uniforms with the workgroup dispatch counts + * in the uniform stream. + */ + u32 wg_uniform_offsets[3]; + + /* Indirect BO */ + struct drm_gem_object *indirect; + + /* Context of the Indirect CSD job */ + struct ww_acquire_ctx acquire_ctx; +}; + +struct v3d_timestamp_query_info { + struct v3d_timestamp_query *queries; + + u32 count; +}; + +struct v3d_performance_query_info { + struct v3d_performance_query *queries; + + /* Number of performance queries */ + u32 count; + + /* Number of performance monitors related to that query pool */ + u32 nperfmons; + + /* Number of performance counters related to that query pool */ + u32 ncounters; +}; + +struct v3d_copy_query_results_info { + /* Define if should write to buffer using 64 or 32 bits */ + bool do_64bit; + + /* Define if it can write to buffer even if the query is not available */ + bool do_partial; + + /* Define if it should write availability bit to buffer */ + bool availability_bit; + + /* Offset of the copy buffer in the BO */ + u32 offset; + + /* Stride of the copy buffer in the BO */ + u32 stride; +}; + +struct v3d_cpu_job { + struct v3d_job base; + + enum v3d_cpu_job_type job_type; + + struct v3d_indirect_csd_info indirect_csd; + + struct v3d_timestamp_query_info timestamp_query; + + struct v3d_copy_query_results_info copy; + + struct v3d_performance_query_info performance_query; +}; + +typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *); + struct v3d_submit_outsync { struct drm_syncobj *syncobj; }; @@ -352,12 +489,16 @@ struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size); void v3d_free_object(struct drm_gem_object *gem_obj); struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, size_t size); +void v3d_get_bo_vaddr(struct v3d_bo *bo); +void v3d_put_bo_vaddr(struct v3d_bo *bo); int v3d_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, struct 
dma_buf_attachment *attach, struct sg_table *sgt); @@ -372,19 +513,21 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); /* v3d_gem.c */ int v3d_gem_init(struct drm_device *dev); void v3d_gem_destroy(struct drm_device *dev); +void v3d_reset(struct v3d_dev *v3d); +void v3d_invalidate_caches(struct v3d_dev *v3d); +void v3d_clean_caches(struct v3d_dev *v3d); + +/* v3d_submit.c */ +void v3d_job_cleanup(struct v3d_job *job); +void v3d_job_put(struct v3d_job *job); int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_submit_csd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -void v3d_job_cleanup(struct v3d_job *job); -void v3d_job_put(struct v3d_job *job); -void v3d_reset(struct v3d_dev *v3d); -void v3d_invalidate_caches(struct v3d_dev *v3d); -void v3d_clean_caches(struct v3d_dev *v3d); +int v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_irq.c */ int v3d_irq_init(struct v3d_dev *v3d); @@ -393,8 +536,6 @@ void v3d_irq_disable(struct v3d_dev *v3d); void v3d_irq_reset(struct v3d_dev *v3d); /* v3d_mmu.c */ -int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, - u32 *offset); int v3d_mmu_set_page_table(struct v3d_dev *v3d); void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); @@ -418,3 +559,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); + +/* v3d_sysfs.c */ +int v3d_sysfs_init(struct device *dev); +void v3d_sysfs_destroy(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 2e94ce788c71..afc565078c78 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -11,8 +11,6 @@ #include <linux/uaccess.h> #include <drm/drm_managed.h> -#include <drm/drm_syncobj.h> -#include <uapi/drm/v3d_drm.h> #include "v3d_drv.h" #include "v3d_regs.h" @@ -47,9 +45,9 @@ v3d_init_hw_state(struct v3d_dev *v3d) static void v3d_idle_axi(struct v3d_dev *v3d, int core) { - V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); + V3D_CORE_WRITE(core, V3D_GMP_CFG(v3d->ver), V3D_GMP_CFG_STOP_REQ); - if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & + if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS(v3d->ver)) & (V3D_GMP_STATUS_RD_COUNT_MASK | V3D_GMP_STATUS_WR_COUNT_MASK | V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { @@ -241,771 +239,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d) v3d_invalidate_slices(v3d, 0); } -/* Takes the reservation lock on all the BOs being referenced, so that - * at queue submit time we can update the reservations. - * - * We don't lock the RCL the tile alloc/state BOs, or overflow memory - * (all of which are on exec->unref_list). They're entirely private - * to v3d, so we don't attach dma-buf fences to them. 
- */ -static int -v3d_lock_bo_reservations(struct v3d_job *job, - struct ww_acquire_ctx *acquire_ctx) -{ - int i, ret; - - ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx); - if (ret) - return ret; - - for (i = 0; i < job->bo_count; i++) { - ret = dma_resv_reserve_fences(job->bo[i]->resv, 1); - if (ret) - goto fail; - - ret = drm_sched_job_add_implicit_dependencies(&job->base, - job->bo[i], true); - if (ret) - goto fail; - } - - return 0; - -fail: - drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); - return ret; -} - -/** - * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects - * referenced by the job. - * @dev: DRM device - * @file_priv: DRM file for this fd - * @job: V3D job being set up - * @bo_handles: GEM handles - * @bo_count: Number of GEM handles passed in - * - * The command validator needs to reference BOs by their index within - * the submitted job's BO list. This does the validation of the job's - * BO list and reference counting for the lifetime of the job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at v3d_exec_cleanup() time. - */ -static int -v3d_lookup_bos(struct drm_device *dev, - struct drm_file *file_priv, - struct v3d_job *job, - u64 bo_handles, - u32 bo_count) -{ - job->bo_count = bo_count; - - if (!job->bo_count) { - /* See comment on bo_index for why we have to check - * this. - */ - DRM_DEBUG("Rendering requires BOs\n"); - return -EINVAL; - } - - return drm_gem_objects_lookup(file_priv, - (void __user *)(uintptr_t)bo_handles, - job->bo_count, &job->bo); -} - -static void -v3d_job_free(struct kref *ref) -{ - struct v3d_job *job = container_of(ref, struct v3d_job, refcount); - int i; - - if (job->bo) { - for (i = 0; i < job->bo_count; i++) - drm_gem_object_put(job->bo[i]); - kvfree(job->bo); - } - - dma_fence_put(job->irq_fence); - dma_fence_put(job->done_fence); - - if (job->perfmon) - v3d_perfmon_put(job->perfmon); - - kfree(job); -} - -static void -v3d_render_job_free(struct kref *ref) -{ - struct v3d_render_job *job = container_of(ref, struct v3d_render_job, - base.refcount); - struct v3d_bo *bo, *save; - - list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { - drm_gem_object_put(&bo->base.base); - } - - v3d_job_free(ref); -} - -void v3d_job_cleanup(struct v3d_job *job) -{ - if (!job) - return; - - drm_sched_job_cleanup(&job->base); - v3d_job_put(job); -} - -void v3d_job_put(struct v3d_job *job) -{ - kref_put(&job->refcount, job->free); -} - -int -v3d_wait_bo_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - int ret; - struct drm_v3d_wait_bo *args = data; - ktime_t start = ktime_get(); - u64 delta_ns; - unsigned long timeout_jiffies = - nsecs_to_jiffies_timeout(args->timeout_ns); - - if (args->pad != 0) - return -EINVAL; - - ret = drm_gem_dma_resv_wait(file_priv, args->handle, - true, timeout_jiffies); - - /* Decrement the user's timeout, in case we got interrupted - * such that the ioctl will be restarted. - */ - delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); - if (delta_ns < args->timeout_ns) - args->timeout_ns -= delta_ns; - else - args->timeout_ns = 0; - - /* Asked to wait beyond the jiffie/scheduler precision? 
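The timeout bookkeeping in the v3d_wait_bo_ioctl() being moved here exists so that a restarted wait cannot extend the caller's total deadline: the remaining budget is written back to args->timeout_ns, and -ETIME becomes -EAGAIN while budget remains. A plausible userspace retry loop, as a sketch assuming the uAPI names from include/uapi/drm/v3d_drm.h:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/v3d_drm.h>	/* header path assumed */

static int wait_bo(int fd, uint32_t handle, uint64_t timeout_ns)
{
	struct drm_v3d_wait_bo args = {
		.handle = handle,
		.timeout_ns = timeout_ns,	/* kernel shrinks this across restarts */
	};
	int ret;

	do {
		ret = ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &args);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret;
}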
*/ - if (ret == -ETIME && args->timeout_ns) - ret = -EAGAIN; - - return ret; -} - -static int -v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, - void **container, size_t size, void (*free)(struct kref *ref), - u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) -{ - struct v3d_file_priv *v3d_priv = file_priv->driver_priv; - struct v3d_job *job; - bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); - int ret, i; - - *container = kcalloc(1, size, GFP_KERNEL); - if (!*container) { - DRM_ERROR("Cannot allocate memory for v3d job."); - return -ENOMEM; - } - - job = *container; - job->v3d = v3d; - job->free = free; - - ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], - v3d_priv); - if (ret) - goto fail; - - if (has_multisync) { - if (se->in_sync_count && se->wait_stage == queue) { - struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); - - for (i = 0; i < se->in_sync_count; i++) { - struct drm_v3d_sem in; - - if (copy_from_user(&in, handle++, sizeof(in))) { - ret = -EFAULT; - DRM_DEBUG("Failed to copy wait dep handle.\n"); - goto fail_deps; - } - ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0); - - // TODO: Investigate why this was filtered out for the IOCTL. - if (ret && ret != -ENOENT) - goto fail_deps; - } - } - } else { - ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0); - - // TODO: Investigate why this was filtered out for the IOCTL. - if (ret && ret != -ENOENT) - goto fail_deps; - } - - kref_init(&job->refcount); - - return 0; - -fail_deps: - drm_sched_job_cleanup(&job->base); -fail: - kfree(*container); - *container = NULL; - - return ret; -} - -static void -v3d_push_job(struct v3d_job *job) -{ - drm_sched_job_arm(&job->base); - - job->done_fence = dma_fence_get(&job->base.s_fence->finished); - - /* put by scheduler job completion */ - kref_get(&job->refcount); - - drm_sched_entity_push_job(&job->base); -} - -static void -v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, - struct v3d_job *job, - struct ww_acquire_ctx *acquire_ctx, - u32 out_sync, - struct v3d_submit_ext *se, - struct dma_fence *done_fence) -{ - struct drm_syncobj *sync_out; - bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); - int i; - - for (i = 0; i < job->bo_count; i++) { - /* XXX: Use shared fences for read-only objects. 
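Because the fence-attach path below also installs the job's done fence into the caller's out_sync syncobj, userspace can wait for completion through the generic syncobj ioctl rather than waiting on the BO; a sketch (handle and deadline are the caller's, error handling elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int wait_submit(int fd, uint32_t out_sync, int64_t deadline_ns)
{
	struct drm_syncobj_wait wait = {
		.handles = (uintptr_t)&out_sync,
		.count_handles = 1,
		.timeout_nsec = deadline_ns,	/* absolute CLOCK_MONOTONIC time */
	};

	return ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}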
*/ - dma_resv_add_fence(job->bo[i]->resv, job->done_fence, - DMA_RESV_USAGE_WRITE); - } - - drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); - - /* Update the return sync object for the job */ - /* If it only supports a single signal semaphore*/ - if (!has_multisync) { - sync_out = drm_syncobj_find(file_priv, out_sync); - if (sync_out) { - drm_syncobj_replace_fence(sync_out, done_fence); - drm_syncobj_put(sync_out); - } - return; - } - - /* If multiple semaphores extension is supported */ - if (se->out_sync_count) { - for (i = 0; i < se->out_sync_count; i++) { - drm_syncobj_replace_fence(se->out_syncs[i].syncobj, - done_fence); - drm_syncobj_put(se->out_syncs[i].syncobj); - } - kvfree(se->out_syncs); - } -} - -static void -v3d_put_multisync_post_deps(struct v3d_submit_ext *se) -{ - unsigned int i; - - if (!(se && se->out_sync_count)) - return; - - for (i = 0; i < se->out_sync_count; i++) - drm_syncobj_put(se->out_syncs[i].syncobj); - kvfree(se->out_syncs); -} - -static int -v3d_get_multisync_post_deps(struct drm_file *file_priv, - struct v3d_submit_ext *se, - u32 count, u64 handles) -{ - struct drm_v3d_sem __user *post_deps; - int i, ret; - - if (!count) - return 0; - - se->out_syncs = (struct v3d_submit_outsync *) - kvmalloc_array(count, - sizeof(struct v3d_submit_outsync), - GFP_KERNEL); - if (!se->out_syncs) - return -ENOMEM; - - post_deps = u64_to_user_ptr(handles); - - for (i = 0; i < count; i++) { - struct drm_v3d_sem out; - - if (copy_from_user(&out, post_deps++, sizeof(out))) { - ret = -EFAULT; - DRM_DEBUG("Failed to copy post dep handles\n"); - goto fail; - } - - se->out_syncs[i].syncobj = drm_syncobj_find(file_priv, - out.handle); - if (!se->out_syncs[i].syncobj) { - ret = -EINVAL; - goto fail; - } - } - se->out_sync_count = count; - - return 0; - -fail: - for (i--; i >= 0; i--) - drm_syncobj_put(se->out_syncs[i].syncobj); - kvfree(se->out_syncs); - - return ret; -} - -/* Get data for multiple binary semaphores synchronization. Parse syncobj - * to be signaled when job completes (out_sync). - */ -static int -v3d_get_multisync_submit_deps(struct drm_file *file_priv, - struct drm_v3d_extension __user *ext, - void *data) -{ - struct drm_v3d_multi_sync multisync; - struct v3d_submit_ext *se = data; - int ret; - - if (copy_from_user(&multisync, ext, sizeof(multisync))) - return -EFAULT; - - if (multisync.pad) - return -EINVAL; - - ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count, - multisync.out_syncs); - if (ret) - return ret; - - se->in_sync_count = multisync.in_sync_count; - se->in_syncs = multisync.in_syncs; - se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC; - se->wait_stage = multisync.wait_stage; - - return 0; -} - -/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data - * according to the extension id (name). 
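For reference, the extension list parsed here is built by userspace as a chain linked through drm_v3d_extension.next; a sketch for a single multisync extension, with the semaphore arrays populated elsewhere (field names from include/uapi/drm/v3d_drm.h):

struct drm_v3d_sem in_syncs[2], out_syncs[1];	/* filled in by the caller */

struct drm_v3d_multi_sync ms = {
	.base = {
		.id = DRM_V3D_EXT_ID_MULTI_SYNC,
		.next = 0,			/* end of the chain */
	},
	.in_syncs = (uintptr_t)in_syncs,
	.in_sync_count = 2,
	.out_syncs = (uintptr_t)out_syncs,
	.out_sync_count = 1,
	.wait_stage = V3D_RENDER,		/* queue the waits apply to */
};

struct drm_v3d_submit_cl submit = {
	.flags = DRM_V3D_SUBMIT_EXTENSION,
	.extensions = (uintptr_t)&ms,
	/* ... remaining submit fields as usual ... */
};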
- */ -static int -v3d_get_extensions(struct drm_file *file_priv, - u64 ext_handles, - void *data) -{ - struct drm_v3d_extension __user *user_ext; - int ret; - - user_ext = u64_to_user_ptr(ext_handles); - while (user_ext) { - struct drm_v3d_extension ext; - - if (copy_from_user(&ext, user_ext, sizeof(ext))) { - DRM_DEBUG("Failed to copy submit extension\n"); - return -EFAULT; - } - - switch (ext.id) { - case DRM_V3D_EXT_ID_MULTI_SYNC: - ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data); - if (ret) - return ret; - break; - default: - DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); - return -EINVAL; - } - - user_ext = u64_to_user_ptr(ext.next); - } - - return 0; -} - -/** - * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. - * @dev: DRM device - * @data: ioctl argument - * @file_priv: DRM file for this fd - * - * This is the main entrypoint for userspace to submit a 3D frame to - * the GPU. Userspace provides the binner command list (if - * applicable), and the kernel sets up the render command list to draw - * to the framebuffer described in the ioctl, using the command lists - * that the 3D engine's binner will produce. - */ -int -v3d_submit_cl_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct v3d_dev *v3d = to_v3d_dev(dev); - struct v3d_file_priv *v3d_priv = file_priv->driver_priv; - struct drm_v3d_submit_cl *args = data; - struct v3d_submit_ext se = {0}; - struct v3d_bin_job *bin = NULL; - struct v3d_render_job *render = NULL; - struct v3d_job *clean_job = NULL; - struct v3d_job *last_job; - struct ww_acquire_ctx acquire_ctx; - int ret = 0; - - trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); - - if (args->pad) - return -EINVAL; - - if (args->flags && - args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE | - DRM_V3D_SUBMIT_EXTENSION)) { - DRM_INFO("invalid flags: %d\n", args->flags); - return -EINVAL; - } - - if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { - ret = v3d_get_extensions(file_priv, args->extensions, &se); - if (ret) { - DRM_DEBUG("Failed to get extensions.\n"); - return ret; - } - } - - ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render), - v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); - if (ret) - goto fail; - - render->start = args->rcl_start; - render->end = args->rcl_end; - INIT_LIST_HEAD(&render->unref_list); - - if (args->bcl_start != args->bcl_end) { - ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin), - v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); - if (ret) - goto fail; - - bin->start = args->bcl_start; - bin->end = args->bcl_end; - bin->qma = args->qma; - bin->qms = args->qms; - bin->qts = args->qts; - bin->render = render; - } - - if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { - ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), - v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) - goto fail; - - last_job = clean_job; - } else { - last_job = &render->base; - } - - ret = v3d_lookup_bos(dev, file_priv, last_job, - args->bo_handles, args->bo_handle_count); - if (ret) - goto fail; - - ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); - if (ret) - goto fail; - - if (args->perfmon_id) { - render->base.perfmon = v3d_perfmon_find(v3d_priv, - args->perfmon_id); - - if (!render->base.perfmon) { - ret = -ENOENT; - goto fail_perfmon; - } - } - - mutex_lock(&v3d->sched_lock); - if (bin) { - bin->base.perfmon = render->base.perfmon; - v3d_perfmon_get(bin->base.perfmon); - v3d_push_job(&bin->base); - - ret = 
drm_sched_job_add_dependency(&render->base.base, - dma_fence_get(bin->base.done_fence)); - if (ret) - goto fail_unreserve; - } - - v3d_push_job(&render->base); - - if (clean_job) { - struct dma_fence *render_fence = - dma_fence_get(render->base.done_fence); - ret = drm_sched_job_add_dependency(&clean_job->base, - render_fence); - if (ret) - goto fail_unreserve; - clean_job->perfmon = render->base.perfmon; - v3d_perfmon_get(clean_job->perfmon); - v3d_push_job(clean_job); - } - - mutex_unlock(&v3d->sched_lock); - - v3d_attach_fences_and_unlock_reservation(file_priv, - last_job, - &acquire_ctx, - args->out_sync, - &se, - last_job->done_fence); - - if (bin) - v3d_job_put(&bin->base); - v3d_job_put(&render->base); - if (clean_job) - v3d_job_put(clean_job); - - return 0; - -fail_unreserve: - mutex_unlock(&v3d->sched_lock); -fail_perfmon: - drm_gem_unlock_reservations(last_job->bo, - last_job->bo_count, &acquire_ctx); -fail: - v3d_job_cleanup((void *)bin); - v3d_job_cleanup((void *)render); - v3d_job_cleanup(clean_job); - v3d_put_multisync_post_deps(&se); - - return ret; -} - -/** - * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D. - * @dev: DRM device - * @data: ioctl argument - * @file_priv: DRM file for this fd - * - * Userspace provides the register setup for the TFU, which we don't - * need to validate since the TFU is behind the MMU. - */ -int -v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct v3d_dev *v3d = to_v3d_dev(dev); - struct drm_v3d_submit_tfu *args = data; - struct v3d_submit_ext se = {0}; - struct v3d_tfu_job *job = NULL; - struct ww_acquire_ctx acquire_ctx; - int ret = 0; - - trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); - - if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { - DRM_DEBUG("invalid flags: %d\n", args->flags); - return -EINVAL; - } - - if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { - ret = v3d_get_extensions(file_priv, args->extensions, &se); - if (ret) { - DRM_DEBUG("Failed to get extensions.\n"); - return ret; - } - } - - ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), - v3d_job_free, args->in_sync, &se, V3D_TFU); - if (ret) - goto fail; - - job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), - sizeof(*job->base.bo), GFP_KERNEL); - if (!job->base.bo) { - ret = -ENOMEM; - goto fail; - } - - job->args = *args; - - for (job->base.bo_count = 0; - job->base.bo_count < ARRAY_SIZE(args->bo_handles); - job->base.bo_count++) { - struct drm_gem_object *bo; - - if (!args->bo_handles[job->base.bo_count]) - break; - - bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]); - if (!bo) { - DRM_DEBUG("Failed to look up GEM BO %d: %d\n", - job->base.bo_count, - args->bo_handles[job->base.bo_count]); - ret = -ENOENT; - goto fail; - } - job->base.bo[job->base.bo_count] = bo; - } - - ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); - if (ret) - goto fail; - - mutex_lock(&v3d->sched_lock); - v3d_push_job(&job->base); - mutex_unlock(&v3d->sched_lock); - - v3d_attach_fences_and_unlock_reservation(file_priv, - &job->base, &acquire_ctx, - args->out_sync, - &se, - job->base.done_fence); - - v3d_job_put(&job->base); - - return 0; - -fail: - v3d_job_cleanup((void *)job); - v3d_put_multisync_post_deps(&se); - - return ret; -} - -/** - * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D. 
- * @dev: DRM device - * @data: ioctl argument - * @file_priv: DRM file for this fd - * - * Userspace provides the register setup for the CSD, which we don't - * need to validate since the CSD is behind the MMU. - */ -int -v3d_submit_csd_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct v3d_dev *v3d = to_v3d_dev(dev); - struct v3d_file_priv *v3d_priv = file_priv->driver_priv; - struct drm_v3d_submit_csd *args = data; - struct v3d_submit_ext se = {0}; - struct v3d_csd_job *job = NULL; - struct v3d_job *clean_job = NULL; - struct ww_acquire_ctx acquire_ctx; - int ret; - - trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]); - - if (args->pad) - return -EINVAL; - - if (!v3d_has_csd(v3d)) { - DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n"); - return -EINVAL; - } - - if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { - DRM_INFO("invalid flags: %d\n", args->flags); - return -EINVAL; - } - - if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { - ret = v3d_get_extensions(file_priv, args->extensions, &se); - if (ret) { - DRM_DEBUG("Failed to get extensions.\n"); - return ret; - } - } - - ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), - v3d_job_free, args->in_sync, &se, V3D_CSD); - if (ret) - goto fail; - - ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), - v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) - goto fail; - - job->args = *args; - - ret = v3d_lookup_bos(dev, file_priv, clean_job, - args->bo_handles, args->bo_handle_count); - if (ret) - goto fail; - - ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx); - if (ret) - goto fail; - - if (args->perfmon_id) { - job->base.perfmon = v3d_perfmon_find(v3d_priv, - args->perfmon_id); - if (!job->base.perfmon) { - ret = -ENOENT; - goto fail_perfmon; - } - } - - mutex_lock(&v3d->sched_lock); - v3d_push_job(&job->base); - - ret = drm_sched_job_add_dependency(&clean_job->base, - dma_fence_get(job->base.done_fence)); - if (ret) - goto fail_unreserve; - - v3d_push_job(clean_job); - mutex_unlock(&v3d->sched_lock); - - v3d_attach_fences_and_unlock_reservation(file_priv, - clean_job, - &acquire_ctx, - args->out_sync, - &se, - clean_job->done_fence); - - v3d_job_put(&job->base); - v3d_job_put(clean_job); - - return 0; - -fail_unreserve: - mutex_unlock(&v3d->sched_lock); -fail_perfmon: - drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, - &acquire_ctx); -fail: - v3d_job_cleanup((void *)job); - v3d_job_cleanup(clean_job); - v3d_put_multisync_post_deps(&se); - - return ret; -} - int v3d_gem_init(struct drm_device *dev) { @@ -1013,8 +246,12 @@ v3d_gem_init(struct drm_device *dev) u32 pt_size = 4096 * 1024; int ret, i; - for (i = 0; i < V3D_MAX_QUEUES; i++) + for (i = 0; i < V3D_MAX_QUEUES; i++) { v3d->queue[i].fence_context = dma_fence_context_alloc(1); + v3d->queue[i].start_ns = 0; + v3d->queue[i].enabled_ns = 0; + v3d->queue[i].jobs_sent = 0; + } spin_lock_init(&v3d->mm_lock); spin_lock_init(&v3d->job_lock); @@ -1072,6 +309,8 @@ v3d_gem_destroy(struct drm_device *dev) */ WARN_ON(v3d->bin_job); WARN_ON(v3d->render_job); + WARN_ON(v3d->tfu_job); + WARN_ON(v3d->csd_job); drm_mm_takedown(&v3d->mm); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index e714d5318f30..afc76390a197 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -14,21 +14,23 @@ */ #include <linux/platform_device.h> +#include <linux/sched/clock.h> #include "v3d_drv.h" #include "v3d_regs.h" #include "v3d_trace.h" 
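The version-parameterized interrupt masks defined just below can be sanity-checked at build time: on 7.x the GMP violation bit leaves the per-core mask and is reported through the hub mask instead. A sketch using the kernel's static_assert(), with bit positions taken from v3d_regs.h:

static_assert(!(V3D_CORE_IRQS(71) & V3D_INT_GMPV));	/* no core GMPV on 7.x */
static_assert(V3D_HUB_IRQS(71) & V3D_V7_HUB_INT_GMPV);	/* hub reports it instead */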
-#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ - V3D_INT_FLDONE | \ - V3D_INT_FRDONE | \ - V3D_INT_CSDDONE | \ - V3D_INT_GMPV)) +#define V3D_CORE_IRQS(ver) ((u32)(V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ + V3D_INT_FRDONE | \ + V3D_INT_CSDDONE(ver) | \ + (ver < 71 ? V3D_INT_GMPV : 0))) -#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ - V3D_HUB_INT_MMU_PTI | \ - V3D_HUB_INT_MMU_CAP | \ - V3D_HUB_INT_TFUC)) +#define V3D_HUB_IRQS(ver) ((u32)(V3D_HUB_INT_MMU_WRV | \ + V3D_HUB_INT_MMU_PTI | \ + V3D_HUB_INT_MMU_CAP | \ + V3D_HUB_INT_TFUC | \ + (ver >= 71 ? V3D_V7_HUB_INT_GMPV : 0))) static irqreturn_t v3d_hub_irq(int irq, void *arg); @@ -100,6 +102,18 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FLDONE) { struct v3d_fence *fence = to_v3d_fence(v3d->bin_job->base.irq_fence); + struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv; + u64 runtime = local_clock() - file->start_ns[V3D_BIN]; + + file->jobs_sent[V3D_BIN]++; + v3d->queue[V3D_BIN].jobs_sent++; + + file->start_ns[V3D_BIN] = 0; + v3d->queue[V3D_BIN].start_ns = 0; + + file->enabled_ns[V3D_BIN] += runtime; + v3d->queue[V3D_BIN].enabled_ns += runtime; trace_v3d_bcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -109,15 +123,39 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FRDONE) { struct v3d_fence *fence = to_v3d_fence(v3d->render_job->base.irq_fence); + struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv; + u64 runtime = local_clock() - file->start_ns[V3D_RENDER]; + + file->jobs_sent[V3D_RENDER]++; + v3d->queue[V3D_RENDER].jobs_sent++; + + file->start_ns[V3D_RENDER] = 0; + v3d->queue[V3D_RENDER].start_ns = 0; + + file->enabled_ns[V3D_RENDER] += runtime; + v3d->queue[V3D_RENDER].enabled_ns += runtime; trace_v3d_rcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); status = IRQ_HANDLED; } - if (intsts & V3D_INT_CSDDONE) { + if (intsts & V3D_INT_CSDDONE(v3d->ver)) { struct v3d_fence *fence = to_v3d_fence(v3d->csd_job->base.irq_fence); + struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv; + u64 runtime = local_clock() - file->start_ns[V3D_CSD]; + + file->jobs_sent[V3D_CSD]++; + v3d->queue[V3D_CSD].jobs_sent++; + + file->start_ns[V3D_CSD] = 0; + v3d->queue[V3D_CSD].start_ns = 0; + + file->enabled_ns[V3D_CSD] += runtime; + v3d->queue[V3D_CSD].enabled_ns += runtime; trace_v3d_csd_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -127,7 +165,7 @@ v3d_irq(int irq, void *arg) /* We shouldn't be triggering these if we have GMP in * always-allowed mode. 
*/ - if (intsts & V3D_INT_GMPV) + if (v3d->ver < 71 && (intsts & V3D_INT_GMPV)) dev_err(v3d->drm.dev, "GMP violation\n"); /* V3D 4.2 wires the hub and core IRQs together, so if we & @@ -154,6 +192,18 @@ v3d_hub_irq(int irq, void *arg) if (intsts & V3D_HUB_INT_TFUC) { struct v3d_fence *fence = to_v3d_fence(v3d->tfu_job->base.irq_fence); + struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv; + u64 runtime = local_clock() - file->start_ns[V3D_TFU]; + + file->jobs_sent[V3D_TFU]++; + v3d->queue[V3D_TFU].jobs_sent++; + + file->start_ns[V3D_TFU] = 0; + v3d->queue[V3D_TFU].start_ns = 0; + + file->enabled_ns[V3D_TFU] += runtime; + v3d->queue[V3D_TFU].enabled_ns += runtime; trace_v3d_tfu_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -197,6 +247,11 @@ v3d_hub_irq(int irq, void *arg) status = IRQ_HANDLED; } + if (v3d->ver >= 71 && (intsts & V3D_V7_HUB_INT_GMPV)) { + dev_err(v3d->drm.dev, "GMP Violation\n"); + status = IRQ_HANDLED; + } + return status; } @@ -211,8 +266,8 @@ v3d_irq_init(struct v3d_dev *v3d) * for us. */ for (core = 0; core < v3d->cores; core++) - V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); - V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); if (irq1 == -EPROBE_DEFER) @@ -256,12 +311,12 @@ v3d_irq_enable(struct v3d_dev *v3d) /* Enable our set of interrupts, masking out any others. */ for (core = 0; core < v3d->cores; core++) { - V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS); - V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS(v3d->ver)); + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS(v3d->ver)); } - V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS); - V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS); + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS(v3d->ver)); + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS(v3d->ver)); } void @@ -276,8 +331,8 @@ v3d_irq_disable(struct v3d_dev *v3d) /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) - V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); - V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); cancel_work_sync(&v3d->overflow_mem_work); } diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 3663e0d6bf76..1b1a62ad9585 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -57,6 +57,7 @@ #define V3D_HUB_INT_MSK_STS 0x0005c #define V3D_HUB_INT_MSK_SET 0x00060 #define V3D_HUB_INT_MSK_CLR 0x00064 +# define V3D_V7_HUB_INT_GMPV BIT(6) # define V3D_HUB_INT_MMU_WRV BIT(5) # define V3D_HUB_INT_MMU_PTI BIT(4) # define V3D_HUB_INT_MMU_CAP BIT(3) @@ -64,6 +65,7 @@ # define V3D_HUB_INT_TFUC BIT(1) # define V3D_HUB_INT_TFUF BIT(0) +/* GCA registers only exist in V3D < 41 */ #define V3D_GCA_CACHE_CTRL 0x0000c # define V3D_GCA_CACHE_CTRL_FLUSH BIT(0) @@ -86,7 +88,8 @@ # define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c # define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0) -#define V3D_TFU_CS 0x00400 +#define V3D_TFU_CS(ver) ((ver >= 71) ? 0x00700 : 0x00400) + /* Stops current job, empties input fifo. 
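The (ver) register macros in this header resolve to the 4.x or 7.x MMIO offset at each call site, so callers stay version-agnostic; evaluating a couple of them by hand against the definitions above and below (a sketch in the driver's own read idiom):

u32 tfu_cs = V3D_READ(V3D_TFU_CS(v3d->ver));		/* hub reg: 0x00400 on 4.x, 0x00700 on 7.x */
u32 gmp_st = V3D_CORE_READ(0, V3D_GMP_STATUS(v3d->ver));	/* core reg: 0x00800 on 4.x, 0x00600 on 7.x */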
*/ # define V3D_TFU_CS_TFURST BIT(31) # define V3D_TFU_CS_CVTCT_MASK V3D_MASK(23, 16) @@ -95,7 +98,7 @@ # define V3D_TFU_CS_NFREE_SHIFT 8 # define V3D_TFU_CS_BUSY BIT(0) -#define V3D_TFU_SU 0x00404 +#define V3D_TFU_SU(ver) ((ver >= 71) ? 0x00704 : 0x00404) /* Interrupt when FINTTHR input slots are free (0 = disabled) */ # define V3D_TFU_SU_FINTTHR_MASK V3D_MASK(13, 8) # define V3D_TFU_SU_FINTTHR_SHIFT 8 @@ -106,39 +109,42 @@ # define V3D_TFU_SU_THROTTLE_MASK V3D_MASK(1, 0) # define V3D_TFU_SU_THROTTLE_SHIFT 0 -#define V3D_TFU_ICFG 0x00408 +#define V3D_TFU_ICFG(ver) ((ver >= 71) ? 0x00708 : 0x00408) /* Interrupt when the conversion is complete. */ # define V3D_TFU_ICFG_IOC BIT(0) /* Input Image Address */ -#define V3D_TFU_IIA 0x0040c +#define V3D_TFU_IIA(ver) ((ver >= 71) ? 0x0070c : 0x0040c) /* Input Chroma Address */ -#define V3D_TFU_ICA 0x00410 +#define V3D_TFU_ICA(ver) ((ver >= 71) ? 0x00710 : 0x00410) /* Input Image Stride */ -#define V3D_TFU_IIS 0x00414 +#define V3D_TFU_IIS(ver) ((ver >= 71) ? 0x00714 : 0x00414) /* Input Image U-Plane Address */ -#define V3D_TFU_IUA 0x00418 +#define V3D_TFU_IUA(ver) ((ver >= 71) ? 0x00718 : 0x00418) +/* Image output config (VD 7.x only) */ +#define V3D_V7_TFU_IOC 0x0071c /* Output Image Address */ -#define V3D_TFU_IOA 0x0041c +#define V3D_TFU_IOA(ver) ((ver >= 71) ? 0x00720 : 0x0041c) /* Image Output Size */ -#define V3D_TFU_IOS 0x00420 +#define V3D_TFU_IOS(ver) ((ver >= 71) ? 0x00724 : 0x00420) /* TFU YUV Coefficient 0 */ -#define V3D_TFU_COEF0 0x00424 -/* Use these regs instead of the defaults. */ +#define V3D_TFU_COEF0(ver) ((ver >= 71) ? 0x00728 : 0x00424) +/* Use these regs instead of the defaults (V3D 4.x only) */ # define V3D_TFU_COEF0_USECOEF BIT(31) /* TFU YUV Coefficient 1 */ -#define V3D_TFU_COEF1 0x00428 +#define V3D_TFU_COEF1(ver) ((ver >= 71) ? 0x0072c : 0x00428) /* TFU YUV Coefficient 2 */ -#define V3D_TFU_COEF2 0x0042c +#define V3D_TFU_COEF2(ver) ((ver >= 71) ? 0x00730 : 0x0042c) /* TFU YUV Coefficient 3 */ -#define V3D_TFU_COEF3 0x00430 +#define V3D_TFU_COEF3(ver) ((ver >= 71) ? 0x00734 : 0x00430) +/* V3D 4.x only */ #define V3D_TFU_CRC 0x00434 /* Per-MMU registers. */ #define V3D_MMUC_CONTROL 0x01000 -# define V3D_MMUC_CONTROL_CLEAR BIT(3) +#define V3D_MMUC_CONTROL_CLEAR(ver) ((ver >= 71) ? BIT(11) : BIT(3)) # define V3D_MMUC_CONTROL_FLUSHING BIT(2) # define V3D_MMUC_CONTROL_FLUSH BIT(1) # define V3D_MMUC_CONTROL_ENABLE BIT(0) @@ -246,7 +252,6 @@ #define V3D_CTL_L2TCACTL 0x00030 # define V3D_L2TCACTL_TMUWCF BIT(8) -# define V3D_L2TCACTL_L2T_NO_WM BIT(4) /* Invalidates cache lines. */ # define V3D_L2TCACTL_FLM_FLUSH 0 /* Removes cachelines without writing dirty lines back. */ @@ -267,8 +272,8 @@ #define V3D_CTL_INT_MSK_CLR 0x00064 # define V3D_INT_QPU_MASK V3D_MASK(27, 16) # define V3D_INT_QPU_SHIFT 16 -# define V3D_INT_CSDDONE BIT(7) -# define V3D_INT_PCTR BIT(6) +#define V3D_INT_CSDDONE(ver) ((ver >= 71) ? BIT(6) : BIT(7)) +#define V3D_INT_PCTR(ver) ((ver >= 71) ? 
BIT(5) : BIT(6)) # define V3D_INT_GMPV BIT(5) # define V3D_INT_TRFB BIT(4) # define V3D_INT_SPILLUSE BIT(3) @@ -350,21 +355,25 @@ #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) # define V3D_PCTR_S0_MASK V3D_MASK(6, 0) +# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) # define V3D_PCTR_S0_SHIFT 0 # define V3D_PCTR_S1_MASK V3D_MASK(14, 8) +# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) # define V3D_PCTR_S1_SHIFT 8 # define V3D_PCTR_S2_MASK V3D_MASK(22, 16) +# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) # define V3D_PCTR_S2_SHIFT 16 # define V3D_PCTR_S3_MASK V3D_MASK(30, 24) +# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) # define V3D_PCTR_S3_SHIFT 24 -# define V3D_PCTR_CYCLE_COUNT 32 +#define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ #define V3D_PCTR_0_PCTR0 0x00680 #define V3D_PCTR_0_PCTR31 0x006fc #define V3D_PCTR_0_PCTRX(x) (V3D_PCTR_0_PCTR0 + \ 4 * (x)) -#define V3D_GMP_STATUS 0x00800 +#define V3D_GMP_STATUS(ver) ((ver >= 71) ? 0x00600 : 0x00800) # define V3D_GMP_STATUS_GMPRST BIT(31) # define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24) # define V3D_GMP_STATUS_WR_COUNT_SHIFT 24 @@ -377,13 +386,13 @@ # define V3D_GMP_STATUS_INVPROT BIT(1) # define V3D_GMP_STATUS_VIO BIT(0) -#define V3D_GMP_CFG 0x00804 +#define V3D_GMP_CFG(ver) ((ver >= 71) ? 0x00604 : 0x00804) # define V3D_GMP_CFG_LBURSTEN BIT(3) # define V3D_GMP_CFG_PGCRSEN BIT(2) # define V3D_GMP_CFG_STOP_REQ BIT(1) # define V3D_GMP_CFG_PROT_ENABLE BIT(0) -#define V3D_GMP_VIO_ADDR 0x00808 +#define V3D_GMP_VIO_ADDR(ver) ((ver >= 71) ? 0x00608 : 0x00808) #define V3D_GMP_VIO_TYPE 0x0080c #define V3D_GMP_TABLE_ADDR 0x00810 #define V3D_GMP_CLEAR_LOAD 0x00814 @@ -398,25 +407,25 @@ # define V3D_CSD_STATUS_HAVE_CURRENT_DISPATCH BIT(1) # define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH BIT(0) -#define V3D_CSD_QUEUED_CFG0 0x00904 +#define V3D_CSD_QUEUED_CFG0(ver) ((ver >= 71) ? 0x00930 : 0x00904) # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK V3D_MASK(31, 16) # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT 16 # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK V3D_MASK(15, 0) # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT 0 -#define V3D_CSD_QUEUED_CFG1 0x00908 +#define V3D_CSD_QUEUED_CFG1(ver) ((ver >= 71) ? 0x00934 : 0x00908) # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK V3D_MASK(31, 16) # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT 16 # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK V3D_MASK(15, 0) # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT 0 -#define V3D_CSD_QUEUED_CFG2 0x0090c +#define V3D_CSD_QUEUED_CFG2(ver) ((ver >= 71) ? 0x00938 : 0x0090c) # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK V3D_MASK(31, 16) # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT 16 # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK V3D_MASK(15, 0) # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT 0 -#define V3D_CSD_QUEUED_CFG3 0x00910 +#define V3D_CSD_QUEUED_CFG3(ver) ((ver >= 71) ? 0x0093c : 0x00910) # define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV BIT(26) # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK V3D_MASK(25, 20) # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT 20 @@ -428,23 +437,28 @@ # define V3D_CSD_QUEUED_CFG3_WG_SIZE_SHIFT 0 /* Number of batches, minus 1 */ -#define V3D_CSD_QUEUED_CFG4 0x00914 +#define V3D_CSD_QUEUED_CFG4(ver) ((ver >= 71) ? 0x00940 : 0x00914) /* Shader address, pnan, singleseg, threading, like a shader record. */ -#define V3D_CSD_QUEUED_CFG5 0x00918 +#define V3D_CSD_QUEUED_CFG5(ver) ((ver >= 71) ? 
0x00944 : 0x00918) /* Uniforms address (4 byte aligned) */ -#define V3D_CSD_QUEUED_CFG6 0x0091c - -#define V3D_CSD_CURRENT_CFG0 0x00920 -#define V3D_CSD_CURRENT_CFG1 0x00924 -#define V3D_CSD_CURRENT_CFG2 0x00928 -#define V3D_CSD_CURRENT_CFG3 0x0092c -#define V3D_CSD_CURRENT_CFG4 0x00930 -#define V3D_CSD_CURRENT_CFG5 0x00934 -#define V3D_CSD_CURRENT_CFG6 0x00938 - -#define V3D_CSD_CURRENT_ID0 0x0093c +#define V3D_CSD_QUEUED_CFG6(ver) ((ver >= 71) ? 0x00948 : 0x0091c) + +/* V3D 7.x+ only */ +#define V3D_V7_CSD_QUEUED_CFG7 0x0094c + +#define V3D_CSD_CURRENT_CFG0(ver) ((ver >= 71) ? 0x00958 : 0x00920) +#define V3D_CSD_CURRENT_CFG1(ver) ((ver >= 71) ? 0x0095c : 0x00924) +#define V3D_CSD_CURRENT_CFG2(ver) ((ver >= 71) ? 0x00960 : 0x00928) +#define V3D_CSD_CURRENT_CFG3(ver) ((ver >= 71) ? 0x00964 : 0x0092c) +#define V3D_CSD_CURRENT_CFG4(ver) ((ver >= 71) ? 0x00968 : 0x00930) +#define V3D_CSD_CURRENT_CFG5(ver) ((ver >= 71) ? 0x0096c : 0x00934) +#define V3D_CSD_CURRENT_CFG6(ver) ((ver >= 71) ? 0x00970 : 0x00938) +/* V3D 7.x+ only */ +#define V3D_V7_CSD_CURRENT_CFG7 0x00974 + +#define V3D_CSD_CURRENT_ID0(ver) ((ver >= 71) ? 0x00978 : 0x0093c) # define V3D_CSD_CURRENT_ID0_WG_X_MASK V3D_MASK(31, 16) # define V3D_CSD_CURRENT_ID0_WG_X_SHIFT 16 # define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK V3D_MASK(11, 8) @@ -452,7 +466,7 @@ # define V3D_CSD_CURRENT_ID0_L_IDX_MASK V3D_MASK(7, 0) # define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT 0 -#define V3D_CSD_CURRENT_ID1 0x00940 +#define V3D_CSD_CURRENT_ID1(ver) ((ver >= 71) ? 0x0097c : 0x00940) # define V3D_CSD_CURRENT_ID0_WG_Z_MASK V3D_MASK(31, 16) # define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT 16 # define V3D_CSD_CURRENT_ID0_WG_Y_MASK V3D_MASK(15, 0) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 038e1ae589c7..54015ad765c7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -18,12 +18,17 @@ * semaphores to interlock between them. 
*/ +#include <linux/sched/clock.h> #include <linux/kthread.h> +#include <drm/drm_syncobj.h> + #include "v3d_drv.h" #include "v3d_regs.h" #include "v3d_trace.h" +#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 + static struct v3d_job * to_v3d_job(struct drm_sched_job *sched_job) { @@ -54,6 +59,12 @@ to_csd_job(struct drm_sched_job *sched_job) return container_of(sched_job, struct v3d_csd_job, base.base); } +static struct v3d_cpu_job * +to_cpu_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_cpu_job, base.base); +} + static void v3d_sched_job_free(struct drm_sched_job *sched_job) { @@ -63,6 +74,28 @@ v3d_sched_job_free(struct drm_sched_job *sched_job) } static void +v3d_cpu_job_free(struct drm_sched_job *sched_job) +{ + struct v3d_cpu_job *job = to_cpu_job(sched_job); + struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; + struct v3d_performance_query_info *performance_query = &job->performance_query; + + if (timestamp_query->queries) { + for (int i = 0; i < timestamp_query->count; i++) + drm_syncobj_put(timestamp_query->queries[i].syncobj); + kvfree(timestamp_query->queries); + } + + if (performance_query->queries) { + for (int i = 0; i < performance_query->count; i++) + drm_syncobj_put(performance_query->queries[i].syncobj); + kvfree(performance_query->queries); + } + + v3d_job_cleanup(&job->base); +} + +static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { if (job->perfmon != v3d->active_perfmon) @@ -76,6 +109,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; @@ -107,6 +141,9 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_BIN] = local_clock(); + v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN]; + v3d_switch_perfmon(v3d, &job->base); /* Set the current and end address of the control list. 
@@ -131,6 +168,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) { struct v3d_render_job *job = to_render_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -158,6 +196,9 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_RENDER] = local_clock(); + v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER]; + v3d_switch_perfmon(v3d, &job->base); /* XXX: Set the QCFG */ @@ -176,6 +217,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) { struct v3d_tfu_job *job = to_tfu_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -190,20 +232,25 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); - V3D_WRITE(V3D_TFU_IIA, job->args.iia); - V3D_WRITE(V3D_TFU_IIS, job->args.iis); - V3D_WRITE(V3D_TFU_ICA, job->args.ica); - V3D_WRITE(V3D_TFU_IUA, job->args.iua); - V3D_WRITE(V3D_TFU_IOA, job->args.ioa); - V3D_WRITE(V3D_TFU_IOS, job->args.ios); - V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]); - if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) { - V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]); - V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]); - V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]); + file->start_ns[V3D_TFU] = local_clock(); + v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU]; + + V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia); + V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis); + V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); + V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); + V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); + if (v3d->ver >= 71) + V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); + V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); + V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); + if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { + V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); + V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); + V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); } /* ICFG kicks off the job. */ - V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC); + V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC); return fence; } @@ -213,9 +260,10 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) { struct v3d_csd_job *job = to_csd_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i; + int i, csd_cfg0_reg, csd_cfg_reg_count; v3d->csd_job = job; @@ -231,24 +279,314 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); + file->start_ns[V3D_CSD] = local_clock(); + v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD]; + v3d_switch_perfmon(v3d, &job->base); - for (i = 1; i <= 6; i++) - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]); + csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); + csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; + for (i = 1; i <= csd_cfg_reg_count; i++) + V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); /* CFG0 write kicks off the job. 
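To make the CSD CFG encoding concrete: CFG0..CFG2 carry the workgroup count in the upper 16 bits (the lower half is the workgroup offset, zero for a direct dispatch; see the NUM_WGS/OFFSET masks in v3d_regs.h), and CFG4 holds the number of 16-element batches minus one. A worked example for a hypothetical 8x4x2 dispatch with wg_size = 64:

u32 cfg0 = 8 << V3D_CSD_CFG012_WG_COUNT_SHIFT;	/* 8 workgroups in X */
u32 cfg1 = 4 << V3D_CSD_CFG012_WG_COUNT_SHIFT;	/* 4 workgroups in Y */
u32 cfg2 = 2 << V3D_CSD_CFG012_WG_COUNT_SHIFT;	/* 2 workgroups in Z */
u32 cfg4 = DIV_ROUND_UP(64, 16) * (8 * 4 * 2) - 1;	/* 4 * 64 - 1 == 255 */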
*/ - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]); + V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); return fence; } +static void +v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) +{ + struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); + struct drm_v3d_submit_csd *args = &indirect_csd->job->args; + u32 *wg_counts; + + v3d_get_bo_vaddr(bo); + v3d_get_bo_vaddr(indirect); + + wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset); + + if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0) + return; + + args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; + args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; + args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; + args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + for (int i = 0; i < 3; i++) { + /* 0xffffffff indicates that the uniform rewrite is not needed */ + if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) { + u32 uniform_idx = indirect_csd->wg_uniform_offsets[i]; + ((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i]; + } + } + + v3d_put_bo_vaddr(indirect); + v3d_put_bo_vaddr(bo); +} + +static void +v3d_timestamp_query(struct v3d_cpu_job *job) +{ + struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + u8 *value_addr; + + v3d_get_bo_vaddr(bo); + + for (int i = 0; i < timestamp_query->count; i++) { + value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset; + *((u64 *)value_addr) = i == 0 ? ktime_get_ns() : 0ull; + + drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj, + job->base.done_fence); + } + + v3d_put_bo_vaddr(bo); +} + +static void +v3d_reset_timestamp_queries(struct v3d_cpu_job *job) +{ + struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; + struct v3d_timestamp_query *queries = timestamp_query->queries; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + u8 *value_addr; + + v3d_get_bo_vaddr(bo); + + for (int i = 0; i < timestamp_query->count; i++) { + value_addr = ((u8 *)bo->vaddr) + queries[i].offset; + *((u64 *)value_addr) = 0; + + drm_syncobj_replace_fence(queries[i].syncobj, NULL); + } + + v3d_put_bo_vaddr(bo); +} + +static void +write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) +{ + if (do_64bit) { + u64 *dst64 = (u64 *)dst; + + dst64[idx] = value; + } else { + u32 *dst32 = (u32 *)dst; + + dst32[idx] = (u32)value; + } +} + +static void +v3d_copy_query_results(struct v3d_cpu_job *job) +{ + struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; + struct v3d_timestamp_query *queries = timestamp_query->queries; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]); + struct v3d_copy_query_results_info *copy = &job->copy; + struct dma_fence *fence; + u8 *query_addr; + bool available, write_result; + u8 *data; + int i; + + v3d_get_bo_vaddr(bo); + v3d_get_bo_vaddr(timestamp); + + data = ((u8 *)bo->vaddr) + copy->offset; + + for (i = 0; i < timestamp_query->count; i++) { + fence = drm_syncobj_fence_get(queries[i].syncobj); + available = fence ? 
dma_fence_is_signaled(fence) : false; + + write_result = available || copy->do_partial; + if (write_result) { + query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset; + write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr)); + } + + if (copy->availability_bit) + write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u); + + data += copy->stride; + + dma_fence_put(fence); + } + + v3d_put_bo_vaddr(timestamp); + v3d_put_bo_vaddr(bo); +} + +static void +v3d_reset_performance_queries(struct v3d_cpu_job *job) +{ + struct v3d_performance_query_info *performance_query = &job->performance_query; + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_dev *v3d = job->base.v3d; + struct v3d_perfmon *perfmon; + + for (int i = 0; i < performance_query->count; i++) { + for (int j = 0; j < performance_query->nperfmons; j++) { + perfmon = v3d_perfmon_find(v3d_priv, + performance_query->queries[i].kperfmon_ids[j]); + if (!perfmon) { + DRM_DEBUG("Failed to find perfmon."); + continue; + } + + v3d_perfmon_stop(v3d, perfmon, false); + + memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64)); + + v3d_perfmon_put(perfmon); + } + + drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL); + } +} + +static void +v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query) +{ + struct v3d_performance_query_info *performance_query = &job->performance_query; + struct v3d_copy_query_results_info *copy = &job->copy; + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_dev *v3d = job->base.v3d; + struct v3d_perfmon *perfmon; + u64 counter_values[V3D_PERFCNT_NUM]; + + for (int i = 0; i < performance_query->nperfmons; i++) { + perfmon = v3d_perfmon_find(v3d_priv, + performance_query->queries[query].kperfmon_ids[i]); + if (!perfmon) { + DRM_DEBUG("Failed to find perfmon."); + continue; + } + + v3d_perfmon_stop(v3d, perfmon, true); + + memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, + perfmon->ncounters * sizeof(u64)); + + v3d_perfmon_put(perfmon); + } + + for (int i = 0; i < performance_query->ncounters; i++) + write_to_buffer(data, i, copy->do_64bit, counter_values[i]); +} + +static void +v3d_copy_performance_query(struct v3d_cpu_job *job) +{ + struct v3d_performance_query_info *performance_query = &job->performance_query; + struct v3d_copy_query_results_info *copy = &job->copy; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + struct dma_fence *fence; + bool available, write_result; + u8 *data; + + v3d_get_bo_vaddr(bo); + + data = ((u8 *)bo->vaddr) + copy->offset; + + for (int i = 0; i < performance_query->count; i++) { + fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj); + available = fence ? dma_fence_is_signaled(fence) : false; + + write_result = available || copy->do_partial; + if (write_result) + v3d_write_performance_query_result(job, data, i); + + if (copy->availability_bit) + write_to_buffer(data, performance_query->ncounters, + copy->do_64bit, available ? 
1u : 0u); + + data += copy->stride; + + dma_fence_put(fence); + } + + v3d_put_bo_vaddr(bo); +} + +static const v3d_cpu_job_fn cpu_job_function[] = { + [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect, + [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, + [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries, + [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results, + [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries, + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query, +}; + +static struct dma_fence * +v3d_cpu_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_cpu_job *job = to_cpu_job(sched_job); + struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; + u64 runtime; + + v3d->cpu_job = job; + + if (job->job_type >= ARRAY_SIZE(cpu_job_function)) { + DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type); + return NULL; + } + + file->start_ns[V3D_CPU] = local_clock(); + v3d->queue[V3D_CPU].start_ns = file->start_ns[V3D_CPU]; + + trace_v3d_cpu_job_begin(&v3d->drm, job->job_type); + + cpu_job_function[job->job_type](job); + + trace_v3d_cpu_job_end(&v3d->drm, job->job_type); + + runtime = local_clock() - file->start_ns[V3D_CPU]; + + file->enabled_ns[V3D_CPU] += runtime; + v3d->queue[V3D_CPU].enabled_ns += runtime; + + file->jobs_sent[V3D_CPU]++; + v3d->queue[V3D_CPU].jobs_sent++; + + file->start_ns[V3D_CPU] = 0; + v3d->queue[V3D_CPU].start_ns = 0; + + return NULL; +} + static struct dma_fence * v3d_cache_clean_job_run(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); struct v3d_dev *v3d = job->v3d; + struct v3d_file_priv *file = job->file->driver_priv; + u64 runtime; + + file->start_ns[V3D_CACHE_CLEAN] = local_clock(); + v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN]; v3d_clean_caches(v3d); + runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN]; + + file->enabled_ns[V3D_CACHE_CLEAN] += runtime; + v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime; + + file->jobs_sent[V3D_CACHE_CLEAN]++; + v3d->queue[V3D_CACHE_CLEAN].jobs_sent++; + + file->start_ns[V3D_CACHE_CLEAN] = 0; + v3d->queue[V3D_CACHE_CLEAN].start_ns = 0; + return NULL; } @@ -336,7 +674,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) { struct v3d_csd_job *job = to_csd_job(sched_job); struct v3d_dev *v3d = job->base.v3d; - u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4); + u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); /* If we've made progress, skip reset and let the timer get * rearmed. 
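The new V3D_CPU queue above runs jobs on the CPU rather than on the GPU: v3d_cpu_job_run() looks up the handler in cpu_job_function[], a function-pointer table indexed by job type and bounds-checked with ARRAY_SIZE() before the call. A minimal self-contained sketch of that dispatch pattern follows; every name in it is illustrative, none is taken from the driver.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum cpu_job_type {
	CPU_JOB_NONE,		/* 0 reserved: an unset type must not dispatch */
	CPU_JOB_TIMESTAMP_QUERY,
	CPU_JOB_COPY_QUERY,
};

struct cpu_job {
	enum cpu_job_type type;
};

typedef void (*cpu_job_fn)(struct cpu_job *job);

static void run_timestamp_query(struct cpu_job *job)
{
	(void)job;
	printf("timestamp query\n");
}

static void run_copy_query(struct cpu_job *job)
{
	(void)job;
	printf("copy query\n");
}

/* Sparse table: the CPU_JOB_NONE slot stays NULL, so both checks below matter. */
static const cpu_job_fn cpu_job_table[] = {
	[CPU_JOB_TIMESTAMP_QUERY] = run_timestamp_query,
	[CPU_JOB_COPY_QUERY] = run_copy_query,
};

static int dispatch_cpu_job(struct cpu_job *job)
{
	/* Bounds-check before indexing, as v3d_cpu_job_run() does. */
	if (job->type >= ARRAY_SIZE(cpu_job_table) || !cpu_job_table[job->type])
		return -1;

	cpu_job_table[job->type](job);
	return 0;
}

int main(void)
{
	struct cpu_job job = { .type = CPU_JOB_COPY_QUERY };

	return dispatch_cpu_job(&job);
}

Designated initializers keep the table in sync with the enum, so adding a job type touches only two places; a forgotten entry simply stays NULL and is rejected at dispatch time.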
@@ -379,6 +717,12 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { .free_job = v3d_sched_job_free }; +static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { + .run_job = v3d_cpu_job_run, + .timedout_job = v3d_generic_job_timedout, + .free_job = v3d_cpu_job_free +}; + int v3d_sched_init(struct v3d_dev *v3d) { @@ -388,7 +732,7 @@ v3d_sched_init(struct v3d_dev *v3d) int ret; ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, - &v3d_bin_sched_ops, + &v3d_bin_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -397,7 +741,7 @@ v3d_sched_init(struct v3d_dev *v3d) return ret; ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, - &v3d_render_sched_ops, + &v3d_render_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -406,7 +750,7 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, - &v3d_tfu_sched_ops, + &v3d_tfu_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -416,7 +760,7 @@ v3d_sched_init(struct v3d_dev *v3d) if (v3d_has_csd(v3d)) { ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, - &v3d_csd_sched_ops, + &v3d_csd_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -425,7 +769,7 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, - &v3d_cache_clean_sched_ops, + &v3d_cache_clean_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -434,6 +778,15 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; } + ret = drm_sched_init(&v3d->queue[V3D_CPU].sched, + &v3d_cpu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + 1, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_cpu", v3d->drm.dev); + if (ret) + goto fail; + return 0; fail: diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c new file mode 100644 index 000000000000..fcff41dd2315 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -0,0 +1,1320 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2014-2018 Broadcom + * Copyright (C) 2023 Raspberry Pi + */ + +#include <drm/drm_syncobj.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL, the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to v3d, so we don't attach dma-buf fences to them. + */ +static int +v3d_lock_bo_reservations(struct v3d_job *job, + struct ww_acquire_ctx *acquire_ctx) +{ + int i, ret; + + ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx); + if (ret) + return ret; + + for (i = 0; i < job->bo_count; i++) { + ret = dma_resv_reserve_fences(job->bo[i]->resv, 1); + if (ret) + goto fail; + + ret = drm_sched_job_add_implicit_dependencies(&job->base, + job->bo[i], true); + if (ret) + goto fail; + } + + return 0; + +fail: + drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); + return ret; +} + +/** + * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects + * referenced by the job.
+ * @dev: DRM device + * @file_priv: DRM file for this fd + * @job: V3D job being set up + * @bo_handles: GEM handles + * @bo_count: Number of GEM handles passed in + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's + * BO list and reference counting for the lifetime of the job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at v3d_exec_cleanup() time. + */ +static int +v3d_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct v3d_job *job, + u64 bo_handles, + u32 bo_count) +{ + job->bo_count = bo_count; + + if (!job->bo_count) { + /* See comment on bo_index for why we have to check + * this. + */ + DRM_DEBUG("Rendering requires BOs\n"); + return -EINVAL; + } + + return drm_gem_objects_lookup(file_priv, + (void __user *)(uintptr_t)bo_handles, + job->bo_count, &job->bo); +} + +static void +v3d_job_free(struct kref *ref) +{ + struct v3d_job *job = container_of(ref, struct v3d_job, refcount); + int i; + + if (job->bo) { + for (i = 0; i < job->bo_count; i++) + drm_gem_object_put(job->bo[i]); + kvfree(job->bo); + } + + dma_fence_put(job->irq_fence); + dma_fence_put(job->done_fence); + + if (job->perfmon) + v3d_perfmon_put(job->perfmon); + + kfree(job); +} + +static void +v3d_render_job_free(struct kref *ref) +{ + struct v3d_render_job *job = container_of(ref, struct v3d_render_job, + base.refcount); + struct v3d_bo *bo, *save; + + list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { + drm_gem_object_put(&bo->base.base); + } + + v3d_job_free(ref); +} + +void v3d_job_cleanup(struct v3d_job *job) +{ + if (!job) + return; + + drm_sched_job_cleanup(&job->base); + v3d_job_put(job); +} + +void v3d_job_put(struct v3d_job *job) +{ + if (!job) + return; + + kref_put(&job->refcount, job->free); +} + +static int +v3d_job_allocate(void **container, size_t size) +{ + *container = kcalloc(1, size, GFP_KERNEL); + if (!*container) { + DRM_ERROR("Cannot allocate memory for V3D job.\n"); + return -ENOMEM; + } + + return 0; +} + +static int +v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, + struct v3d_job *job, void (*free)(struct kref *ref), + u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); + int ret, i; + + job->v3d = v3d; + job->free = free; + job->file = file_priv; + + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], + 1, v3d_priv); + if (ret) + return ret; + + if (has_multisync) { + if (se->in_sync_count && se->wait_stage == queue) { + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); + + for (i = 0; i < se->in_sync_count; i++) { + struct drm_v3d_sem in; + + if (copy_from_user(&in, handle++, sizeof(in))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy wait dep handle.\n"); + goto fail_deps; + } + ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0); + + // TODO: Investigate why this was filtered out for the IOCTL. + if (ret && ret != -ENOENT) + goto fail_deps; + } + } + } else { + ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0); + + // TODO: Investigate why this was filtered out for the IOCTL. 
+ if (ret && ret != -ENOENT) + goto fail_deps; + } + + kref_init(&job->refcount); + + return 0; + +fail_deps: + drm_sched_job_cleanup(&job->base); + return ret; +} + +static void +v3d_push_job(struct v3d_job *job) +{ + drm_sched_job_arm(&job->base); + + job->done_fence = dma_fence_get(&job->base.s_fence->finished); + + /* put by scheduler job completion */ + kref_get(&job->refcount); + + drm_sched_entity_push_job(&job->base); +} + +static void +v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, + struct v3d_job *job, + struct ww_acquire_ctx *acquire_ctx, + u32 out_sync, + struct v3d_submit_ext *se, + struct dma_fence *done_fence) +{ + struct drm_syncobj *sync_out; + bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); + int i; + + for (i = 0; i < job->bo_count; i++) { + /* XXX: Use shared fences for read-only objects. */ + dma_resv_add_fence(job->bo[i]->resv, job->done_fence, + DMA_RESV_USAGE_WRITE); + } + + drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); + + /* Update the return sync object for the job */ + /* If it only supports a single signal semaphore*/ + if (!has_multisync) { + sync_out = drm_syncobj_find(file_priv, out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, done_fence); + drm_syncobj_put(sync_out); + } + return; + } + + /* If multiple semaphores extension is supported */ + if (se->out_sync_count) { + for (i = 0; i < se->out_sync_count; i++) { + drm_syncobj_replace_fence(se->out_syncs[i].syncobj, + done_fence); + drm_syncobj_put(se->out_syncs[i].syncobj); + } + kvfree(se->out_syncs); + } +} + +static int +v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, + struct v3d_dev *v3d, + struct drm_v3d_submit_csd *args, + struct v3d_csd_job **job, + struct v3d_job **clean_job, + struct v3d_submit_ext *se, + struct ww_acquire_ctx *acquire_ctx) +{ + int ret; + + ret = v3d_job_allocate((void *)job, sizeof(**job)); + if (ret) + return ret; + + ret = v3d_job_init(v3d, file_priv, &(*job)->base, + v3d_job_free, args->in_sync, se, V3D_CSD); + if (ret) + return ret; + + ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job)); + if (ret) + return ret; + + ret = v3d_job_init(v3d, file_priv, *clean_job, + v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); + if (ret) + return ret; + + (*job)->args = *args; + + ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job, + args->bo_handles, args->bo_handle_count); + if (ret) + return ret; + + return v3d_lock_bo_reservations(*clean_job, acquire_ctx); +} + +static void +v3d_put_multisync_post_deps(struct v3d_submit_ext *se) +{ + unsigned int i; + + if (!(se && se->out_sync_count)) + return; + + for (i = 0; i < se->out_sync_count; i++) + drm_syncobj_put(se->out_syncs[i].syncobj); + kvfree(se->out_syncs); +} + +static int +v3d_get_multisync_post_deps(struct drm_file *file_priv, + struct v3d_submit_ext *se, + u32 count, u64 handles) +{ + struct drm_v3d_sem __user *post_deps; + int i, ret; + + if (!count) + return 0; + + se->out_syncs = (struct v3d_submit_outsync *) + kvmalloc_array(count, + sizeof(struct v3d_submit_outsync), + GFP_KERNEL); + if (!se->out_syncs) + return -ENOMEM; + + post_deps = u64_to_user_ptr(handles); + + for (i = 0; i < count; i++) { + struct drm_v3d_sem out; + + if (copy_from_user(&out, post_deps++, sizeof(out))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy post dep handles\n"); + goto fail; + } + + se->out_syncs[i].syncobj = drm_syncobj_find(file_priv, + out.handle); + if (!se->out_syncs[i].syncobj) { + ret = -EINVAL; + goto fail; + } + } + se->out_sync_count = count; 
+ + return 0; + +fail: + for (i--; i >= 0; i--) + drm_syncobj_put(se->out_syncs[i].syncobj); + kvfree(se->out_syncs); + + return ret; +} + +/* Get data for multiple binary semaphores synchronization. Parse syncobj + * to be signaled when job completes (out_sync). + */ +static int +v3d_get_multisync_submit_deps(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_submit_ext *se) +{ + struct drm_v3d_multi_sync multisync; + int ret; + + if (se->in_sync_count || se->out_sync_count) { + DRM_DEBUG("Two multisync extensions were added to the same job."); + return -EINVAL; + } + + if (copy_from_user(&multisync, ext, sizeof(multisync))) + return -EFAULT; + + if (multisync.pad) + return -EINVAL; + + ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count, + multisync.out_syncs); + if (ret) + return ret; + + se->in_sync_count = multisync.in_sync_count; + se->in_syncs = multisync.in_syncs; + se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC; + se->wait_stage = multisync.wait_stage; + + return 0; +} + +/* Get data for the indirect CSD job submission. */ +static int +v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct v3d_dev *v3d = v3d_priv->v3d; + struct drm_v3d_indirect_csd indirect_csd; + struct v3d_indirect_csd_info *info = &job->indirect_csd; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd))) + return -EFAULT; + + if (!v3d_has_csd(v3d)) { + DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n"); + return -EINVAL; + } + + job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD; + info->offset = indirect_csd.offset; + info->wg_size = indirect_csd.wg_size; + memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets, + sizeof(indirect_csd.wg_uniform_offsets)); + + info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect); + + return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit, + &info->job, &info->clean_job, + NULL, &info->acquire_ctx); +} + +/* Get data for the query timestamp job submission. 
*/ +static int +v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *offsets, *syncs; + struct drm_v3d_timestamp_query timestamp; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(×tamp, ext, sizeof(timestamp))) + return -EFAULT; + + if (timestamp.pad) + return -EINVAL; + + job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY; + + job->timestamp_query.queries = kvmalloc_array(timestamp.count, + sizeof(struct v3d_timestamp_query), + GFP_KERNEL); + if (!job->timestamp_query.queries) + return -ENOMEM; + + offsets = u64_to_user_ptr(timestamp.offsets); + syncs = u64_to_user_ptr(timestamp.syncs); + + for (int i = 0; i < timestamp.count; i++) { + u32 offset, sync; + + if (copy_from_user(&offset, offsets++, sizeof(offset))) { + kvfree(job->timestamp_query.queries); + return -EFAULT; + } + + job->timestamp_query.queries[i].offset = offset; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->timestamp_query.queries); + return -EFAULT; + } + + job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + } + job->timestamp_query.count = timestamp.count; + + return 0; +} + +static int +v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *syncs; + struct drm_v3d_reset_timestamp_query reset; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(&reset, ext, sizeof(reset))) + return -EFAULT; + + job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY; + + job->timestamp_query.queries = kvmalloc_array(reset.count, + sizeof(struct v3d_timestamp_query), + GFP_KERNEL); + if (!job->timestamp_query.queries) + return -ENOMEM; + + syncs = u64_to_user_ptr(reset.syncs); + + for (int i = 0; i < reset.count; i++) { + u32 sync; + + job->timestamp_query.queries[i].offset = reset.offset + 8 * i; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->timestamp_query.queries); + return -EFAULT; + } + + job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + } + job->timestamp_query.count = reset.count; + + return 0; +} + +/* Get data for the copy timestamp query results job submission. 
*/ +static int +v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *offsets, *syncs; + struct drm_v3d_copy_timestamp_query copy; + int i; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(©, ext, sizeof(copy))) + return -EFAULT; + + if (copy.pad) + return -EINVAL; + + job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY; + + job->timestamp_query.queries = kvmalloc_array(copy.count, + sizeof(struct v3d_timestamp_query), + GFP_KERNEL); + if (!job->timestamp_query.queries) + return -ENOMEM; + + offsets = u64_to_user_ptr(copy.offsets); + syncs = u64_to_user_ptr(copy.syncs); + + for (i = 0; i < copy.count; i++) { + u32 offset, sync; + + if (copy_from_user(&offset, offsets++, sizeof(offset))) { + kvfree(job->timestamp_query.queries); + return -EFAULT; + } + + job->timestamp_query.queries[i].offset = offset; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->timestamp_query.queries); + return -EFAULT; + } + + job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + } + job->timestamp_query.count = copy.count; + + job->copy.do_64bit = copy.do_64bit; + job->copy.do_partial = copy.do_partial; + job->copy.availability_bit = copy.availability_bit; + job->copy.offset = copy.offset; + job->copy.stride = copy.stride; + + return 0; +} + +static int +v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *syncs; + u64 __user *kperfmon_ids; + struct drm_v3d_reset_performance_query reset; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(&reset, ext, sizeof(reset))) + return -EFAULT; + + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; + + job->performance_query.queries = kvmalloc_array(reset.count, + sizeof(struct v3d_performance_query), + GFP_KERNEL); + if (!job->performance_query.queries) + return -ENOMEM; + + syncs = u64_to_user_ptr(reset.syncs); + kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); + + for (int i = 0; i < reset.count; i++) { + u32 sync; + u64 ids; + u32 __user *ids_pointer; + u32 id; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + + if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + ids_pointer = u64_to_user_ptr(ids); + + for (int j = 0; j < reset.nperfmons; j++) { + if (copy_from_user(&id, ids_pointer++, sizeof(id))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].kperfmon_ids[j] = id; + } + } + job->performance_query.count = reset.count; + job->performance_query.nperfmons = reset.nperfmons; + + return 0; +} + +static int +v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *syncs; + u64 __user *kperfmon_ids; + struct drm_v3d_copy_performance_query copy; + + if (!job) { + DRM_DEBUG("CPU 
job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(©, ext, sizeof(copy))) + return -EFAULT; + + if (copy.pad) + return -EINVAL; + + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; + + job->performance_query.queries = kvmalloc_array(copy.count, + sizeof(struct v3d_performance_query), + GFP_KERNEL); + if (!job->performance_query.queries) + return -ENOMEM; + + syncs = u64_to_user_ptr(copy.syncs); + kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); + + for (int i = 0; i < copy.count; i++) { + u32 sync; + u64 ids; + u32 __user *ids_pointer; + u32 id; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + + if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + ids_pointer = u64_to_user_ptr(ids); + + for (int j = 0; j < copy.nperfmons; j++) { + if (copy_from_user(&id, ids_pointer++, sizeof(id))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].kperfmon_ids[j] = id; + } + } + job->performance_query.count = copy.count; + job->performance_query.nperfmons = copy.nperfmons; + job->performance_query.ncounters = copy.ncounters; + + job->copy.do_64bit = copy.do_64bit; + job->copy.do_partial = copy.do_partial; + job->copy.availability_bit = copy.availability_bit; + job->copy.offset = copy.offset; + job->copy.stride = copy.stride; + + return 0; +} + +/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data + * according to the extension id (name). + */ +static int +v3d_get_extensions(struct drm_file *file_priv, + u64 ext_handles, + struct v3d_submit_ext *se, + struct v3d_cpu_job *job) +{ + struct drm_v3d_extension __user *user_ext; + int ret; + + user_ext = u64_to_user_ptr(ext_handles); + while (user_ext) { + struct drm_v3d_extension ext; + + if (copy_from_user(&ext, user_ext, sizeof(ext))) { + DRM_DEBUG("Failed to copy submit extension\n"); + return -EFAULT; + } + + switch (ext.id) { + case DRM_V3D_EXT_ID_MULTI_SYNC: + ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se); + break; + case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD: + ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job); + break; + case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY: + ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job); + break; + case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY: + ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job); + break; + case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY: + ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job); + break; + case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: + ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); + break; + case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: + ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); + break; + default: + DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); + return -EINVAL; + } + + if (ret) + return ret; + + user_ext = u64_to_user_ptr(ext.next); + } + + return 0; +} + +/** + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 
+ * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * This is the main entrypoint for userspace to submit a 3D frame to + * the GPU. Userspace provides the binner command list (if + * applicable), and the kernel sets up the render command list to draw + * to the framebuffer described in the ioctl, using the command lists + * that the 3D engine's binner will produce. + */ +int +v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_cl *args = data; + struct v3d_submit_ext se = {0}; + struct v3d_bin_job *bin = NULL; + struct v3d_render_job *render = NULL; + struct v3d_job *clean_job = NULL; + struct v3d_job *last_job; + struct ww_acquire_ctx acquire_ctx; + int ret = 0; + + trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); + + if (args->pad) + return -EINVAL; + + if (args->flags && + args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE | + DRM_V3D_SUBMIT_EXTENSION)) { + DRM_INFO("invalid flags: %d\n", args->flags); + return -EINVAL; + } + + if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { + ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); + if (ret) { + DRM_DEBUG("Failed to get extensions.\n"); + return ret; + } + } + + ret = v3d_job_allocate((void *)&render, sizeof(*render)); + if (ret) + return ret; + + ret = v3d_job_init(v3d, file_priv, &render->base, + v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); + if (ret) + goto fail; + + render->start = args->rcl_start; + render->end = args->rcl_end; + INIT_LIST_HEAD(&render->unref_list); + + if (args->bcl_start != args->bcl_end) { + ret = v3d_job_allocate((void *)&bin, sizeof(*bin)); + if (ret) + goto fail; + + ret = v3d_job_init(v3d, file_priv, &bin->base, + v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); + if (ret) + goto fail; + + bin->start = args->bcl_start; + bin->end = args->bcl_end; + bin->qma = args->qma; + bin->qms = args->qms; + bin->qts = args->qts; + bin->render = render; + } + + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { + ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job)); + if (ret) + goto fail; + + ret = v3d_job_init(v3d, file_priv, clean_job, + v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); + if (ret) + goto fail; + + last_job = clean_job; + } else { + last_job = &render->base; + } + + ret = v3d_lookup_bos(dev, file_priv, last_job, + args->bo_handles, args->bo_handle_count); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); + if (ret) + goto fail; + + if (args->perfmon_id) { + render->base.perfmon = v3d_perfmon_find(v3d_priv, + args->perfmon_id); + + if (!render->base.perfmon) { + ret = -ENOENT; + goto fail_perfmon; + } + } + + mutex_lock(&v3d->sched_lock); + if (bin) { + bin->base.perfmon = render->base.perfmon; + v3d_perfmon_get(bin->base.perfmon); + v3d_push_job(&bin->base); + + ret = drm_sched_job_add_dependency(&render->base.base, + dma_fence_get(bin->base.done_fence)); + if (ret) + goto fail_unreserve; + } + + v3d_push_job(&render->base); + + if (clean_job) { + struct dma_fence *render_fence = + dma_fence_get(render->base.done_fence); + ret = drm_sched_job_add_dependency(&clean_job->base, + render_fence); + if (ret) + goto fail_unreserve; + clean_job->perfmon = render->base.perfmon; + v3d_perfmon_get(clean_job->perfmon); + v3d_push_job(clean_job); + } + + mutex_unlock(&v3d->sched_lock); + + v3d_attach_fences_and_unlock_reservation(file_priv, + 
last_job, + &acquire_ctx, + args->out_sync, + &se, + last_job->done_fence); + + v3d_job_put(&bin->base); + v3d_job_put(&render->base); + v3d_job_put(clean_job); + + return 0; + +fail_unreserve: + mutex_unlock(&v3d->sched_lock); +fail_perfmon: + drm_gem_unlock_reservations(last_job->bo, + last_job->bo_count, &acquire_ctx); +fail: + v3d_job_cleanup((void *)bin); + v3d_job_cleanup((void *)render); + v3d_job_cleanup(clean_job); + v3d_put_multisync_post_deps(&se); + + return ret; +} + +/** + * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Userspace provides the register setup for the TFU, which we don't + * need to validate since the TFU is behind the MMU. + */ +int +v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_v3d_submit_tfu *args = data; + struct v3d_submit_ext se = {0}; + struct v3d_tfu_job *job = NULL; + struct ww_acquire_ctx acquire_ctx; + int ret = 0; + + trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); + + if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { + DRM_DEBUG("invalid flags: %d\n", args->flags); + return -EINVAL; + } + + if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { + ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); + if (ret) { + DRM_DEBUG("Failed to get extensions.\n"); + return ret; + } + } + + ret = v3d_job_allocate((void *)&job, sizeof(*job)); + if (ret) + return ret; + + ret = v3d_job_init(v3d, file_priv, &job->base, + v3d_job_free, args->in_sync, &se, V3D_TFU); + if (ret) + goto fail; + + job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), + sizeof(*job->base.bo), GFP_KERNEL); + if (!job->base.bo) { + ret = -ENOMEM; + goto fail; + } + + job->args = *args; + + for (job->base.bo_count = 0; + job->base.bo_count < ARRAY_SIZE(args->bo_handles); + job->base.bo_count++) { + struct drm_gem_object *bo; + + if (!args->bo_handles[job->base.bo_count]) + break; + + bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", + job->base.bo_count, + args->bo_handles[job->base.bo_count]); + ret = -ENOENT; + goto fail; + } + job->base.bo[job->base.bo_count] = bo; + } + + ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); + if (ret) + goto fail; + + mutex_lock(&v3d->sched_lock); + v3d_push_job(&job->base); + mutex_unlock(&v3d->sched_lock); + + v3d_attach_fences_and_unlock_reservation(file_priv, + &job->base, &acquire_ctx, + args->out_sync, + &se, + job->base.done_fence); + + v3d_job_put(&job->base); + + return 0; + +fail: + v3d_job_cleanup((void *)job); + v3d_put_multisync_post_deps(&se); + + return ret; +} + +/** + * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Userspace provides the register setup for the CSD, which we don't + * need to validate since the CSD is behind the MMU. 
+ */ +int +v3d_submit_csd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_csd *args = data; + struct v3d_submit_ext se = {0}; + struct v3d_csd_job *job = NULL; + struct v3d_job *clean_job = NULL; + struct ww_acquire_ctx acquire_ctx; + int ret; + + trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]); + + if (args->pad) + return -EINVAL; + + if (!v3d_has_csd(v3d)) { + DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n"); + return -EINVAL; + } + + if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { + DRM_INFO("invalid flags: %d\n", args->flags); + return -EINVAL; + } + + if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { + ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); + if (ret) { + DRM_DEBUG("Failed to get extensions.\n"); + return ret; + } + } + + ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args, + &job, &clean_job, &se, + &acquire_ctx); + if (ret) + goto fail; + + if (args->perfmon_id) { + job->base.perfmon = v3d_perfmon_find(v3d_priv, + args->perfmon_id); + if (!job->base.perfmon) { + ret = -ENOENT; + goto fail_perfmon; + } + } + + mutex_lock(&v3d->sched_lock); + v3d_push_job(&job->base); + + ret = drm_sched_job_add_dependency(&clean_job->base, + dma_fence_get(job->base.done_fence)); + if (ret) + goto fail_unreserve; + + v3d_push_job(clean_job); + mutex_unlock(&v3d->sched_lock); + + v3d_attach_fences_and_unlock_reservation(file_priv, + clean_job, + &acquire_ctx, + args->out_sync, + &se, + clean_job->done_fence); + + v3d_job_put(&job->base); + v3d_job_put(clean_job); + + return 0; + +fail_unreserve: + mutex_unlock(&v3d->sched_lock); +fail_perfmon: + drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, + &acquire_ctx); +fail: + v3d_job_cleanup((void *)job); + v3d_job_cleanup(clean_job); + v3d_put_multisync_post_deps(&se); + + return ret; +} + +static const unsigned int cpu_job_bo_handle_count[] = { + [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1, + [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1, + [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, + [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, + [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1, +}; + +/** + * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Userspace specifies the CPU job type and data required to perform its + * operations through the drm_v3d_extension struct. 
+ */ +int +v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_v3d_submit_cpu *args = data; + struct v3d_submit_ext se = {0}; + struct v3d_submit_ext *out_se = NULL; + struct v3d_cpu_job *cpu_job = NULL; + struct v3d_csd_job *csd_job = NULL; + struct v3d_job *clean_job = NULL; + struct ww_acquire_ctx acquire_ctx; + int ret; + + if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { + DRM_INFO("Invalid flags: %d\n", args->flags); + return -EINVAL; + } + + ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job)); + if (ret) + return ret; + + if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { + ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job); + if (ret) { + DRM_DEBUG("Failed to get extensions.\n"); + goto fail; + } + } + + /* Every CPU job must have a CPU job user extension */ + if (!cpu_job->job_type) { + DRM_DEBUG("CPU job must have a CPU job user extension.\n"); + ret = -EINVAL; + goto fail; + } + + if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) { + DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n"); + ret = -EINVAL; + goto fail; + } + + trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); + + ret = v3d_job_init(v3d, file_priv, &cpu_job->base, + v3d_job_free, 0, &se, V3D_CPU); + if (ret) + goto fail; + + clean_job = cpu_job->indirect_csd.clean_job; + csd_job = cpu_job->indirect_csd.job; + + if (args->bo_handle_count) { + ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base, + args->bo_handles, args->bo_handle_count); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx); + if (ret) + goto fail; + } + + mutex_lock(&v3d->sched_lock); + v3d_push_job(&cpu_job->base); + + switch (cpu_job->job_type) { + case V3D_CPU_JOB_TYPE_INDIRECT_CSD: + ret = drm_sched_job_add_dependency(&csd_job->base.base, + dma_fence_get(cpu_job->base.done_fence)); + if (ret) + goto fail_unreserve; + + v3d_push_job(&csd_job->base); + + ret = drm_sched_job_add_dependency(&clean_job->base, + dma_fence_get(csd_job->base.done_fence)); + if (ret) + goto fail_unreserve; + + v3d_push_job(clean_job); + + break; + default: + break; + } + mutex_unlock(&v3d->sched_lock); + + out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? 
NULL : &se; + + v3d_attach_fences_and_unlock_reservation(file_priv, + &cpu_job->base, + &acquire_ctx, 0, + out_se, cpu_job->base.done_fence); + + switch (cpu_job->job_type) { + case V3D_CPU_JOB_TYPE_INDIRECT_CSD: + v3d_attach_fences_and_unlock_reservation(file_priv, + clean_job, + &cpu_job->indirect_csd.acquire_ctx, + 0, &se, clean_job->done_fence); + break; + default: + break; + } + + v3d_job_put(&cpu_job->base); + v3d_job_put(&csd_job->base); + v3d_job_put(clean_job); + + return 0; + +fail_unreserve: + mutex_unlock(&v3d->sched_lock); + + drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count, + &acquire_ctx); + + drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, + &cpu_job->indirect_csd.acquire_ctx); + +fail: + v3d_job_cleanup((void *)cpu_job); + v3d_job_cleanup((void *)csd_job); + v3d_job_cleanup(clean_job); + v3d_put_multisync_post_deps(&se); + kvfree(cpu_job->timestamp_query.queries); + kvfree(cpu_job->performance_query.queries); + + return ret; +} diff --git a/drivers/gpu/drm/v3d/v3d_sysfs.c b/drivers/gpu/drm/v3d/v3d_sysfs.c new file mode 100644 index 000000000000..d106845ba890 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_sysfs.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Igalia S.L. + */ + +#include <linux/sched/clock.h> +#include <linux/sysfs.h> + +#include "v3d_drv.h" + +static ssize_t +gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + enum v3d_queue queue; + u64 timestamp = local_clock(); + u64 active_runtime; + ssize_t len = 0; + + len += sysfs_emit(buf, "queue\ttimestamp\tjobs\truntime\n"); + + for (queue = 0; queue < V3D_MAX_QUEUES; queue++) { + if (v3d->queue[queue].start_ns) + active_runtime = timestamp - v3d->queue[queue].start_ns; + else + active_runtime = 0; + + /* Each line will display the queue name, timestamp, the number + * of jobs sent to that queue and the runtime, as can be seen here: + * + * queue timestamp jobs runtime + * bin 239043069420 22620 17438164056 + * render 239043069420 22619 27284814161 + * tfu 239043069420 8763 394592566 + * csd 239043069420 3168 10787905530 + * cache_clean 239043069420 6127 237375940 + */ + len += sysfs_emit_at(buf, len, "%s\t%llu\t%llu\t%llu\n", + v3d_queue_to_string(queue), + timestamp, + v3d->queue[queue].jobs_sent, + v3d->queue[queue].enabled_ns + active_runtime); + } + + return len; +} +static DEVICE_ATTR_RO(gpu_stats); + +static struct attribute *v3d_sysfs_entries[] = { + &dev_attr_gpu_stats.attr, + NULL, +}; + +static struct attribute_group v3d_sysfs_attr_group = { + .attrs = v3d_sysfs_entries, +}; + +int +v3d_sysfs_init(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &v3d_sysfs_attr_group); +} + +void +v3d_sysfs_destroy(struct device *dev) +{ + return sysfs_remove_group(&dev->kobj, &v3d_sysfs_attr_group); +} diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h index 7aa8dc356e54..5917b94148f5 100644 --- a/drivers/gpu/drm/v3d/v3d_trace.h +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -225,6 +225,63 @@ TRACE_EVENT(v3d_submit_csd, __entry->seqno) ); +TRACE_EVENT(v3d_submit_cpu_ioctl, + TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type), + TP_ARGS(dev, job_type), + + TP_STRUCT__entry( + __field(u32, dev) + __field(enum v3d_cpu_job_type, job_type) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->job_type = job_type; + ), + + TP_printk("dev=%u, job_type=%d", + __entry->dev,
__entry->job_type) +); + +TRACE_EVENT(v3d_cpu_job_begin, + TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type), + TP_ARGS(dev, job_type), + + TP_STRUCT__entry( + __field(u32, dev) + __field(enum v3d_cpu_job_type, job_type) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->job_type = job_type; + ), + + TP_printk("dev=%u, job_type=%d", + __entry->dev, + __entry->job_type) +); + +TRACE_EVENT(v3d_cpu_job_end, + TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type), + TP_ARGS(dev, job_type), + + TP_STRUCT__entry( + __field(u32, dev) + __field(enum v3d_cpu_job_type, job_type) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->job_type = job_type; + ), + + TP_printk("dev=%u, job_type=%d", + __entry->dev, + __entry->job_type) +); + TRACE_EVENT(v3d_cache_clean_begin, TP_PROTO(struct drm_device *dev), TP_ARGS(dev), diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index 047b95812334..cd9e66a06596 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -182,7 +182,7 @@ DEFINE_DRM_GEM_FOPS(vbox_fops); static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, + DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .fops = &vbox_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/vboxvideo/vbox_mode.c b/drivers/gpu/drm/vboxvideo/vbox_mode.c index 341edd982cb3..9ff3bade9795 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_mode.c +++ b/drivers/gpu/drm/vboxvideo/vbox_mode.c @@ -429,8 +429,8 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane, flags = VBOX_MOUSE_POINTER_VISIBLE | VBOX_MOUSE_POINTER_SHAPE | VBOX_MOUSE_POINTER_ALPHA; hgsmi_update_pointer_shape(vbox->guest_pool, flags, - min_t(u32, max(fb->hot_x, 0), width), - min_t(u32, max(fb->hot_y, 0), height), + min_t(u32, max(new_state->hotspot_x, 0), width), + min_t(u32, max(new_state->hotspot_y, 0), height), width, height, vbox->cursor_data, data_size); mutex_unlock(&vbox->hw_mutex); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 25c9c71256d3..f05e2c95a60d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -672,11 +672,21 @@ vc4_hdmi_connector_duplicate_state(struct drm_connector *connector) return &new_state->base; } +static void vc4_hdmi_connector_destroy_state(struct drm_connector *connector, + struct drm_connector_state *state) +{ + struct vc4_hdmi_connector_state *vc4_state = + conn_state_to_vc4_hdmi_conn_state(state); + + __drm_atomic_helper_connector_destroy_state(state); + kfree(vc4_state); +} + static const struct drm_connector_funcs vc4_hdmi_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .reset = vc4_hdmi_connector_reset, .atomic_duplicate_state = vc4_hdmi_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_destroy_state = vc4_hdmi_connector_destroy_state, .atomic_get_property = vc4_hdmi_connector_get_property, .atomic_set_property = vc4_hdmi_connector_set_property, }; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 4334c7608408..f8e9abe647b9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -177,7 +177,7 @@ static const struct drm_driver driver = { * out via drm_device::driver_features: */ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC | - 
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE | DRIVER_CURSOR_HOTSPOT, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 96365a772f77..bb7d86a0c6a1 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -58,6 +58,9 @@ #define MAX_CAPSET_ID 63 #define MAX_RINGS 64 +/* See virtio_gpu_ctx_create. One additional character for NULL terminator. */ +#define DEBUG_NAME_MAX_LEN 65 + struct virtio_gpu_object_params { unsigned long size; bool dumb; @@ -274,6 +277,8 @@ struct virtio_gpu_fpriv { uint64_t base_fence_ctx; uint64_t ring_idx_mask; struct mutex context_lock; + char debug_name[DEBUG_NAME_MAX_LEN]; + bool explicit_debug_name; }; /* virtgpu_ioctl.c */ diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index b24b11f25197..e4f76f315550 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -42,12 +42,19 @@ static void virtio_gpu_create_context_locked(struct virtio_gpu_device *vgdev, struct virtio_gpu_fpriv *vfpriv) { - char dbgname[TASK_COMM_LEN]; + if (vfpriv->explicit_debug_name) { + virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id, + vfpriv->context_init, + strlen(vfpriv->debug_name), + vfpriv->debug_name); + } else { + char dbgname[TASK_COMM_LEN]; - get_task_comm(dbgname, current); - virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id, - vfpriv->context_init, strlen(dbgname), - dbgname); + get_task_comm(dbgname, current); + virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id, + vfpriv->context_init, strlen(dbgname), + dbgname); + } vfpriv->context_created = true; } @@ -107,6 +114,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data, case VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs: value = vgdev->capset_id_mask; break; + case VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME: + value = vgdev->has_context_init ? 1 : 0; + break; default: return -EINVAL; } @@ -565,8 +575,8 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { int ret = 0; - uint32_t num_params, i, param, value; - uint64_t valid_ring_mask; + uint32_t num_params, i; + uint64_t valid_ring_mask, param, value; size_t len; struct drm_virtgpu_context_set_param *ctx_set_params = NULL; struct virtio_gpu_device *vgdev = dev->dev_private; @@ -580,7 +590,7 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev, return -EINVAL; /* Number of unique parameters supported at this time. 
*/ - if (num_params > 3) + if (num_params > 4) return -EINVAL; ctx_set_params = memdup_user(u64_to_user_ptr(args->ctx_set_params), @@ -642,6 +652,21 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev, vfpriv->ring_idx_mask = value; break; + case VIRTGPU_CONTEXT_PARAM_DEBUG_NAME: + if (vfpriv->explicit_debug_name) { + ret = -EINVAL; + goto out_unlock; + } + + ret = strncpy_from_user(vfpriv->debug_name, + u64_to_user_ptr(value), + DEBUG_NAME_MAX_LEN - 1); + if (ret < 0) + goto out_unlock; + + vfpriv->explicit_debug_name = true; + ret = 0; + break; default: ret = -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a2e045f3a000..a72a2dbda031 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -79,6 +79,8 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, + plane); bool is_cursor = plane->type == DRM_PLANE_TYPE_CURSOR; struct drm_crtc_state *crtc_state; int ret; @@ -86,6 +88,14 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) return 0; + /* + * Ignore damage clips if the framebuffer attached to the plane's state + * has changed since the last plane update (page-flip). In this case, a + * full plane update should happen because uploads are done per-buffer. + */ + if (old_plane_state->fb != new_plane_state->fb) + new_plane_state->ignore_damage_clips = true; + crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); if (IS_ERR(crtc_state)) @@ -323,16 +333,16 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, DRM_DEBUG("update, handle %d, pos +%d+%d, hot %d,%d\n", handle, plane->state->crtc_x, plane->state->crtc_y, - plane->state->fb ? plane->state->fb->hot_x : 0, - plane->state->fb ? 
plane->state->fb->hot_y : 0); + plane->state->hotspot_x, + plane->state->hotspot_y); output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR); output->cursor.resource_id = cpu_to_le32(handle); if (plane->state->fb) { output->cursor.hot_x = - cpu_to_le32(plane->state->fb->hot_x); + cpu_to_le32(plane->state->hotspot_x); output->cursor.hot_y = - cpu_to_le32(plane->state->fb->hot_y); + cpu_to_le32(plane->state->hotspot_y); } else { output->cursor.hot_x = cpu_to_le32(0); output->cursor.hot_y = cpu_to_le32(0); diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index d7e63aa14663..bc724cbd5e3a 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -30,17 +30,25 @@ static const struct drm_connector_funcs vkms_wb_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) +static int vkms_wb_atomic_check(struct drm_connector *connector, + struct drm_atomic_state *state) { + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(state, connector); + struct drm_crtc_state *crtc_state; struct drm_framebuffer *fb; - const struct drm_display_mode *mode = &crtc_state->mode; + const struct drm_display_mode *mode; int ret; if (!conn_state->writeback_job || !conn_state->writeback_job->fb) return 0; + if (!conn_state->crtc) + return 0; + + crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + mode = &crtc_state->mode; + fb = conn_state->writeback_job->fb; if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) { DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n", @@ -48,17 +56,13 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - ret = drm_atomic_helper_check_wb_encoder_state(encoder, conn_state); + ret = drm_atomic_helper_check_wb_connector_state(connector, state); if (ret < 0) return ret; return 0; } -static const struct drm_encoder_helper_funcs vkms_wb_encoder_helper_funcs = { - .atomic_check = vkms_wb_encoder_atomic_check, -}; - static int vkms_wb_connector_get_modes(struct drm_connector *connector) { struct drm_device *dev = connector->dev; @@ -161,6 +165,7 @@ static const struct drm_connector_helper_funcs vkms_wb_conn_helper_funcs = { .prepare_writeback_job = vkms_wb_prepare_job, .cleanup_writeback_job = vkms_wb_cleanup_job, .atomic_commit = vkms_wb_atomic_commit, + .atomic_check = vkms_wb_atomic_check, }; int vkms_enable_writeback_connector(struct vkms_device *vkmsdev) @@ -171,7 +176,7 @@ int vkms_enable_writeback_connector(struct vkms_device *vkmsdev) return drm_writeback_connector_init(&vkmsdev->drm, wb, &vkms_wb_connector_funcs, - &vkms_wb_encoder_helper_funcs, + NULL, vkms_wb_formats, ARRAY_SIZE(vkms_wb_formats), 1); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8b24ecf60e3e..d3e308fdfd5b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1611,7 +1611,7 @@ static const struct file_operations vmwgfx_driver_fops = { static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM, + DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM | DRIVER_CURSOR_HOTSPOT, .ioctls = vmw_ioctls, .num_ioctls = ARRAY_SIZE(vmw_ioctls), .master_set = vmw_master_set, diff --git 
a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 818b7f109f53..5fd0ccaa0b41 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -768,13 +768,8 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); s32 hotspot_x, hotspot_y; - hotspot_x = du->hotspot_x; - hotspot_y = du->hotspot_y; - - if (new_state->fb) { - hotspot_x += new_state->fb->hot_x; - hotspot_y += new_state->fb->hot_y; - } + hotspot_x = du->hotspot_x + new_state->hotspot_x; + hotspot_y = du->hotspot_y + new_state->hotspot_y; du->cursor_surface = vps->surf; du->cursor_bo = vps->bo; @@ -837,10 +832,21 @@ int vmw_du_primary_plane_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); struct drm_crtc_state *crtc_state = NULL; struct drm_framebuffer *new_fb = new_state->fb; + struct drm_framebuffer *old_fb = old_state->fb; int ret; + /* + * Ignore damage clips if the framebuffer attached to the plane's state + * has changed since the last plane update (page-flip). In this case, a + * full plane update should happen because uploads are done per-buffer. + */ + if (old_fb != new_fb) + new_state->ignore_damage_clips = true; + if (new_state->crtc) crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore new file mode 100644 index 000000000000..8778bf132674 --- /dev/null +++ b/drivers/gpu/drm/xe/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +*.hdrtest +/generated +/xe_gen_wa_oob diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig new file mode 100644 index 000000000000..9590eac91af3 --- /dev/null +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -0,0 +1,13 @@ +# xe dependencies +CONFIG_KUNIT=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_DEBUG_FS=y +CONFIG_DRM=y +CONFIG_DRM_FBDEV_EMULATION=y +CONFIG_DRM_KMS_HELPER=y +CONFIG_DRM_XE=y +CONFIG_DRM_XE_DISPLAY=n +CONFIG_EXPERT=y +CONFIG_FB=y +CONFIG_DRM_XE_KUNIT_TEST=y diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig new file mode 100644 index 000000000000..1cced50d8d8c --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE + tristate "Intel Xe Graphics" + depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular + # the shmem_readpage() which depends upon tmpfs + select SHMEM + select TMPFS + select DRM_BUDDY + select DRM_EXEC + select DRM_KMS_HELPER + select DRM_PANEL + select DRM_SUBALLOC_HELPER + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER + select DRM_MIPI_DSI + select RELAY + select IRQ_WORK + # xe depends on ACPI_VIDEO when ACPI is enabled + # but for select to work, need to select ACPI_VIDEO's dependencies, ick + select BACKLIGHT_CLASS_DEVICE if ACPI + select INPUT if ACPI + select ACPI_VIDEO if X86 && ACPI + select ACPI_BUTTON if ACPI + select ACPI_WMI if X86 && ACPI + select SYNC_FILE + select IOSF_MBI + select CRC32 + select SND_HDA_I915 if SND_HDA_CORE + select CEC_CORE if CEC_NOTIFIER + select VMAP_PFN + select DRM_TTM + select DRM_TTM_HELPER + select DRM_EXEC + select DRM_GPUVM + select DRM_SCHED + select 
MMU_NOTIFIER + select WANT_DEV_COREDUMP + select AUXILIARY_BUS + help + Experimental driver for Intel Xe series GPUs + + If "M" is selected, the module will be called xe. + +config DRM_XE_DISPLAY + bool "Enable display support" + depends on DRM_XE && EXPERT && DRM_XE=m + select FB_IOMEM_HELPERS + select I2C + select I2C_ALGOBIT + default y + help + Disable this option only if you want to compile out display support. + +config DRM_XE_FORCE_PROBE + string "Force probe xe for selected Intel hardware IDs" + depends on DRM_XE + help + This is the default value for the xe.force_probe module + parameter. Using the module parameter overrides this option. + + Force probe the xe for Intel graphics devices that are + recognized but not properly supported by this kernel version. It is + recommended to upgrade to a kernel version with proper support as soon + as it is available. + + It can also be used to block the probe of recognized and fully + supported devices. + + Use "" to disable force probe. If in doubt, use this. + + Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed + devices. For example, "4500" or "4500,4571". + + Use "*" to force probe the driver for all known devices. + + Use "!" right before the ID to block the probe of the device. For + example, "4500,!4571" forces the probe of 4500 and blocks the probe of + 4571. + + Use "!*" to block the probe of the driver for all known devices. + +menu "drm/Xe Debugging" +depends on DRM_XE +depends on EXPERT +source "drivers/gpu/drm/xe/Kconfig.debug" +endmenu + +menu "drm/xe Profile Guided Optimisation" + visible if EXPERT + depends on DRM_XE + source "drivers/gpu/drm/xe/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug new file mode 100644 index 000000000000..549065f57a78 --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE_WERROR + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Add -Werror to the build flags for (and only for) xe.ko. + Do not enable this unless you are writing code for the xe.ko module. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG + bool "Enable additional driver debugging" + depends on DRM_XE + depends on EXPERT + depends on !COMPILE_TEST + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_VM + bool "Enable extra VM debugging info" + default n + help + Enable extra VM debugging info. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_SRIOV + bool "Enable extra SR-IOV debugging" + default n + help + Enable extra SR-IOV debugging info. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_MEM + bool "Enable passing SYS/VRAM addresses to user space" + default n + help + Pass object location through uapi. Intended for extended + testing and development only. + + Recommended for driver developers only. + + If in doubt, say "N".
+ +config DRM_XE_SIMPLE_ERROR_CAPTURE + bool "Enable simple error capture to dmesg on job timeout" + default n + help + Choose this option when debugging an unexpected job timeout. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_KUNIT_TEST + tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS + depends on DRM_XE && KUNIT && DEBUG_FS + default KUNIT_ALL_TESTS + select DRM_EXPORT_FOR_TESTS if m + select DRM_KUNIT_TEST_HELPERS + help + Choose this option to allow the driver to perform selftests under + the KUnit framework. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_LARGE_GUC_BUFFER + bool "Enable larger GuC log buffer" + default n + help + Choose this option when debugging GuC issues. + The buffer should be large enough for complex issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_USERPTR_INVAL_INJECT + bool "Inject userptr invalidation -EINVAL errors" + default n + help + Choose this option when debugging error paths that + are hit during checks for userptr invalidations. + + Recommended for driver developers only. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile new file mode 100644 index 000000000000..ba17a25e8db3 --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig.profile @@ -0,0 +1,54 @@ +config DRM_XE_JOB_TIMEOUT_MAX + int "Default max job timeout (ms)" + default 10000 # milliseconds + help + Configures the default max job timeout after which a job will + be forcefully taken away from the scheduler. +config DRM_XE_JOB_TIMEOUT_MIN + int "Default min job timeout (ms)" + default 1 # milliseconds + help + Configures the default min job timeout after which a job will + be forcefully taken away from the scheduler. +config DRM_XE_TIMESLICE_MAX + int "Default max timeslice duration (us)" + default 10000000 # microseconds + help + Configures the default max timeslice duration between multiple + contexts under GuC scheduling. +config DRM_XE_TIMESLICE_MIN + int "Default min timeslice duration (us)" + default 1 # microseconds + help + Configures the default min timeslice duration between multiple + contexts under GuC scheduling. +config DRM_XE_PREEMPT_TIMEOUT + int "Preempt timeout (us, jiffy granularity)" + default 640000 # microseconds + help + How long to wait (in microseconds) for a preemption event to occur + when submitting a new context. If the current context does not hit + an arbitration point and yield to HW before the timer expires, the + HW will be reset to allow the more important context to execute. +config DRM_XE_PREEMPT_TIMEOUT_MAX + int "Default max preempt timeout (us)" + default 10000000 # microseconds + help + Configures the default max preempt timeout after which a context + will be forcefully taken away and a higher priority context will + run. +config DRM_XE_PREEMPT_TIMEOUT_MIN + int "Default min preempt timeout (us)" + default 1 # microseconds + help + Configures the default min preempt timeout after which a context + will be forcefully taken away and a higher priority context will + run. +config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT + bool "Default configuration of limitation on scheduler timeout" + default y + help + Configures whether the limits on scheduler timeouts are enforced. + When enabled, the MIN and MAX values above apply to all applicable + users, including elevated ones. By default the limitation is + applied.
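To make the interaction of the MIN/MAX pairs above concrete, here is a minimal illustrative sketch of how a driver might clamp a user-requested job timeout to these build-time bounds; the helper name xe_sched_clamp_job_timeout() is hypothetical and not part of this series:

	/* Illustrative sketch only: the helper below is hypothetical. */
	#include <linux/minmax.h>
	#include <linux/types.h>

	/* Clamp a requested job timeout (ms) to the Kconfig.profile bounds. */
	static u32 xe_sched_clamp_job_timeout(u32 requested_ms)
	{
		return clamp_t(u32, requested_ms,
			       CONFIG_DRM_XE_JOB_TIMEOUT_MIN,
			       CONFIG_DRM_XE_JOB_TIMEOUT_MAX);
	}

When DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT is disabled, such a clamp would presumably be skipped for the affected users.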
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile new file mode 100644 index 000000000000..53bd2a8ba1ae --- /dev/null +++ b/drivers/gpu/drm/xe/Makefile @@ -0,0 +1,305 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + +# Unconditionally enable W=1 warnings locally +# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn +subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter +subdir-ccflags-y += -Wmissing-declarations +subdir-ccflags-y += $(call cc-option, -Wrestrict) +subdir-ccflags-y += -Wmissing-format-attribute +subdir-ccflags-y += -Wmissing-prototypes +subdir-ccflags-y += -Wold-style-definition +subdir-ccflags-y += -Wmissing-include-dirs +subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) +subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) +subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) +subdir-ccflags-y += $(call cc-option, -Wformat-overflow) +subdir-ccflags-y += $(call cc-option, -Wformat-truncation) +subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) +subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-missing-field-initializers +subdir-ccflags-y += -Wno-type-limits +subdir-ccflags-y += -Wno-shift-negative-value +endif +ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-sign-compare +endif +# --- end copy-paste + +# Enable -Werror in CI and development +subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror + +subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) + +# generated sources +hostprogs := xe_gen_wa_oob + +generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h + +quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) + cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) + +$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules + $(call cmd,wa_oob) + +uses_generated_oob := \ + $(obj)/xe_gsc.o \ + $(obj)/xe_guc.o \ + $(obj)/xe_migrate.o \ + $(obj)/xe_ring_ops.o \ + $(obj)/xe_vm.o \ + $(obj)/xe_wa.o \ + $(obj)/xe_ttm_stolen_mgr.o + +$(uses_generated_oob): $(generated_oob) + +# Please keep these build lists sorted! 
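+# A note on the grouped-target rule above: the "&:" separator (a GNU Make
+# 4.3+ feature) tells make that one invocation of the recipe produces all of
+# the listed targets, so xe_wa_oob.c and xe_wa_oob.h come from a single run
+# of xe_gen_wa_oob. A reduced stand-alone illustration (tool and file names
+# here are generic, not taken from this Makefile):
+#
+#   out/a.c out/a.h &: gen-tool rules.txt
+#           ./gen-tool rules.txt out/a.c out/a.h
+#
+# With a plain ":" rule instead, a parallel build could run the recipe once
+# per target rather than once in total.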
+ +# core driver code + +xe-y += xe_bb.o \ + xe_bo.o \ + xe_bo_evict.o \ + xe_debugfs.o \ + xe_devcoredump.o \ + xe_device.o \ + xe_device_sysfs.o \ + xe_dma_buf.o \ + xe_drm_client.o \ + xe_exec.o \ + xe_execlist.o \ + xe_exec_queue.o \ + xe_force_wake.o \ + xe_ggtt.o \ + xe_gpu_scheduler.o \ + xe_gsc.o \ + xe_gsc_submit.o \ + xe_gt.o \ + xe_gt_ccs_mode.o \ + xe_gt_clock.o \ + xe_gt_debugfs.o \ + xe_gt_freq.o \ + xe_gt_idle.o \ + xe_gt_mcr.o \ + xe_gt_pagefault.o \ + xe_gt_sysfs.o \ + xe_gt_throttle_sysfs.o \ + xe_gt_tlb_invalidation.o \ + xe_gt_topology.o \ + xe_guc.o \ + xe_guc_ads.o \ + xe_guc_ct.o \ + xe_guc_debugfs.o \ + xe_guc_hwconfig.o \ + xe_guc_log.o \ + xe_guc_pc.o \ + xe_guc_submit.o \ + xe_heci_gsc.o \ + xe_hw_engine.o \ + xe_hw_engine_class_sysfs.o \ + xe_hw_fence.o \ + xe_huc.o \ + xe_huc_debugfs.o \ + xe_irq.o \ + xe_lrc.o \ + xe_migrate.o \ + xe_mmio.o \ + xe_mocs.o \ + xe_module.o \ + xe_pat.o \ + xe_pci.o \ + xe_pcode.o \ + xe_pm.o \ + xe_preempt_fence.o \ + xe_pt.o \ + xe_pt_walk.o \ + xe_query.o \ + xe_range_fence.o \ + xe_reg_sr.o \ + xe_reg_whitelist.o \ + xe_rtp.o \ + xe_ring_ops.o \ + xe_sa.o \ + xe_sched_job.o \ + xe_step.o \ + xe_sync.o \ + xe_tile.o \ + xe_tile_sysfs.o \ + xe_trace.o \ + xe_ttm_sys_mgr.o \ + xe_ttm_stolen_mgr.o \ + xe_ttm_vram_mgr.o \ + xe_tuning.o \ + xe_uc.o \ + xe_uc_debugfs.o \ + xe_uc_fw.o \ + xe_vm.o \ + xe_wait_user_fence.o \ + xe_wa.o \ + xe_wopcm.o + +# graphics hardware monitoring (HWMON) support +xe-$(CONFIG_HWMON) += xe_hwmon.o + +# graphics virtualization (SR-IOV) support +xe-y += xe_sriov.o + +xe-$(CONFIG_PCI_IOV) += \ + xe_lmtt.o \ + xe_lmtt_2l.o \ + xe_lmtt_ml.o + +# i915 Display compat #defines and #includes +subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ + -I$(srctree)/$(src)/display/ext \ + -I$(srctree)/$(src)/compat-i915-headers \ + -I$(srctree)/drivers/gpu/drm/xe/display/ \ + -I$(srctree)/drivers/gpu/drm/i915/display/ \ + -Ddrm_i915_gem_object=xe_bo \ + -Ddrm_i915_private=xe_device + +CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init) +CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init) + +# Rule to build SOC code shared with i915 +$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE + $(call cmd,force_checksrc) + $(call if_changed_rule,cc_o_c) + +# Rule to build display code shared with i915 +$(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE + $(call cmd,force_checksrc) + $(call if_changed_rule,cc_o_c) + +# Display code specific to xe +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + xe_display.o \ + display/xe_fb_pin.o \ + display/xe_hdcp_gsc.o \ + display/xe_plane_initial.o \ + display/xe_display_rps.o \ + display/xe_display_misc.o \ + display/xe_dsb_buffer.o \ + display/intel_fbdev_fb.o \ + display/intel_fb_bo.o \ + display/ext/i915_irq.o \ + display/ext/i915_utils.o + +# SOC code shared with i915 +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-soc/intel_dram.o \ + i915-soc/intel_pch.o + +# Display code shared with i915 +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-display/icl_dsi.o \ + i915-display/intel_atomic.o \ + i915-display/intel_atomic_plane.o \ + i915-display/intel_audio.o \ + i915-display/intel_backlight.o \ + i915-display/intel_bios.o \ + i915-display/intel_bw.o \ + i915-display/intel_cdclk.o \ + i915-display/intel_color.o \ + i915-display/intel_combo_phy.o \ + i915-display/intel_connector.o \ + i915-display/intel_crtc.o \ + i915-display/intel_crtc_state_dump.o \ + i915-display/intel_cursor.o \ + i915-display/intel_cx0_phy.o \ + 
i915-display/intel_ddi.o \ + i915-display/intel_ddi_buf_trans.o \ + i915-display/intel_display.o \ + i915-display/intel_display_debugfs.o \ + i915-display/intel_display_debugfs_params.o \ + i915-display/intel_display_device.o \ + i915-display/intel_display_driver.o \ + i915-display/intel_display_irq.o \ + i915-display/intel_display_params.o \ + i915-display/intel_display_power.o \ + i915-display/intel_display_power_map.o \ + i915-display/intel_display_power_well.o \ + i915-display/intel_display_trace.o \ + i915-display/intel_display_wa.o \ + i915-display/intel_dkl_phy.o \ + i915-display/intel_dmc.o \ + i915-display/intel_dp.o \ + i915-display/intel_dp_aux.o \ + i915-display/intel_dp_aux_backlight.o \ + i915-display/intel_dp_hdcp.o \ + i915-display/intel_dp_link_training.o \ + i915-display/intel_dp_mst.o \ + i915-display/intel_dpll.o \ + i915-display/intel_dpll_mgr.o \ + i915-display/intel_dpt_common.o \ + i915-display/intel_drrs.o \ + i915-display/intel_dsb.o \ + i915-display/intel_dsi.o \ + i915-display/intel_dsi_dcs_backlight.o \ + i915-display/intel_dsi_vbt.o \ + i915-display/intel_fb.o \ + i915-display/intel_fbc.o \ + i915-display/intel_fdi.o \ + i915-display/intel_fifo_underrun.o \ + i915-display/intel_frontbuffer.o \ + i915-display/intel_global_state.o \ + i915-display/intel_gmbus.o \ + i915-display/intel_hdcp.o \ + i915-display/intel_hdmi.o \ + i915-display/intel_hotplug.o \ + i915-display/intel_hotplug_irq.o \ + i915-display/intel_hti.o \ + i915-display/intel_link_bw.o \ + i915-display/intel_lspcon.o \ + i915-display/intel_modeset_lock.o \ + i915-display/intel_modeset_setup.o \ + i915-display/intel_modeset_verify.o \ + i915-display/intel_panel.o \ + i915-display/intel_pipe_crc.o \ + i915-display/intel_pmdemand.o \ + i915-display/intel_pps.o \ + i915-display/intel_psr.o \ + i915-display/intel_qp_tables.o \ + i915-display/intel_quirks.o \ + i915-display/intel_snps_phy.o \ + i915-display/intel_tc.o \ + i915-display/intel_vblank.o \ + i915-display/intel_vdsc.o \ + i915-display/intel_vga.o \ + i915-display/intel_vrr.o \ + i915-display/intel_wm.o \ + i915-display/skl_scaler.o \ + i915-display/skl_universal_plane.o \ + i915-display/skl_watermark.o + +ifeq ($(CONFIG_ACPI),y) + xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-display/intel_acpi.o \ + i915-display/intel_opregion.o +endif + +ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) + xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o +endif + +obj-$(CONFIG_DRM_XE) += xe.o +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ + +# header test +hdrtest_find_args := -not -path xe_rtp_helpers.h +ifneq ($(CONFIG_DRM_XE_DISPLAY),y) + hdrtest_find_args += -not -path display/\* -not -path compat-i915-headers/\* -not -path xe_display.h +endif + +always-$(CONFIG_DRM_XE_WERROR) += \ + $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args))) + +quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) + cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ + +$(obj)/%.hdrtest: $(src)/%.h FORCE + $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h new file mode 100644 index 000000000000..a4c2646803b5 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_COMMAND_HEADER_ABI_H +#define _ABI_GSC_COMMAND_HEADER_ABI_H + +#include <linux/types.h> 
+ +struct intel_gsc_mtl_header { + u32 validity_marker; +#define GSC_HECI_VALIDITY_MARKER 0xA578875A + + u8 heci_client_id; + + u8 reserved1; + + u16 header_version; +#define MTL_GSC_HEADER_VERSION 1 + + /* FW allows host to decide host_session handle as it sees fit. */ + u64 host_session_handle; + + /* handle generated by FW for messages that need to be re-submitted */ + u64 gsc_message_handle; + + u32 message_size; /* lower 20 bits only, upper 12 are reserved */ + + /* + * Flags mask: + * Bit 0: Pending + * Bit 1: Session Cleanup + * Bits 2-15: Flags + * Bits 16-31: Extension Size + * According to the internal spec, flags are either input or output; + * we distinguish the flags using the OUTFLAG or INFLAG prefix. + */ + u32 flags; +#define GSC_OUTFLAG_MSG_PENDING BIT(0) +#define GSC_INFLAG_MSG_CLEANUP BIT(1) + + u32 status; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h new file mode 100644 index 000000000000..ad4d041873ab --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_MKHI_COMMANDS_ABI_H +#define _ABI_GSC_MKHI_COMMANDS_ABI_H + +#include <linux/types.h> + +/* HECI client ID for MKHI commands */ +#define HECI_MEADDRESS_MKHI 7 + +/* Generic MKHI header */ +struct gsc_mkhi_header { + u8 group_id; + u8 command; + u8 reserved; + u8 result; +} __packed; + +/* GFX_SRV commands */ +#define MKHI_GROUP_ID_GFX_SRV 0x30 + +#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42) + +struct gsc_get_compatibility_version_in { + struct gsc_mkhi_header header; +} __packed; + +struct gsc_get_compatibility_version_out { + struct gsc_mkhi_header header; + u16 proj_major; + u16 compat_major; + u16 compat_minor; + u16 reserved[5]; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h new file mode 100644 index 000000000000..57520809e48d --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H +#define _ABI_GSC_PXP_COMMANDS_ABI_H + +#include <linux/types.h> + +/* HECI client ID for PXP commands */ +#define HECI_MEADDRESS_PXP 17 + +#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF)) + +/* + * There are a lot of status codes for PXP, but we only define the cross-API + * common ones that we can actually handle in the kernel driver. Other failure + * codes should be printed to the error log for debugging.
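+ *
+ * As a purely illustrative sketch (hypothetical, not part of this file),
+ * a caller might map the common codes onto errnos and only log the rest:
+ *
+ *	switch (status) {
+ *	case PXP_STATUS_SUCCESS:
+ *		return 0;
+ *	case PXP_STATUS_NOT_READY:
+ *		return -EAGAIN;	/* transient, worth retrying */
+ *	case PXP_STATUS_OP_NOT_PERMITTED:
+ *		return -EPERM;
+ *	default:
+ *		return -EIO;	/* unexpected, log the raw code */
+ *	}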
+ */ +enum pxp_status { + PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_ERROR_API_VERSION = 0x1002, + PXP_STATUS_NOT_READY = 0x100e, + PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a, + PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f, + PXP_STATUS_OP_NOT_PERMITTED = 0x4013 +}; + +/* Common PXP FW message header */ +struct pxp_cmd_header { + u32 api_version; + u32 command_id; + union { + u32 status; /* out */ + u32 stream_id; /* in */ +#define PXP_CMDHDR_EXTDATA_SESSION_VALID GENMASK(0, 0) +#define PXP_CMDHDR_EXTDATA_APP_TYPE GENMASK(1, 1) +#define PXP_CMDHDR_EXTDATA_SESSION_ID GENMASK(17, 2) + }; + /* Length of the message (excluding the header) */ + u32 buffer_len; +} __packed; + +#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */ + +/* PXP-Input-Packet: HUC Auth-only */ +struct pxp43_new_huc_auth_in { + struct pxp_cmd_header header; + u64 huc_base_address; + u32 huc_size; +} __packed; + +/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */ +struct pxp43_huc_auth_out { + struct pxp_cmd_header header; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h new file mode 100644 index 000000000000..3062e0e0d467 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ACTIONS_ABI_H +#define _ABI_GUC_ACTIONS_ABI_H + +/** + * DOC: HOST2GUC_SELF_CFG + * + * This message is used by the Host KMD to set up the `GuC Self Config KLVs`_. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **KLV_LEN** - KLV length | + * | | | | + * | | | - 32 bit KLV = 1 | + * | | | - 64 bit KLV = 2 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized | + * 
+---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508 + +#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn + +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_CONTROL_CTB + * + * This H2G action allows the VF Host to enable or disable the H2G and G2H `CT Buffer`_. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ | + * | | | | + * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 | + * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509 + +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn +#define GUC_CTB_CONTROL_DISABLE 0u +#define GUC_CTB_CONTROL_ENABLE 1u + +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + +enum xe_guc_action { + XE_GUC_ACTION_DEFAULT = 0x0, + XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, + XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, + XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, + XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, + XE_GUC_ACTION_ENTER_S_STATE = 0x501, + XE_GUC_ACTION_EXIT_S_STATE = 0x502, + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, 
+ XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, + XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, + XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + XE_GUC_ACTION_GET_HWCONFIG = 0x4100, + XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, + XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, + XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, + XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, + XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, + XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, + XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004, + XE_GUC_ACTION_TLB_INVALIDATION = 0x7000, + XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001, + XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002, + XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, + XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, + XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, + XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_LIMIT +}; + +enum xe_guc_rc_options { + XE_GUCRC_HOST_CONTROL, + XE_GUCRC_FIRMWARE_CONTROL, +}; + +enum xe_guc_preempt_options { + XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum xe_guc_report_status { + XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, + XE_GUC_REPORT_STATUS_ACKED = 0x1, + XE_GUC_REPORT_STATUS_ERROR = 0x2, + XE_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + +enum xe_guc_sleep_state_status { + XE_GUC_SLEEP_STATE_SUCCESS = 0x1, + XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, + XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 +#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 +}; + +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0 +#define XE_GUC_TLB_INVAL_MODE_SHIFT 8 +/* Flush PPC or SMRO caches along with TLB invalidation request */ +#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31) + +enum xe_guc_tlb_invalidation_type { + XE_GUC_TLB_INVAL_FULL = 0x0, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2, + XE_GUC_TLB_INVAL_GUC = 0x3, +}; + +/* + * 0: Heavy mode of Invalidation: + * The pipeline of the engine(s) targeted by the invalidation is blocked, + * and all the in-flight transactions are guaranteed to be Globally + * Observed before the TLB invalidation completes. + * 1: Lite mode of Invalidation: + * TLBs of the targeted engine(s) are immediately invalidated. + * In-flight transactions are NOT guaranteed to be Globally Observed before + * completing TLB invalidation. + * Light Invalidation Mode is to be used only when + * it can be guaranteed (by SW) that the address translations remain invariant + * for the in-flight transactions across the TLB invalidation. 
In other words, + * this mode can be used when the TLB invalidation is intended to clear out the + * stale cached translations that are no longer in use. Light Invalidation Mode + * is much faster than the Heavy Invalidation Mode, as it does not wait for the + * in-flight transactions to be Globally Observed. + */ +enum xe_guc_tlb_inval_mode { + XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0, + XE_GUC_TLB_INVAL_MODE_LITE = 0x1, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..811add10c30d --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include <linux/types.h> + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | 2 | 0:63 | PADDING (PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values, which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. 
+ * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events. + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPC_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_media_ratio_mode { + SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, +}; + +enum slpc_gucrc_mode { + SLPC_GUCRC_MODE_HW = 0, + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1, + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2, + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3, + + SLPC_GUCRC_MODE_MAX, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + 
SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h new file mode 100644 index 000000000000..3b83f907ece4 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H +#define _ABI_GUC_COMMUNICATION_CTB_ABI_H + +#include <linux/types.h> +#include <linux/build_bug.h> + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. 
| + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN GUC_CTB_MSG_MAX_LEN + +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and 
GuC + * is based on a u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * The current status of each buffer is maintained in the `CTB Descriptor`_. + * Each message in the data stream is encoded as `CTB HXG Message`_. + */ + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h new file mode 100644 index 000000000000..ef538e34f894 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H +#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H + +/** + * DOC: GuC MMIO based communication + * + * The MMIO based communication between Host and GuC relies on special + * hardware registers whose format can be defined by the software + * (so-called scratch registers). + * + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H), + * whose maximum length depends on the number of available scratch + * registers, is written directly into those scratch registers. + * + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses a 4-element array to store the H2G message. + * + * For Gen11+, there are 4 additional registers 0x190240-0x19024C, which, + * despite the lower count, are preferred over the legacy ones. + * + * The MMIO based communication is mainly used during the driver + * initialization phase to set up the `CTB based communication`_ that will + * be used afterwards. + */ + +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h new file mode 100644 index 000000000000..ec83551bf9c0 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ERRORS_ABI_H +#define _ABI_GUC_ERRORS_ABI_H + +enum xe_guc_response_status { + XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, +}; + +enum xe_guc_load_status { + XE_GUC_LOAD_STATUS_DEFAULT = 0x00, + XE_GUC_LOAD_STATUS_START = 0x01, + XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, + XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, + XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, + XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, + XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, + XE_GUC_LOAD_STATUS_GUCINT_DONE = 0x40, + XE_GUC_LOAD_STATUS_DPC_READY = 0x50, + XE_GUC_LOAD_STATUS_DPC_ERROR = 0x60, + XE_GUC_LOAD_STATUS_EXCEPTION = 0x70, + XE_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71, + XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, + XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, + XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, + + XE_GUC_LOAD_STATUS_READY = 0xF0, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h new file mode 100644 index 000000000000..47094b9b044c --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _ABI_GUC_KLVS_ABI_H +#define _ABI_GUC_KLVS_ABI_H + +#include <linux/types.h> + +/** + * DOC: GuC KLV + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **KEY** - KLV key identifier | + * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC VGT Policy KLVs`_ | + * | | | - `GuC VF Configuration KLVs`_ | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_KLV_LEN_MIN 1u +#define GUC_KLV_0_KEY (0xffff << 16) +#define GUC_KLV_0_LEN (0xffff << 0) +#define GUC_KLV_n_VALUE (0xffffffff << 0) + +/** + * DOC: GuC Self Config KLVs + * + * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_. + * + * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900 + * Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts + * status vector for use by the GuC. 
+ * + * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901 + * Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts + * source vector for use by the GuC. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902 + * Refers to 64 bit Global Gfx address of H2G `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903 + * Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904 + * Refers to size of H2G `CT Buffer`_ in bytes. + * Should be a multiple of 4K. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905 + * Refers to 64 bit Global Gfx address of G2H `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906 + * Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907 + * Refers to size of G2H `CT Buffer`_ in bytes. + * Should be a multiple of 4K. + */ + +#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY 0x0900 +#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY 0x0901 +#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY 0x0902 +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY 0x0903 +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY 0x0904 +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN 1u + +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY 0x0905 +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY 0x0906 +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u + +/* + * Per context scheduling policy update keys. + */ +enum { + GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, + GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004, + GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005, + + GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, +}; + +/** + * DOC: GuC VGT Policy KLVs + * + * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. + * + * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001 + * This config sets whether strict scheduling is enabled whereby any VF + * that doesn't have work to submit is still allocated a fixed execution + * time-slice to ensure active VFs' execution is always consistent even + * during other VF reprovisioning / rebooting events. Changing this KLV + * impacts all VFs and takes effect on the next VF-Switch event. + * + * :0: don't schedule idle (default) + * :1: schedule if idle + * + * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002 + * This config sets the sample period for tracking adverse event counters. + * A sample period is the period in milliseconds during which events are + * counted. This is applicable for all the VFs. 
+ * + * :0: adverse events are not counted (default) + * :n: sample period in milliseconds + * + * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00 + * This config resets the utilized HW engines after a VF switch (i.e. to + * clean up stale HW registers left behind by the previous VF) + * + * :0: don't reset (default) + * :1: reset + */ + +#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY 0x8001 +#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN 1u + +#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY 0x8002 +#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN 1u + +#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY 0x8D00 +#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN 1u + +/** + * DOC: GuC VF Configuration KLVs + * + * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG. + * + * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001 + * A 4K aligned start GGTT address/offset assigned to the VF. + * Value is 64 bits. + * + * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002 + * A 4K aligned size of GGTT assigned to the VF. + * Value is 64 bits. + * + * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003 + * A 2M aligned size of local memory assigned to the VF. + * Value is 64 bits. + * + * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004 + * Refers to the number of contexts allocated to this VF. + * + * :0: no contexts (default) + * :1-65535: number of contexts (Gen12) + * + * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005 + * For multi-tiled products, this field contains the bitwise-OR of tiles + * assigned to the VF. Bit-0-set means VF has access to Tile-0, + * Bit-31-set means VF has access to Tile-31, etc. + * At least one tile will always be allocated. + * If all bits are zero, VF KMD should treat this as a fatal error. + * For single-tile products, this KLV config is ignored. + * + * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006 + * Refers to the number of doorbells allocated to this VF. + * + * :0: no doorbells (default) + * :1-255: number of doorbells (Gen12) + * + * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01 + * This config sets the VFs-execution-quantum in milliseconds. + * GuC will attempt to obey the maximum values as much as HW is capable + * of, but this will never be perfectly exact (accumulated nanosecond + * granularity) since the GPU's clock time runs off a different crystal + * from the CPU's clock. Changing this KLV on a VF that is currently + * running a context won't take effect until a new context is scheduled in. + * That said, when the PF is changing this value from 0xFFFFFFFF to + * something else, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. In such a scenario, the + * PF would need to trigger a VM PAUSE and then change the KLV to force + * it to take effect. Such cases might typically happen on a 1PF+1VF + * Virtualization config enabled for heavier workloads like AI/ML. + * + * :0: infinite exec quantum (default) + * + * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02 + * This config sets the VF-preemption-timeout in microseconds. + * GuC will attempt to obey the minimum and maximum values as much as + * HW is capable of, but this will never be perfectly exact (accumulated + * nanosecond granularity) since the GPU's clock time runs off a + * different crystal from the CPU's clock. Changing this KLV on a VF + * that is currently running a context won't take effect until a new + * context is scheduled in. + * That said, when the PF is changing this value from 0xFFFFFFFF to + * something else, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. 
+ * In this case, the PF would need to trigger a VM PAUSE and then change + * the KLV to force it to take effect. Such cases might typically happen + * on a 1PF+1VF Virtualization config enabled for heavier workloads like + * AI/ML. + * + * :0: no preemption timeout (default) + * + * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03 + * This config sets the threshold for CAT errors caused by the VF. + * + * :0: adverse events or errors will not be reported (default) + * :n: event occurrence count per sampling interval + * + * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04 + * This config sets the threshold for engine resets caused by the VF. + * + * :0: adverse events or errors will not be reported (default) + * :n: event occurrence count per sampling interval + * + * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05 + * This config sets the threshold for page fault errors caused by the VF. + * + * :0: adverse events or errors will not be reported (default) + * :n: event occurrence count per sampling interval + * + * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06 + * This config sets the threshold for H2G interrupts triggered by the VF. + * + * :0: adverse events or errors will not be reported (default) + * :n: time (us) per sampling interval + * + * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07 + * This config sets the threshold for GT interrupts triggered by the VF's + * workloads. + * + * :0: adverse events or errors will not be reported (default) + * :n: time (us) per sampling interval + * + * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08 + * This config sets the threshold for doorbell rings triggered by the VF. + * + * :0: adverse events or errors will not be reported (default) + * :n: time (us) per sampling interval + * + * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A + * Refers to the start index of doorbells assigned to this VF. + * + * :0: (default) + * :1-255: number of doorbells (Gen12) + * + * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B + * Refers to the start index in the context array allocated for this VF's use. 
+ * + * :0: (default) + * :1-65535: number of contexts (Gen12) + */ + +#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 +#define GUC_KLV_VF_CFG_GGTT_START_LEN 2u + +#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY 0x0002 +#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY 0x0003 +#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY 0x0004 +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN 1u + +#define GUC_KLV_VF_CFG_TILE_MASK_KEY 0x0005 +#define GUC_KLV_VF_CFG_TILE_MASK_LEN 1u + +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY 0x0006 +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN 1u + +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u + +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY 0x8a04 +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY 0x8a05 +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY 0x8a06 +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY 0x8a07 +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY 0x8a08 +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY 0x8a0a +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h new file mode 100644 index 000000000000..3d199016cf88 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_MESSAGES_ABI_H +#define _ABI_GUC_MESSAGES_ABI_H + +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. 
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | | | |
+ * | 0 | 31 | **ORIGIN** - originator of the message |
+ * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 |
+ * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 |
+ * | | | |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | **TYPE** - message type |
+ * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
+ * | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
+ * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
+ * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
+ * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **PAYLOAD** - optional payload (depends on TYPE) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN 1u
+#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31)
+#define GUC_HXG_ORIGIN_HOST 0u
+#define GUC_HXG_ORIGIN_GUC 1u
+#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
+#define GUC_HXG_TYPE_REQUEST 0u
+#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
+#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
+#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
+#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u
+#define GUC_HXG_MSG_0_AUX (0xfffffff << 0)
+#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0)
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall reply with an `HXG Response`_,
+ * `HXG Failure`_ or `HXG Retry`_ message as a definite reply, and may use
+ * an `HXG Busy`_ message as an intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **DATA0** - request data (depends on ACTION) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **ACTION** - requested action code |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **DATAn** - optional data (depends on ACTION) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that does not involve immediate confirmation or return data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
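+ *
+ * Purely as an illustrative sketch (``action`` and ``data`` here are assumed
+ * to be provided by the caller and are not part of this ABI), an event
+ * message could be composed as:
+ *
+ * msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ * FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ * FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action);
+ * msg[1] = data;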
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **DATA0** - event data (depends on ACTION) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **ACTION** - event action code |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **DATAn** - optional event data (depends on ACTION) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that its processing will take longer than the
+ * default timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **COUNTER** - progress indicator |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by the recipient to indicate that
+ * the `HXG Request`_ message was dropped and should be resent.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **REASON** - reason for retry |
+ * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
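+ *
+ * As an illustrative sketch only (``msg`` being the received reply), a sender
+ * could detect a failure and extract the error code as:
+ *
+ * if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ * err = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);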
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h new file mode 100644 index 000000000000..710cecca972d --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h @@ -0,0 +1 @@ +/* Empty */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h new file mode 100644 index 000000000000..650ea2803a97 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_GEM_MMAN_H_ +#define _I915_GEM_MMAN_H_ + +#include "xe_bo_types.h" +#include <drm/drm_prime.h> + +static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma) +{ + return drm_gem_prime_mmap(&bo->ttm.base, vma); +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h new file mode 
100644 index 000000000000..5f19550cc845 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_H_
+#define _I915_GEM_OBJECT_H_
+
+#include <linux/types.h>
+
+#include "xe_bo.h"
+
+#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT)
+
+static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
+{
+ /* Should never be called */
+ WARN_ON(1);
+ return n;
+}
+
+static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
+{
+ /* legacy tiling is unused */
+ return false;
+}
+
+static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
+{
+ /* xe has no userptr GEM objects */
+ return false;
+}
+
+static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
+ u32 ofs, u64 *ptr, u32 size)
+{
+ struct ttm_bo_kmap_obj map;
+ void *virtual;
+ bool is_iomem;
+ int ret;
+
+ XE_WARN_ON(size != 8);
+
+ ret = xe_bo_lock(bo, true);
+ if (ret)
+ return ret;
+
+ ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
+ if (ret)
+ goto out_unlock;
+
+ ofs &= ~PAGE_MASK;
+ virtual = ttm_kmap_obj_virtual(&map, &is_iomem);
+ if (is_iomem)
+ *ptr = readq((void __iomem *)(virtual + ofs));
+ else
+ *ptr = *(u64 *)(virtual + ofs);
+
+ ttm_bo_kunmap(&map);
+out_unlock:
+ xe_bo_unlock(bo);
+ return ret;
+}
+
+#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h new file mode 100644 index 000000000000..2a3f12d2978c --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
+#define _I915_GEM_OBJECT_FRONTBUFFER_H_
+
+#define i915_gem_object_get_frontbuffer(obj) NULL
+#define i915_gem_object_set_frontbuffer(obj, front) (front)
+
+#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h new file mode 100644 index 000000000000..21fec9cc837c --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_RPS_H__
+#define __INTEL_RPS_H__
+
+#define gen5_rps_irq_handler(x) ({})
+
+#endif /* __INTEL_RPS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h new file mode 100644 index 000000000000..6f0ab3753563 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+static inline void i915_active_init(struct i915_active *ref,
+ int (*active)(struct i915_active *ref),
+ void (*retire)(struct i915_active *ref),
+ unsigned long flags)
+{
+ (void) active;
+ (void) retire;
+}
+
+#define i915_active_fini(active) do { } while (0)
+
+#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h new file mode 100644 index 000000000000..8c31f9a8b168 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h @@ -0,0 +1,13 @@ +/*
+ *
SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_TYPES_H_ +#define _I915_ACTIVE_TYPES_H_ + +struct i915_active {}; +#define I915_ACTIVE_RETIRE_SLEEPS 0 + +#endif /* _I915_ACTIVE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h new file mode 100644 index 000000000000..e835bea08d1b --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __I915_CONFIG_H__ +#define __I915_CONFIG_H__ + +#include <linux/sched.h> + +struct drm_i915_private; + +static inline unsigned long +i915_fence_timeout(const struct drm_i915_private *i915) +{ + return MAX_SCHEDULE_TIMEOUT; +} + +#endif /* __I915_CONFIG_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h new file mode 100644 index 000000000000..b4c47617b64b --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __I915_DEBUGFS_H__ +#define __I915_DEBUGFS_H__ + +struct drm_i915_gem_object; +struct seq_file; + +static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {} + +#endif /* __I915_DEBUGFS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h new file mode 100644 index 000000000000..5d2a77b52db4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ +#ifndef _XE_I915_DRV_H_ +#define _XE_I915_DRV_H_ + +/* + * "Adaptation header" to allow i915 display to also build for xe driver. 
+ * TODO: refactor i915 and xe so this can cease to exist + */ + +#include <drm/drm_drv.h> + +#include "gem/i915_gem_object.h" + +#include "soc/intel_pch.h" +#include "xe_device.h" +#include "xe_bo.h" +#include "xe_pm.h" +#include "xe_step.h" +#include "i915_gem.h" +#include "i915_gem_stolen.h" +#include "i915_gpu_error.h" +#include "i915_reg_defs.h" +#include "i915_utils.h" +#include "intel_gt_types.h" +#include "intel_step.h" +#include "intel_uc_fw.h" +#include "intel_uncore.h" +#include "intel_runtime_pm.h" +#include <linux/pm_runtime.h> + +static inline struct drm_i915_private *to_i915(const struct drm_device *dev) +{ + return container_of(dev, struct drm_i915_private, drm); +} + +static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + + +#define INTEL_JASPERLAKE 0 +#define INTEL_ELKHARTLAKE 0 +#define IS_PLATFORM(xe, x) ((xe)->info.platform == x) +#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) +#define INTEL_DEVID(dev_priv) ((dev_priv)->info.devid) +#define IS_I830(dev_priv) (dev_priv && 0) +#define IS_I845G(dev_priv) (dev_priv && 0) +#define IS_I85X(dev_priv) (dev_priv && 0) +#define IS_I865G(dev_priv) (dev_priv && 0) +#define IS_I915G(dev_priv) (dev_priv && 0) +#define IS_I915GM(dev_priv) (dev_priv && 0) +#define IS_I945G(dev_priv) (dev_priv && 0) +#define IS_I945GM(dev_priv) (dev_priv && 0) +#define IS_I965G(dev_priv) (dev_priv && 0) +#define IS_I965GM(dev_priv) (dev_priv && 0) +#define IS_G45(dev_priv) (dev_priv && 0) +#define IS_GM45(dev_priv) (dev_priv && 0) +#define IS_G4X(dev_priv) (dev_priv && 0) +#define IS_PINEVIEW(dev_priv) (dev_priv && 0) +#define IS_G33(dev_priv) (dev_priv && 0) +#define IS_IRONLAKE(dev_priv) (dev_priv && 0) +#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) +#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) +#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) +#define IS_IVB_GT1(dev_priv) (dev_priv && 0) +#define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) +#define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) +#define IS_HASWELL(dev_priv) (dev_priv && 0) +#define IS_BROADWELL(dev_priv) (dev_priv && 0) +#define IS_SKYLAKE(dev_priv) (dev_priv && 0) +#define IS_BROXTON(dev_priv) (dev_priv && 0) +#define IS_KABYLAKE(dev_priv) (dev_priv && 0) +#define IS_GEMINILAKE(dev_priv) (dev_priv && 0) +#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) +#define IS_COMETLAKE(dev_priv) (dev_priv && 0) +#define IS_ICELAKE(dev_priv) (dev_priv && 0) +#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) +#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) +#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) +#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) +#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) +#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) +#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) +#define IS_XEHPSDV(dev_priv) (dev_priv && 0) +#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) +#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) +#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) +#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) + +#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) +#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) +#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) + +#define IP_VER(ver, rel) ((ver) << 8 | (rel)) + +#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe))) + +#define IS_GRAPHICS_VER(xe, first, last) \ + 
((xe)->info.graphics_verx100 >= (first) * 100 && \
+ (xe)->info.graphics_verx100 <= ((last) * 100 + 99))
+#define IS_MOBILE(xe) (xe && 0)
+#define HAS_LLC(xe) (!IS_DGFX((xe)))
+
+#define HAS_GMD_ID(xe) (GRAPHICS_VERx100(xe) >= 1270)
+
+/* Workarounds not handled yet */
+#define IS_DISPLAY_STEP(xe, first, last) ({ u8 __step = (xe)->info.step.display; (first) <= __step && __step <= (last); })
+#define IS_GRAPHICS_STEP(xe, first, last) ({ u8 __step = (xe)->info.step.graphics; (first) <= __step && __step <= (last); })
+
+#define IS_LP(xe) (0)
+#define IS_GEN9_LP(xe) (0)
+#define IS_GEN9_BC(xe) (0)
+
+#define IS_TIGERLAKE_UY(xe) (xe && 0)
+#define IS_COMETLAKE_ULX(xe) (xe && 0)
+#define IS_COFFEELAKE_ULX(xe) (xe && 0)
+#define IS_KABYLAKE_ULX(xe) (xe && 0)
+#define IS_SKYLAKE_ULX(xe) (xe && 0)
+#define IS_HASWELL_ULX(xe) (xe && 0)
+#define IS_COMETLAKE_ULT(xe) (xe && 0)
+#define IS_COFFEELAKE_ULT(xe) (xe && 0)
+#define IS_KABYLAKE_ULT(xe) (xe && 0)
+#define IS_SKYLAKE_ULT(xe) (xe && 0)
+
+#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last))
+#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \
+ ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \
+ IS_GRAPHICS_STEP(xe, first, last))
+#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last))
+
+/* XXX: No basedie stepping support yet */
+#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe))
+
+#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+/* FIXME: Add subplatform here */
+#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10)
+#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11)
+#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12)
+#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU)
+#define IS_ICL_WITH_PORT_F(xe) (xe && 0)
+#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
+#define to_intel_bo(x) gem_to_xe_bo((x))
+#define mkwrite_device_info(xe) (INTEL_INFO(xe))
+
+#define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
+
+#define intel_has_gpu_reset(a) (a && 0)
+
+#include "intel_wakeref.h"
+
+static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ if (xe_pm_runtime_get(xe) < 0) {
+ xe_pm_runtime_put(xe);
+ return false;
+ }
+ return true;
+}
+
+static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ return xe_pm_runtime_get_if_active(xe);
+}
+
+static inline void
intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
+{
+ struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+ xe_pm_runtime_put(xe);
+}
+
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+{
+ if (wakeref)
+ intel_runtime_pm_put_unchecked(pm);
+}
+
+#define intel_runtime_pm_get_raw intel_runtime_pm_get
+#define intel_runtime_pm_put_raw intel_runtime_pm_put
+#define assert_rpm_wakelock_held(x) do { } while (0)
+#define assert_rpm_raw_wakeref_held(x) do { } while (0)
+
+#define intel_uncore_forcewake_get(x, y) do { } while (0)
+#define intel_uncore_forcewake_put(x, y) do { } while (0)
+
+#define intel_uncore_arm_unclaimed_mmio_detection(x) do { } while (0)
+
+#define I915_PRIORITY_DISPLAY 0
+struct i915_sched_attr {
+ int priority;
+};
+#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0)
+
+#define with_intel_runtime_pm(rpm, wf) \
+ for ((wf) = intel_runtime_pm_get(rpm); (wf); \
+ intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+#define pdev_to_i915 pdev_to_xe_device
+#define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime)
+
+#define FORCEWAKE_ALL XE_FORCEWAKE_ALL
+#define HPD_STORM_DEFAULT_THRESHOLD 50
+
+#ifdef CONFIG_ARM64
+/*
+ * arm64 indirectly includes linux/rtc.h,
+ * which defines an irq_lock, so include it
+ * here before #define-ing it
+ */
+#include <linux/rtc.h>
+#endif
+
+#define irq_lock irq.lock
+
+#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h new file mode 100644 index 000000000000..12c671fd5235 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_fixed.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h new file mode 100644 index 000000000000..06b723a479c5 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_GEM_H__
+#define __I915_GEM_H__
+#define GEM_BUG_ON
+#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h new file mode 100644 index 000000000000..888e7a87a925 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -0,0 +1,79 @@ +#ifndef _I915_GEM_STOLEN_H_
+#define _I915_GEM_STOLEN_H_
+
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_res_cursor.h"
+
+struct xe_bo;
+
+struct i915_stolen_fb {
+ struct xe_bo *bo;
+};
+
+static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
+ struct i915_stolen_fb *fb,
+ u32 size, u32 align,
+ u32 start, u32 end)
+{
+ struct xe_bo *bo;
+ int err;
+ u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+
+ bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
+ NULL, size, start, end,
+ ttm_bo_type_kernel, flags);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ bo = NULL;
+ return err;
+ }
+ err = xe_bo_pin(bo);
+ xe_bo_unlock_vm_held(bo);
+
+ if (err) {
+ /* drop the reference taken at creation */
+ xe_bo_put(bo);
+ bo = NULL;
+ }
+
+ fb->bo = bo;
+
+ return err;
+}
+
+static inline int i915_gem_stolen_insert_node(struct xe_device *xe,
+ struct i915_stolen_fb *fb,
+ u32 size, u32 align)
+{
+ /* Not used on xe */
+ BUG_ON(1);
+ return -ENODEV;
+}
+
+static inline void i915_gem_stolen_remove_node(struct xe_device *xe,
+ struct i915_stolen_fb
*fb) +{ + xe_bo_unpin_map_no_vm(fb->bo); + fb->bo = NULL; +} + +#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) +#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo)) + +static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb) +{ + struct xe_res_cursor res; + + xe_res_first(fb->bo->ttm.resource, 0, 4096, &res); + return res.start; +} + +/* Used for < gen4. These are not supported by Xe */ +#define i915_gem_stolen_area_address(xe) (!WARN_ON(1)) +/* Used for gen9 specific WA. Gen9 is not supported by Xe */ +#define i915_gem_stolen_area_size(xe) (!WARN_ON(1)) + +#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \ + i915_gem_stolen_node_offset(fb)) +#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size)) + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h new file mode 100644 index 000000000000..98e9dd78f670 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_GPU_ERROR_H_ +#define _I915_GPU_ERROR_H_ + +struct drm_i915_error_state_buf; + +__printf(2, 3) +static inline void +i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) +{ +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h new file mode 100644 index 000000000000..61707a07f91f --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_irq.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h new file mode 100644 index 000000000000..8619ec015ad4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_reg.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h new file mode 100644 index 000000000000..723279c975b1 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_reg_defs.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h new file mode 100644 index 000000000000..d429d421ac70 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#define trace_i915_reg_rw(a...) 
do { } while (0) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h new file mode 100644 index 000000000000..1d7c4360e5c0 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_utils.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h new file mode 100644 index 000000000000..80b024d435dc --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_VGPU_H_ +#define _I915_VGPU_H_ + +#include <linux/types.h> + +struct drm_i915_private; +struct i915_ggtt; + +static inline void intel_vgpu_detect(struct drm_i915_private *i915) +{ +} +static inline bool intel_vgpu_active(struct drm_i915_private *i915) +{ + return false; +} +static inline void intel_vgpu_register(struct drm_i915_private *i915) +{ +} +static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915) +{ + return false; +} +static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915) +{ + return false; +} +static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915) +{ + return false; +} +static inline int intel_vgt_balloon(struct i915_ggtt *ggtt) +{ + return 0; +} +static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt) +{ +} + +#endif /* _I915_VGPU_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h new file mode 100644 index 000000000000..a20d2638ea7a --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef I915_VMA_H +#define I915_VMA_H + +#include <uapi/drm/i915_drm.h> +#include <drm/drm_mm.h> + +/* We don't want these from i915_drm.h in case of Xe */ +#undef I915_TILING_X +#undef I915_TILING_Y +#define I915_TILING_X 0 +#define I915_TILING_Y 0 + +struct xe_bo; + +struct i915_vma { + struct xe_bo *bo, *dpt; + struct drm_mm_node node; +}; + +#define i915_ggtt_clear_scanout(bo) do { } while (0) + +#define i915_vma_fence_id(vma) -1 + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + return vma->node.start; +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h new file mode 100644 index 000000000000..e7aaf50f5485 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/types.h> +#include <linux/build_bug.h> + +/* XX: Figure out how to handle this vma mapping in xe */ +struct intel_remapped_plane_info { + /* in gtt pages */ + u32 offset:31; + u32 linear:1; + union { + /* in gtt pages for !linear */ + struct { + u16 width; + u16 height; + u16 src_stride; + u16 dst_stride; + }; + + /* in gtt pages for linear */ + u32 size; + }; +} __packed; + +struct intel_remapped_info { + struct intel_remapped_plane_info plane[4]; + /* in gtt pages */ + u32 plane_alignment; +} __packed; + +struct intel_rotation_info { + struct intel_remapped_plane_info plane[2]; +} __packed; + +enum i915_gtt_view_type { + I915_GTT_VIEW_NORMAL = 0, + I915_GTT_VIEW_ROTATED = sizeof(struct 
intel_rotation_info), + I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), +}; + +static inline void assert_i915_gem_gtt_types(void) +{ + BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16)); + BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16)); + + /* Check that rotation/remapped shares offsets for simplicity */ + BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != + offsetof(struct intel_rotation_info, plane[0])); + BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != + offsetofend(struct intel_rotation_info, plane[1])); + + /* As we encode the size of each branch inside the union into its type, + * we have to be careful that each branch has a unique size. + */ + switch ((enum i915_gtt_view_type)0) { + case I915_GTT_VIEW_NORMAL: + case I915_GTT_VIEW_ROTATED: + case I915_GTT_VIEW_REMAPPED: + /* gcc complains if these are identical cases */ + break; + } +} + +struct i915_gtt_view { + enum i915_gtt_view_type type; + union { + /* Members need to contain no holes/padding */ + struct intel_rotation_info rotated; + struct intel_remapped_info remapped; + }; +}; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h new file mode 100644 index 000000000000..ce986f0e8f38 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_clock_gating.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h new file mode 100644 index 000000000000..c15806d6c4f7 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_GT_TYPES__ +#define __INTEL_GT_TYPES__ + +#define intel_gt_support_legacy_fencing(gt) 0 + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h new file mode 100644 index 000000000000..55b316985340 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_mchbar_regs.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h new file mode 100644 index 000000000000..8c15867fd613 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_pci_config.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h new file mode 100644 index 000000000000..0c47661bdc6a --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_PCODE_H__ +#define __INTEL_PCODE_H__ + +#include "intel_uncore.h" +#include "xe_pcode.h" + +static inline int +snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, + int fast_timeout_us, int slow_timeout_ms) +{ + return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val, 
+ slow_timeout_ms ?: 1); +} + +static inline int +snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) +{ + + return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val); +} + +static inline int +snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) +{ + return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1); +} + +static inline int +skl_pcode_request(struct intel_uncore *uncore, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + int timeout_base_ms) +{ + return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply, + timeout_base_ms); +} + +#endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h new file mode 100644 index 000000000000..89da3cc62f39 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_wakeref.h" + +#define intel_runtime_pm xe_runtime_pm + +static inline void disable_rpm_wakeref_asserts(void *rpm) +{ +} + +static inline void enable_rpm_wakeref_asserts(void *rpm) +{ +} diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h new file mode 100644 index 000000000000..0006ef812346 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_STEP_H__ +#define __INTEL_STEP_H__ + +#include "xe_device_types.h" +#include "xe_step.h" + +#define intel_display_step_name xe_display_step_name + +static inline +const char *xe_display_step_name(struct xe_device *xe) +{ + return xe_step_name(xe->info.step.display); +} + +#endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h new file mode 100644 index 000000000000..009745328992 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h new file mode 100644 index 000000000000..cd26ddc0f69e --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_UNCORE_H__ +#define __INTEL_UNCORE_H__ + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" + +static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_root_mmio_gt(xe); +} + +static inline u32 intel_uncore_read(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline u32 intel_uncore_read8(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg); +} + +static 
inline u32 intel_uncore_read16(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg); +} + +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t i915_lower_reg, i915_reg_t i915_upper_reg) +{ + struct xe_reg lower_reg = XE_REG(i915_mmio_reg_offset(i915_lower_reg)); + struct xe_reg upper_reg = XE_REG(i915_mmio_reg_offset(i915_upper_reg)); + u32 upper, lower, old_upper; + int loop = 0; + + upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + do { + old_upper = upper; + lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg); + upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + } while (upper != old_upper && loop++ < 2); + + return (u64)upper << 32 | lower; +} + +static inline void intel_uncore_posting_read(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void intel_uncore_write(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); +} + +static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 clear, u32 set) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set); +} + +static inline int intel_wait_for_register(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 mask, + u32 value, unsigned int timeout) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + timeout * USEC_PER_MSEC, NULL, false); +} + +static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 mask, + u32 value, unsigned int timeout) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + timeout * USEC_PER_MSEC, NULL, false); +} + +static inline int +__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, + u32 mask, u32 value, unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + fast_timeout_us + 1000 * slow_timeout_ms, + out_value, false); +} + +static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void intel_uncore_write_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); +} + +static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void intel_uncore_write_notrace(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, 
val); +} + +static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_device_get_root_tile(xe)->mmio.regs; +} + +/* + * The raw_reg_{read,write} macros are intended as a micro-optimization for + * interrupt handlers so that the pointer indirection on uncore->regs can + * be computed once (and presumably cached in a register) instead of generating + * extra load instructions for each MMIO access. + * + * Given that these macros are only intended for non-GSI interrupt registers + * (and the goal is to avoid extra instructions generated by the compiler), + * these macros do not account for uncore->gsi_offset. Any caller that needs + * to use these macros on a GSI register is responsible for adding the + * appropriate GSI offset to the 'base' parameter. + */ +#define raw_reg_read(base, reg) \ + readl(base + i915_mmio_reg_offset(reg)) +#define raw_reg_write(base, reg, value) \ + writel(value, base + i915_mmio_reg_offset(reg)) + +#endif /* __INTEL_UNCORE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h new file mode 100644 index 000000000000..ecb1c0707706 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/types.h> + +typedef unsigned long intel_wakeref_t; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h new file mode 100644 index 000000000000..c2c30ece8f77 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_PXP_H__ +#define __INTEL_PXP_H__ + +#include <linux/errno.h> +#include <linux/types.h> + +struct drm_i915_gem_object; +struct intel_pxp; + +static inline int intel_pxp_key_check(struct intel_pxp *pxp, + struct drm_i915_gem_object *obj, + bool assign) +{ + return -ENODEV; +} + +static inline bool +i915_gem_object_is_protected(const struct drm_i915_gem_object *obj) +{ + return false; +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h new file mode 100644 index 000000000000..65707e20c557 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_dram.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h new file mode 100644 index 000000000000..33c5257b3a71 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_gmch.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h new file mode 100644 index 000000000000..9c46556d33a4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h 
b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h new file mode 100644 index 000000000000..ec6f12de5727 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2013-2021 Intel Corporation + */ + +#ifndef _VLV_SIDEBAND_H_ +#define _VLV_SIDEBAND_H_ + +#include <linux/types.h> + +#include "vlv_sideband_reg.h" + +enum pipe; +struct drm_i915_private; + +enum { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) +{ +} +static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) +{ + return 0; +} +static inline void vlv_iosf_sb_write(struct drm_i915_private *i915, + u8 port, u32 reg, u32 val) +{ +} +static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) +{ +} +static inline void vlv_bunit_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_bunit_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_cck_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_cck_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_ccu_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_ccu_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_dpio_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg) +{ + return 0; +} +static inline void vlv_dpio_write(struct drm_i915_private *i915, + int pipe, int reg, u32 val) +{ +} +static inline void vlv_dpio_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_flisdsi_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_flisdsi_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_nc_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) +{ + return 0; +} +static inline void vlv_nc_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_punit_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) +{ + return 0; +} +static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) +{ + return 0; +} +static inline void vlv_punit_put(struct drm_i915_private *i915) +{ +} + +#endif /* _VLV_SIDEBAND_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h new file mode 100644 index 000000000000..949f134ce3cf --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h @@ -0,0 +1,6 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/vlv_sideband_reg.h" diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c new file mode 100644 index 000000000000..bee191a4a97d --- /dev/null +++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "intel_uncore.h" + +void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, + i915_reg_t iir, i915_reg_t ier) +{ + intel_uncore_write(uncore, imr, 0xffffffff); + intel_uncore_posting_read(uncore, imr); + + intel_uncore_write(uncore, ier, 0); + + /* IIR can theoretically queue up two events. Be paranoid. */ + intel_uncore_write(uncore, iir, 0xffffffff); + intel_uncore_posting_read(uncore, iir); + intel_uncore_write(uncore, iir, 0xffffffff); + intel_uncore_posting_read(uncore, iir); +} + +/* + * We should clear IMR at preinstall/uninstall, and just check at postinstall. + */ +void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + u32 val = intel_uncore_read(uncore, reg); + + if (val == 0) + return; + + drm_WARN(&xe->drm, 1, + "Interrupt register 0x%x is not zero: 0x%08x\n", + i915_mmio_reg_offset(reg), val); + intel_uncore_write(uncore, reg, 0xffffffff); + intel_uncore_posting_read(uncore, reg); + intel_uncore_write(uncore, reg, 0xffffffff); + intel_uncore_posting_read(uncore, reg); +} + +void gen3_irq_init(struct intel_uncore *uncore, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) +{ + gen3_assert_iir_is_zero(uncore, iir); + + intel_uncore_write(uncore, ier, ier_val); + intel_uncore_write(uncore, imr, imr_val); + intel_uncore_posting_read(uncore, imr); +} + +bool intel_irqs_enabled(struct xe_device *xe) +{ + /* + * XXX: i915 has a racy handling of the irq.enabled, since it doesn't + * lock its transitions. Because of that, the irq.enabled sometimes + * is not read with the irq.lock in place. + * However, the most critical cases like vblank and page flips are + * properly using the locks. + * We cannot take the lock in here or run any kind of assert because + * of i915 inconsistency. + * But at this point the xe irq is better protected against races, + * although the full solution would be protecting the i915 side. 
+ */
+ return xe->irq.enabled;
+}
+
+void intel_synchronize_irq(struct xe_device *xe)
+{
+ synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+} diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c new file mode 100644 index 000000000000..43b10a2cc508 --- /dev/null +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+bool i915_vtd_active(struct drm_i915_private *i915)
+{
+ if (device_iommu_mapped(i915->drm.dev))
+ return true;
+
+ /* Running as a guest, we assume the host is enforcing VT-d */
+ return i915_run_as_guest();
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+
+/* i915 specific, just put here for shutting it up */
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+ const char *func, int line)
+{
+ return 0;
+}
+
+#endif diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c new file mode 100644 index 000000000000..b21da7b745a5 --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_modeset_helper.h>
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+{
+ if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+ /* Unpin our kernel fb first */
+ xe_bo_lock(bo, false);
+ xe_bo_unpin(bo);
+ xe_bo_unlock(bo);
+ }
+ xe_bo_put(bo);
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+ struct xe_bo *bo,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct drm_i915_private *i915 = to_i915(bo->ttm.base.dev);
+ int ret;
+
+ xe_bo_get(bo);
+
+ ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
+ if (ret)
+ return ret;
+
+ if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+ /*
+ * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+ * automatically set when creating an FB. We cannot change the
+ * caching mode while the object is VM_BINDed, so we can only set
+ * coherency with display when unbound.
+ */
+ if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
+ ttm_bo_unreserve(&bo->ttm);
+ return -EINVAL;
+ }
+ bo->flags |= XE_BO_SCANOUT_BIT;
+ }
+ ttm_bo_unreserve(&bo->ttm);
+
+ return ret;
+}
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+ struct drm_file *filp,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct drm_i915_gem_object *bo;
+ struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+
+ if (!gem)
+ return ERR_PTR(-ENOENT);
+
+ bo = gem_to_xe_bo(gem);
+ /* Require vram placement or dma-buf import */
+ if (IS_DGFX(i915) &&
+ !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+ bo->ttm.type != ttm_bo_type_sg) {
+ drm_gem_object_put(gem);
+ return ERR_PTR(-EREMOTE);
+ }
+
+ return bo;
+} diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h new file mode 100644 index 000000000000..5d365b925b7a --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_private;
+struct intel_framebuffer;
+struct xe_bo;
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+ struct xe_bo *bo,
+ struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+ struct drm_file *filp,
+ const struct drm_mode_fb_cmd2 *mode_cmd);
+
+#endif diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c new file mode 100644 index 000000000000..51ae3561fd0d --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_fbdev_fb.h"
+
+#include <drm/drm_fb_helper.h>
+
+#include "xe_gt.h"
+#include "xe_ttm_stolen_mgr.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+ struct drm_fb_helper_surface_size *sizes)
+{
+ struct drm_framebuffer *fb;
+ struct drm_device *dev = helper->dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_mode_fb_cmd2 mode_cmd = {};
+ struct drm_i915_gem_object *obj;
+ int size;
+
+ /* we don't do packed 24bpp */
+ if (sizes->surface_bpp == 24)
+ sizes->surface_bpp = 32;
+
+ mode_cmd.width = sizes->surface_width;
+ mode_cmd.height = sizes->surface_height;
+
+ mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+ DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
+ mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+ sizes->surface_depth);
+
+ size = mode_cmd.pitches[0] * mode_cmd.height;
+ size = PAGE_ALIGN(size);
+ obj = ERR_PTR(-ENODEV);
+
+ if (!IS_DGFX(dev_priv)) {
+ obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
+ NULL, size,
+ ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+ XE_BO_CREATE_STOLEN_BIT |
+ XE_BO_CREATE_PINNED_BIT);
+ if (!IS_ERR(obj))
+ drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
+ else
+ drm_info(&dev_priv->drm, "Allocating fbdev from stolen failed: %li\n", PTR_ERR(obj));
+ }
+ if (IS_ERR(obj)) {
+ obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
+ ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+ XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+
XE_BO_CREATE_PINNED_BIT); + } + + if (IS_ERR(obj)) { + drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + fb = ERR_PTR(-ENOMEM); + goto err; + } + + fb = intel_framebuffer_create(obj, &mode_cmd); + if (IS_ERR(fb)) { + xe_bo_unpin_map_no_vm(obj); + goto err; + } + + drm_gem_object_put(intel_bo_to_drm_bo(obj)); + return fb; + +err: + return fb; +} + +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + + if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) { + if (obj->flags & XE_BO_CREATE_STOLEN_BIT) + info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0); + else + info->fix.smem_start = + pci_resource_start(pdev, 2) + + xe_bo_addr(obj, 0, XE_PAGE_SIZE); + + info->fix.smem_len = obj->ttm.base.size; + } else { + /* XXX: Pure fiction, as the BO may not be physically accessible.. */ + info->fix.smem_start = 0; + info->fix.smem_len = obj->ttm.base.size; + } + XE_WARN_ON(iosys_map_is_null(&obj->vmap)); + + info->screen_base = obj->vmap.vaddr_iomem; + info->screen_size = intel_bo_to_drm_bo(obj)->size; + + return 0; +} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.h b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h new file mode 100644 index 000000000000..ea186772e0bb --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_FBDEV_FB_H__ +#define __INTEL_FBDEV_FB_H__ + +struct drm_fb_helper; +struct drm_fb_helper_surface_size; +struct drm_i915_gem_object; +struct drm_i915_private; +struct fb_info; +struct i915_vma; + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes); +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma); + +#endif diff --git a/drivers/gpu/drm/xe/display/xe_display_misc.c b/drivers/gpu/drm/xe/display/xe_display_misc.c new file mode 100644 index 000000000000..242c2ef4ca93 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_misc.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_display_types.h" + +struct pci_dev; + +unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode); + +unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode) +{ + /* ToDo: Implement the actual handling of vga decode */ + return 0; +} diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c new file mode 100644 index 000000000000..ab21c581c192 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_rps.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_display_rps.h" + +void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, + struct dma_fence *fence) +{ +} + +void intel_display_rps_mark_interactive(struct drm_i915_private *i915, + struct intel_atomic_state *state, + bool interactive) +{ +} diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c new file mode 100644 index 000000000000..27c2fb1c002a --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023, Intel Corporation. 
+ */ + +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_display_types.h" +#include "intel_dsb_buffer.h" +#include "xe_bo.h" +#include "xe_gt.h" + +u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) +{ + return xe_bo_ggtt_addr(dsb_buf->vma->bo); +} + +void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) +{ + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); +} + +u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) +{ + return iosys_map_rd(&dsb_buf->vma->bo->vmap, idx * 4, u32); +} + +void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) +{ + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); + + iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); +} + +bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) + return false; + + obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915), + NULL, PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(obj)) { + kfree(vma); + return false; + } + + vma->bo = obj; + dsb_buf->vma = vma; + dsb_buf->buf_size = size; + + return true; +} + +void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) +{ + xe_bo_unpin_map_no_vm(dsb_buf->vma->bo); + kfree(dsb_buf->vma); +} + +void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) +{ + /* TODO: add xe specific flush_map() for dsb buffer object. */ +} diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c new file mode 100644 index 000000000000..722c84a56607 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_display_types.h" +#include "intel_dpt.h" +#include "intel_fb.h" +#include "intel_fb_pin.h" +#include "xe_ggtt.h" +#include "xe_gt.h" + +#include <drm/ttm/ttm_bo.h> + +static void +write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, + u32 width, u32 height, u32 src_stride, u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u32 column, row; + + /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, + * by writing dpt/ggtt in a different order? 
+ */ + + for (column = 0; column < width; column++) { + u32 src_idx = src_stride * (height - 1) + column + bo_ofs; + + for (row = 0; row < height; row++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + iosys_map_wr(map, *dpt_ofs, u64, pte); + *dpt_ofs += 8; + src_idx -= src_stride; + } + + /* The DE ignores the PTEs for the padding tiles */ + *dpt_ofs += (dst_stride - height) * 8; + } + + /* Align to next page */ + *dpt_ofs = ALIGN(*dpt_ofs, 4096); +} + +static void +write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, + u32 bo_ofs, u32 width, u32 height, u32 src_stride, + u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) + = ggtt->pt_ops->pte_encode_bo; + u32 column, row; + + for (row = 0; row < height; row++) { + u32 src_idx = src_stride * row + bo_ofs; + + for (column = 0; column < width; column++) { + iosys_map_wr(map, *dpt_ofs, u64, + pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB])); + + *dpt_ofs += 8; + src_idx++; + } + + /* The DE ignores the PTEs for the padding tiles */ + *dpt_ofs += (dst_stride - width) * 8; + } + + /* Align to next page */ + *dpt_ofs = ALIGN(*dpt_ofs, 4096); +} + +static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, + const struct i915_gtt_view *view, + struct i915_vma *vma) +{ + struct xe_device *xe = to_xe_device(fb->base.dev); + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_ggtt *ggtt = tile0->mem.ggtt; + struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt; + u32 dpt_size, size = bo->ttm.base.size; + + if (view->type == I915_GTT_VIEW_NORMAL) + dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE); + else if (view->type == I915_GTT_VIEW_REMAPPED) + dpt_size = ALIGN(intel_remapped_info_size(&fb->remapped_view.gtt.remapped) * 8, + XE_PAGE_SIZE); + else + /* display uses 4K tiles instead of bytes here, convert to entries.. 
*/ + dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8, + XE_PAGE_SIZE); + + if (IS_DGFX(xe)) + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM0_BIT | + XE_BO_CREATE_GGTT_BIT); + else + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_STOLEN_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(dpt)) + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(dpt)) + return PTR_ERR(dpt); + + if (view->type == I915_GTT_VIEW_NORMAL) { + u32 x; + + for (x = 0; x < size / XE_PAGE_SIZE; x++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + iosys_map_wr(&dpt->vmap, x * 8, u64, pte); + } + } else if (view->type == I915_GTT_VIEW_REMAPPED) { + const struct intel_remapped_info *remap_info = &view->remapped; + u32 i, dpt_ofs = 0; + + for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) + write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs, + remap_info->plane[i].offset, + remap_info->plane[i].width, + remap_info->plane[i].height, + remap_info->plane[i].src_stride, + remap_info->plane[i].dst_stride); + + } else { + const struct intel_rotation_info *rot_info = &view->rotated; + u32 i, dpt_ofs = 0; + + for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) + write_dpt_rotated(bo, &dpt->vmap, &dpt_ofs, + rot_info->plane[i].offset, + rot_info->plane[i].width, + rot_info->plane[i].height, + rot_info->plane[i].src_stride, + rot_info->plane[i].dst_stride); + } + + vma->dpt = dpt; + vma->node = dpt->ggtt_node; + return 0; +} + +static void +write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo_ofs, + u32 width, u32 height, u32 src_stride, u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + u32 column, row; + + for (column = 0; column < width; column++) { + u32 src_idx = src_stride * (height - 1) + column + bo_ofs; + + for (row = 0; row < height; row++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); + *ggtt_ofs += XE_PAGE_SIZE; + src_idx -= src_stride; + } + + /* The DE ignores the PTEs for the padding tiles */ + *ggtt_ofs += (dst_stride - height) * XE_PAGE_SIZE; + } +} + +static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, + const struct i915_gtt_view *view, + struct i915_vma *vma) +{ + struct xe_bo *bo = intel_fb_obj(&fb->base); + struct xe_device *xe = to_xe_device(fb->base.dev); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u32 align; + int ret; + + /* TODO: Consider sharing framebuffer mapping? 
+ * embed i915_vma inside intel_framebuffer
+ */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	ret = mutex_lock_interruptible(&ggtt->lock);
+	if (ret)
+		goto out;
+
+	align = XE_PAGE_SIZE;
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		align = max_t(u32, align, SZ_64K);
+
+	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
+		vma->node = bo->ggtt_node;
+	} else if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x, size = bo->ttm.base.size;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		for (x = 0; x < size; x += XE_PAGE_SIZE) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
+		}
+	} else {
+		u32 i, ggtt_ofs;
+		const struct intel_rotation_info *rot_info = &view->rotated;
+
+		/* display seems to use tiles instead of bytes here, so convert it back.. */
+		u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		ggtt_ofs = vma->node.start;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_ggtt_rotated(bo, ggtt, &ggtt_ofs,
+					   rot_info->plane[i].offset,
+					   rot_info->plane[i].width,
+					   rot_info->plane[i].height,
+					   rot_info->plane[i].src_stride,
+					   rot_info->plane[i].dst_stride);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+out_unlock:
+	mutex_unlock(&ggtt->lock);
+out:
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	return ret;
+}
+
+static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
+					const struct i915_gtt_view *view)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct xe_device *xe = to_xe_device(dev);
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	int ret;
+
+	if (!vma)
+		return ERR_PTR(-ENODEV);
+
+	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
+	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
+	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+		struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+		/*
+		 * If we need to be able to access the clear-color value stored
+		 * in the buffer, then we require that such buffers are also CPU
+		 * accessible. This is important on small-bar systems where
+		 * only some subset of VRAM is CPU accessible.
+		 */
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
+	/*
+	 * Pin the framebuffer; we can't use the xe_bo_(un)pin functions, as
+	 * their assumptions don't hold for framebuffers.
+	 */
+	ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	if (ret)
+		goto err;
+
+	if (IS_DGFX(xe))
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0);
+	else
+		ret = xe_bo_validate(bo, NULL, true);
+	if (!ret)
+		ttm_bo_pin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+	if (ret)
+		goto err;
+
+	vma->bo = bo;
+	if (intel_fb_uses_dpt(&fb->base))
+		ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+	else
+		ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+	if (ret)
+		goto err_unpin;
+
+	return vma;
+
+err_unpin:
+	ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+err:
+	kfree(vma);
+	return ERR_PTR(ret);
+}
+
+static void __xe_unpin_fb_vma(struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+
+	if (vma->dpt)
+		xe_bo_unpin_map_no_vm(vma->dpt);
+	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
+		 vma->bo->ggtt_node.start != vma->node.start)
+		xe_ggtt_remove_node(ggtt, &vma->node);
+
+	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&vma->bo->ttm);
+	ttm_bo_unreserve(&vma->bo->ttm);
+	kfree(vma);
+}
+
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   bool phys_cursor,
+			   const struct i915_gtt_view *view,
+			   bool uses_fence,
+			   unsigned long *out_flags)
+{
+	*out_flags = 0;
+
+	return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+}
+
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
+{
+	__xe_unpin_fb_vma(vma);
+}
+
+int intel_plane_pin_fb(struct intel_plane_state *plane_state)
+{
+	struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct xe_bo *bo = intel_fb_obj(fb);
+	struct i915_vma *vma;
+
+	/* We reject creating !SCANOUT fb's, so this is weird.. */
+	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+
+	vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	plane_state->ggtt_vma = vma;
+	return 0;
+}
+
+void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
+{
+	__xe_unpin_fb_vma(old_plane_state->ggtt_vma);
+	old_plane_state->ggtt_vma = NULL;
+}
+
+/*
+ * For Xe, introduce a dummy intel_dpt_create() which just returns NULL,
+ * and intel_dpt_destroy() which does nothing.
+ */
+struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
+{
+	return NULL;
+}
+
+void intel_dpt_destroy(struct i915_address_space *vm)
+{
+	return;
+}
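(Editorial sketch, not part of the patch: a minimal, standalone model of the index arithmetic used by write_dpt_rotated() earlier in this file. For a 90-degree rotated view the DPT is filled column by column, each column walking the BO pages bottom-up. The dimensions below are made-up example values; only the loop structure mirrors the driver code.)

#include <stdio.h>

int main(void)
{
	/* Example view: 2 pages wide, 3 pages high, BO rows 2 pages apart */
	unsigned int width = 2, height = 3, src_stride = 2, bo_ofs = 0;
	unsigned int column, row, dpt_entry = 0;

	for (column = 0; column < width; column++) {
		/* start at the bottom of the column, as write_dpt_rotated() does */
		unsigned int src_idx = src_stride * (height - 1) + column + bo_ofs;

		for (row = 0; row < height; row++) {
			printf("dpt[%u] -> bo page %u\n", dpt_entry++, src_idx);
			src_idx -= src_stride;
		}
	}

	return 0;
}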
\ No newline at end of file diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c new file mode 100644 index 000000000000..0f11a39333e2 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023, Intel Corporation. + */ + +#include "i915_drv.h" +#include "intel_hdcp_gsc.h" + +bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915) +{ + return true; +} + +bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915) +{ + return false; +} + +int intel_hdcp_gsc_init(struct drm_i915_private *i915) +{ + drm_info(&i915->drm, "HDCP support not yet implemented\n"); + return -ENODEV; +} + +void intel_hdcp_gsc_fini(struct drm_i915_private *i915) +{ +} + +ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, + size_t msg_in_len, u8 *msg_out, + size_t msg_out_len) +{ + return -ENODEV; +} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c new file mode 100644 index 000000000000..ccf83c12b545 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +/* for ioread64 */ +#include <linux/io-64-nonatomic-lo-hi.h> + +#include "xe_ggtt.h" + +#include "i915_drv.h" +#include "intel_atomic_plane.h" +#include "intel_display.h" +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_fb_pin.h" +#include "intel_frontbuffer.h" +#include "intel_plane_initial.h" + +static bool +intel_reuse_initial_plane_obj(struct drm_i915_private *i915, + const struct intel_initial_plane_config *plane_config, + struct drm_framebuffer **fb) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) + continue; + + if (!plane_state->ggtt_vma) + continue; + + if (intel_plane_ggtt_offset(plane_state) == plane_config->base) { + *fb = plane_state->hw.fb; + return true; + } + } + + return false; +} + +static struct xe_bo * +initial_plane_bo(struct xe_device *xe, + struct intel_initial_plane_config *plane_config) +{ + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_bo *bo; + resource_size_t phys_base; + u32 base, size, flags; + u64 page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; + + if (plane_config->size == 0) + return NULL; + + flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT; + + base = round_down(plane_config->base, page_size); + if (IS_DGFX(xe)) { + u64 __iomem *gte = tile0->mem.ggtt->gsm; + u64 pte; + + gte += base / XE_PAGE_SIZE; + + pte = ioread64(gte); + if (!(pte & XE_GGTT_PTE_DM)) { + drm_err(&xe->drm, + "Initial plane programming missing DM bit\n"); + return NULL; + } + + phys_base = pte & ~(page_size - 1); + flags |= XE_BO_CREATE_VRAM0_BIT; + + /* + * We don't currently expect this to ever be placed in the + * stolen portion. 
+ */ + if (phys_base >= tile0->mem.vram.usable_size) { + drm_err(&xe->drm, + "Initial plane programming using invalid range, phys_base=%pa\n", + &phys_base); + return NULL; + } + + drm_dbg(&xe->drm, + "Using phys_base=%pa, based on initial plane programming\n", + &phys_base); + } else { + struct ttm_resource_manager *stolen = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + + if (!stolen) + return NULL; + phys_base = base; + flags |= XE_BO_CREATE_STOLEN_BIT; + + /* + * If the FB is too big, just don't use it since fbdev is not very + * important and we should probably use that space with FBC or other + * features. + */ + if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) && + plane_config->size * 2 >> PAGE_SHIFT >= stolen->size) + return NULL; + } + + size = round_up(plane_config->base + plane_config->size, + page_size); + size -= base; + + bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, + ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) { + drm_dbg(&xe->drm, + "Failed to create bo phys_base=%pa size %u with flags %x: %li\n", + &phys_base, size, flags, PTR_ERR(bo)); + return NULL; + } + + return bo; +} + +static bool +intel_alloc_initial_plane_obj(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_mode_fb_cmd2 mode_cmd = { 0 }; + struct drm_framebuffer *fb = &plane_config->fb->base; + struct xe_bo *bo; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_4_TILED: + break; + default: + drm_dbg(&dev_priv->drm, + "Unsupported modifier for initial FB: 0x%llx\n", + fb->modifier); + return false; + } + + mode_cmd.pixel_format = fb->format->format; + mode_cmd.width = fb->width; + mode_cmd.height = fb->height; + mode_cmd.pitches[0] = fb->pitches[0]; + mode_cmd.modifier[0] = fb->modifier; + mode_cmd.flags = DRM_MODE_FB_MODIFIERS; + + bo = initial_plane_bo(dev_priv, plane_config); + if (!bo) + return false; + + if (intel_framebuffer_init(to_intel_framebuffer(fb), + bo, &mode_cmd)) { + drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n"); + goto err_bo; + } + /* Reference handed over to fb */ + xe_bo_put(bo); + + return true; + +err_bo: + xe_bo_unpin_map_no_vm(bo); + return false; +} + +static void +intel_find_initial_plane_obj(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct drm_framebuffer *fb; + struct i915_vma *vma; + + /* + * TODO: + * Disable planes if get_initial_plane_config() failed. + * Make sure things work if the surface base is not page aligned. 
+ */ + if (!plane_config->fb) + return; + + if (intel_alloc_initial_plane_obj(crtc, plane_config)) + fb = &plane_config->fb->base; + else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb)) + goto nofb; + + plane_state->uapi.rotation = plane_config->rotation; + intel_fb_fill_view(to_intel_framebuffer(fb), + plane_state->uapi.rotation, &plane_state->view); + + vma = intel_pin_and_fence_fb_obj(fb, false, &plane_state->view.gtt, + false, &plane_state->flags); + if (IS_ERR(vma)) + goto nofb; + + plane_state->ggtt_vma = vma; + plane_state->uapi.src_x = 0; + plane_state->uapi.src_y = 0; + plane_state->uapi.src_w = fb->width << 16; + plane_state->uapi.src_h = fb->height << 16; + + plane_state->uapi.crtc_x = 0; + plane_state->uapi.crtc_y = 0; + plane_state->uapi.crtc_w = fb->width; + plane_state->uapi.crtc_h = fb->height; + + plane_state->uapi.fb = fb; + drm_framebuffer_get(fb); + + plane_state->uapi.crtc = &crtc->base; + intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc); + + atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); + + plane_config->vma = vma; + + /* + * Flip to the newly created mapping ASAP, so we can re-use the + * first part of GGTT for WOPCM, prevent flickering, and prevent + * the lookup of sysmem scratch pages. + */ + plane->check_plane(crtc_state, plane_state); + plane->async_flip(plane, crtc_state, plane_state, true); + return; + +nofb: + /* + * We've failed to reconstruct the BIOS FB. Current display state + * indicates that the primary plane is visible, but has a NULL FB, + * which will lead to problems later if we don't fix it up. The + * simplest solution is to just disable the primary plane now and + * pretend the BIOS never had it enabled. + */ + intel_plane_disable_noatomic(crtc, plane); +} + +static void plane_config_fini(struct intel_initial_plane_config *plane_config) +{ + if (plane_config->fb) { + struct drm_framebuffer *fb = &plane_config->fb->base; + + /* We may only have the stub and not a full framebuffer */ + if (drm_framebuffer_read_refcount(fb)) + drm_framebuffer_put(fb); + else + kfree(fb); + } +} + +void intel_crtc_initial_plane_config(struct intel_crtc *crtc) +{ + struct xe_device *xe = to_xe_device(crtc->base.dev); + struct intel_initial_plane_config plane_config = {}; + + /* + * Note that reserving the BIOS fb up front prevents us + * from stuffing other stolen allocations like the ring + * on top. This prevents some ugliness at boot time, and + * can even allow for smooth boot transitions if the BIOS + * fb is large enough for the active pipe configuration. + */ + xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config); + + /* + * If the fb is shared between multiple heads, we'll + * just get the first one. 
+ */ + intel_find_initial_plane_obj(crtc, &plane_config); + + plane_config_fini(&plane_config); +} diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h new file mode 100644 index 000000000000..8e6dd061f2ae --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GFXPIPE_COMMANDS_H_ +#define _XE_GFXPIPE_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +#define GFXPIPE_PIPELINE REG_GENMASK(28, 27) +#define PIPELINE_COMMON REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x0) +#define PIPELINE_SINGLE_DW REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x1) +#define PIPELINE_COMPUTE REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x2) +#define PIPELINE_3D REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x3) + +#define GFXPIPE_OPCODE REG_GENMASK(26, 24) +#define GFXPIPE_SUBOPCODE REG_GENMASK(23, 16) + +#define GFXPIPE_MATCH_MASK (XE_INSTR_CMD_TYPE | \ + GFXPIPE_PIPELINE | \ + GFXPIPE_OPCODE | \ + GFXPIPE_SUBOPCODE) + +#define GFXPIPE_COMMON_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_COMMON | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define GFXPIPE_SINGLE_DW_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_SINGLE_DW | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define GFXPIPE_3D_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_3D | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define GFXPIPE_COMPUTE_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_COMPUTE | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define STATE_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x1) +#define STATE_SIP GFXPIPE_COMMON_CMD(0x1, 0x2) +#define GPGPU_CSR_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x4) +#define STATE_COMPUTE_MODE GFXPIPE_COMMON_CMD(0x1, 0x5) +#define CMD_3DSTATE_BTD GFXPIPE_COMMON_CMD(0x1, 0x6) + +#define CMD_3DSTATE_VF_STATISTICS GFXPIPE_SINGLE_DW_CMD(0x0, 0xB) + +#define PIPELINE_SELECT GFXPIPE_SINGLE_DW_CMD(0x1, 0x4) + +#define CMD_3DSTATE_DRAWING_RECTANGLE_FAST GFXPIPE_3D_CMD(0x0, 0x0) +#define CMD_3DSTATE_CLEAR_PARAMS GFXPIPE_3D_CMD(0x0, 0x4) +#define CMD_3DSTATE_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x5) +#define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) +#define CMD_3DSTATE_HIER_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x7) +#define CMD_3DSTATE_VERTEX_BUFFERS GFXPIPE_3D_CMD(0x0, 0x8) +#define CMD_3DSTATE_VERTEX_ELEMENTS GFXPIPE_3D_CMD(0x0, 0x9) +#define CMD_3DSTATE_INDEX_BUFFER GFXPIPE_3D_CMD(0x0, 0xA) +#define CMD_3DSTATE_VF GFXPIPE_3D_CMD(0x0, 0xC) +#define CMD_3DSTATE_MULTISAMPLE GFXPIPE_3D_CMD(0x0, 0xD) +#define CMD_3DSTATE_CC_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0xE) +#define CMD_3DSTATE_SCISSOR_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0xF) +#define CMD_3DSTATE_VS GFXPIPE_3D_CMD(0x0, 0x10) +#define CMD_3DSTATE_GS GFXPIPE_3D_CMD(0x0, 0x11) +#define CMD_3DSTATE_CLIP GFXPIPE_3D_CMD(0x0, 0x12) +#define CMD_3DSTATE_SF GFXPIPE_3D_CMD(0x0, 0x13) +#define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14) +#define CMD_3DSTATE_CONSTANT_VS GFXPIPE_3D_CMD(0x0, 0x15) +#define CMD_3DSTATE_CONSTANT_GS GFXPIPE_3D_CMD(0x0, 0x16) +#define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18) +#define CMD_3DSTATE_CONSTANT_HS GFXPIPE_3D_CMD(0x0, 0x19) +#define CMD_3DSTATE_CONSTANT_DS GFXPIPE_3D_CMD(0x0, 0x1A) +#define CMD_3DSTATE_HS GFXPIPE_3D_CMD(0x0, 
0x1B) +#define CMD_3DSTATE_TE GFXPIPE_3D_CMD(0x0, 0x1C) +#define CMD_3DSTATE_DS GFXPIPE_3D_CMD(0x0, 0x1D) +#define CMD_3DSTATE_STREAMOUT GFXPIPE_3D_CMD(0x0, 0x1E) +#define CMD_3DSTATE_SBE GFXPIPE_3D_CMD(0x0, 0x1F) +#define CMD_3DSTATE_PS GFXPIPE_3D_CMD(0x0, 0x20) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP GFXPIPE_3D_CMD(0x0, 0x21) +#define CMD_3DSTATE_CPS_POINTERS GFXPIPE_3D_CMD(0x0, 0x22) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC GFXPIPE_3D_CMD(0x0, 0x23) +#define CMD_3DSTATE_BLEND_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0x24) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_VS GFXPIPE_3D_CMD(0x0, 0x26) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_HS GFXPIPE_3D_CMD(0x0, 0x27) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_DS GFXPIPE_3D_CMD(0x0, 0x28) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_GS GFXPIPE_3D_CMD(0x0, 0x29) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2A) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS GFXPIPE_3D_CMD(0x0, 0x2B) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS GFXPIPE_3D_CMD(0x0, 0x2C) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS GFXPIPE_3D_CMD(0x0, 0x2D) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS GFXPIPE_3D_CMD(0x0, 0x2E) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2F) +#define CMD_3DSTATE_VF_INSTANCING GFXPIPE_3D_CMD(0x0, 0x49) +#define CMD_3DSTATE_VF_SGVS GFXPIPE_3D_CMD(0x0, 0x4A) +#define CMD_3DSTATE_VF_TOPOLOGY GFXPIPE_3D_CMD(0x0, 0x4B) +#define CMD_3DSTATE_WM_CHROMAKEY GFXPIPE_3D_CMD(0x0, 0x4C) +#define CMD_3DSTATE_PS_BLEND GFXPIPE_3D_CMD(0x0, 0x4D) +#define CMD_3DSTATE_WM_DEPTH_STENCIL GFXPIPE_3D_CMD(0x0, 0x4E) +#define CMD_3DSTATE_PS_EXTRA GFXPIPE_3D_CMD(0x0, 0x4F) +#define CMD_3DSTATE_RASTER GFXPIPE_3D_CMD(0x0, 0x50) +#define CMD_3DSTATE_SBE_SWIZ GFXPIPE_3D_CMD(0x0, 0x51) +#define CMD_3DSTATE_WM_HZ_OP GFXPIPE_3D_CMD(0x0, 0x52) +#define CMD_3DSTATE_VF_COMPONENT_PACKING GFXPIPE_3D_CMD(0x0, 0x55) +#define CMD_3DSTATE_VF_SGVS_2 GFXPIPE_3D_CMD(0x0, 0x56) +#define CMD_3DSTATE_VFG GFXPIPE_3D_CMD(0x0, 0x57) +#define CMD_3DSTATE_URB_ALLOC_VS GFXPIPE_3D_CMD(0x0, 0x58) +#define CMD_3DSTATE_URB_ALLOC_HS GFXPIPE_3D_CMD(0x0, 0x59) +#define CMD_3DSTATE_URB_ALLOC_DS GFXPIPE_3D_CMD(0x0, 0x5A) +#define CMD_3DSTATE_URB_ALLOC_GS GFXPIPE_3D_CMD(0x0, 0x5B) +#define CMD_3DSTATE_SO_BUFFER_INDEX_0 GFXPIPE_3D_CMD(0x0, 0x60) +#define CMD_3DSTATE_SO_BUFFER_INDEX_1 GFXPIPE_3D_CMD(0x0, 0x61) +#define CMD_3DSTATE_SO_BUFFER_INDEX_2 GFXPIPE_3D_CMD(0x0, 0x62) +#define CMD_3DSTATE_SO_BUFFER_INDEX_3 GFXPIPE_3D_CMD(0x0, 0x63) +#define CMD_3DSTATE_PRIMITIVE_REPLICATION GFXPIPE_3D_CMD(0x0, 0x6C) +#define CMD_3DSTATE_TBIMR_TILE_PASS_INFO GFXPIPE_3D_CMD(0x0, 0x6E) +#define CMD_3DSTATE_AMFS GFXPIPE_3D_CMD(0x0, 0x6F) +#define CMD_3DSTATE_DEPTH_BOUNDS GFXPIPE_3D_CMD(0x0, 0x71) +#define CMD_3DSTATE_AMFS_TEXTURE_POINTERS GFXPIPE_3D_CMD(0x0, 0x72) +#define CMD_3DSTATE_CONSTANT_TS_POINTER GFXPIPE_3D_CMD(0x0, 0x73) +#define CMD_3DSTATE_MESH_CONTROL GFXPIPE_3D_CMD(0x0, 0x77) +#define CMD_3DSTATE_MESH_DISTRIB GFXPIPE_3D_CMD(0x0, 0x78) +#define CMD_3DSTATE_TASK_REDISTRIB GFXPIPE_3D_CMD(0x0, 0x79) +#define CMD_3DSTATE_MESH_SHADER GFXPIPE_3D_CMD(0x0, 0x7A) +#define CMD_3DSTATE_MESH_SHADER_DATA GFXPIPE_3D_CMD(0x0, 0x7B) +#define CMD_3DSTATE_TASK_CONTROL GFXPIPE_3D_CMD(0x0, 0x7C) +#define CMD_3DSTATE_TASK_SHADER GFXPIPE_3D_CMD(0x0, 0x7D) +#define CMD_3DSTATE_TASK_SHADER_DATA GFXPIPE_3D_CMD(0x0, 0x7E) +#define CMD_3DSTATE_URB_ALLOC_MESH GFXPIPE_3D_CMD(0x0, 0x7F) +#define CMD_3DSTATE_URB_ALLOC_TASK GFXPIPE_3D_CMD(0x0, 0x80) +#define 
CMD_3DSTATE_CLIP_MESH			GFXPIPE_3D_CMD(0x0, 0x81)
+#define CMD_3DSTATE_SBE_MESH			GFXPIPE_3D_CMD(0x0, 0x82)
+#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER	GFXPIPE_3D_CMD(0x0, 0x83)
+
+#define CMD_3DSTATE_DRAWING_RECTANGLE		GFXPIPE_3D_CMD(0x1, 0x0)
+#define CMD_3DSTATE_CHROMA_KEY			GFXPIPE_3D_CMD(0x1, 0x4)
+#define CMD_3DSTATE_POLY_STIPPLE_OFFSET		GFXPIPE_3D_CMD(0x1, 0x6)
+#define CMD_3DSTATE_POLY_STIPPLE_PATTERN	GFXPIPE_3D_CMD(0x1, 0x7)
+#define CMD_3DSTATE_LINE_STIPPLE		GFXPIPE_3D_CMD(0x1, 0x8)
+#define CMD_3DSTATE_AA_LINE_PARAMETERS		GFXPIPE_3D_CMD(0x1, 0xA)
+#define CMD_3DSTATE_MONOFILTER_SIZE		GFXPIPE_3D_CMD(0x1, 0x11)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GFXPIPE_3D_CMD(0x1, 0x12)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GFXPIPE_3D_CMD(0x1, 0x13)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GFXPIPE_3D_CMD(0x1, 0x14)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_GS	GFXPIPE_3D_CMD(0x1, 0x15)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS	GFXPIPE_3D_CMD(0x1, 0x16)
+#define CMD_3DSTATE_SO_DECL_LIST		GFXPIPE_3D_CMD(0x1, 0x17)
+#define CMD_3DSTATE_SO_DECL_LIST_DW_LEN		REG_GENMASK(8, 0)
+#define CMD_3DSTATE_SO_BUFFER			GFXPIPE_3D_CMD(0x1, 0x18)
+#define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC	GFXPIPE_3D_CMD(0x1, 0x19)
+#define CMD_3DSTATE_SAMPLE_PATTERN		GFXPIPE_3D_CMD(0x1, 0x1C)
+#define CMD_3DSTATE_3D_MODE			GFXPIPE_3D_CMD(0x1, 0x1E)
+#define CMD_3DSTATE_SUBSLICE_HASH_TABLE		GFXPIPE_3D_CMD(0x1, 0x1F)
+#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS	GFXPIPE_3D_CMD(0x1, 0x20)
+#define CMD_3DSTATE_PTBR_TILE_PASS_INFO		GFXPIPE_3D_CMD(0x1, 0x22)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
new file mode 100644
index 000000000000..f8949cad9d0f
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_COMMANDS_H_
+#define _XE_GSC_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * All GSCCS-specific commands have fixed length, so we can include it in the
+ * defines. Note that the generic GSC command header structure includes an
+ * optional data field in bits 9-21, but there are no commands that actually use
+ * it; some of the commands are instead defined as having an extended length
+ * field spanning bits 0-15, even if the extra bits are not required because the
+ * longest GSCCS command is only 8 dwords. To handle this, the defines below use
+ * a single field for both data and len. If we ever get a command that does
+ * actually have data and this approach doesn't work for it, we can re-work it
+ * at that point.
+ */
+
+#define GSC_OPCODE		REG_GENMASK(28, 22)
+#define GSC_CMD_DATA_AND_LEN	REG_GENMASK(21, 0)
+
+#define __GSC_INSTR(op, dl) \
+	(XE_INSTR_GSC | \
+	REG_FIELD_PREP(GSC_OPCODE, op) | \
+	REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl))
+
+#define GSC_HECI_CMD_PKT __GSC_INSTR(0, 6)
+
+#define GSC_FW_LOAD __GSC_INSTR(1, 2)
+#define GSC_FW_LOAD_LIMIT_VALID REG_BIT(31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
new file mode 100644
index 000000000000..04179b2a48e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_INSTR_DEFS_H_
+#define _XE_INSTR_DEFS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The first dword of any GPU instruction is the "instruction header."
Bits + * 31:29 identify the general type of the command and determine how exact + * opcodes and sub-opcodes will be encoded in the remaining bits. + */ +#define XE_INSTR_CMD_TYPE GENMASK(31, 29) +#define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) +#define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2) +#define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) + +/* + * Most (but not all) instructions have a "length" field in the instruction + * header. The value expected is the total number of dwords for the + * instruction, minus two. + * + * Some instructions have length fields longer or shorter than 8 bits, but + * those are rare. This definition can be used for the common case where + * the length field is from 7:0. + */ +#define XE_INSTR_LEN_MASK GENMASK(7, 0) +#define XE_INSTR_NUM_DW(x) REG_FIELD_PREP(XE_INSTR_LEN_MASK, (x) - 2) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h new file mode 100644 index 000000000000..1cfa96167fde --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MI_COMMANDS_H_ +#define _XE_MI_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +/* + * MI (Memory Interface) commands are supported by all GT engines. They + * provide general memory operations and command streamer control. MI commands + * have a command type of 0x0 (MI_COMMAND) in bits 31:29 of the instruction + * header dword and a specific MI opcode in bits 28:23. + */ + +#define MI_OPCODE REG_GENMASK(28, 23) +#define MI_SUBOPCODE REG_GENMASK(22, 17) /* used with MI_EXPANSION */ + +#define __MI_INSTR(opcode) \ + (XE_INSTR_MI | REG_FIELD_PREP(MI_OPCODE, opcode)) + +#define MI_NOOP __MI_INSTR(0x0) +#define MI_USER_INTERRUPT __MI_INSTR(0x2) +#define MI_ARB_CHECK __MI_INSTR(0x5) + +#define MI_ARB_ON_OFF __MI_INSTR(0x8) +#define MI_ARB_ENABLE REG_BIT(0) +#define MI_ARB_DISABLE 0x0 + +#define MI_BATCH_BUFFER_END __MI_INSTR(0xA) +#define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) +#define MI_FORCE_WAKEUP __MI_INSTR(0x1D) + +#define MI_STORE_DATA_IMM __MI_INSTR(0x20) +#define MI_SDI_GGTT REG_BIT(22) +#define MI_SDI_LEN_DW GENMASK(9, 0) +#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) +#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ + REG_BIT(21)) + +#define MI_LOAD_REGISTER_IMM __MI_INSTR(0x22) +#define MI_LRI_LRM_CS_MMIO REG_BIT(19) +#define MI_LRI_MMIO_REMAP_EN REG_BIT(17) +#define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1) +#define MI_LRI_FORCE_POSTED REG_BIT(12) + +#define MI_FLUSH_DW __MI_INSTR(0x26) +#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) +#define MI_INVALIDATE_TLB REG_BIT(18) +#define MI_FLUSH_DW_CCS REG_BIT(16) +#define MI_FLUSH_DW_OP_STOREDW REG_BIT(14) +#define MI_FLUSH_DW_LEN_DW REG_GENMASK(5, 0) +#define MI_FLUSH_IMM_DW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 4 - 2) +#define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2) +#define MI_FLUSH_DW_USE_GTT REG_BIT(2) + +#define MI_BATCH_BUFFER_START __MI_INSTR(0x31) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h new file mode 100644 index 000000000000..5592774fc690 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ENGINE_REGS_H_ +#define _XE_ENGINE_REGS_H_ + +#include <asm/page.h> + 
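(Editorial sketch, not part of the patch: how the MI macros from xe_mi_commands.h above are meant to compose when building a command sequence. The helper below is illustrative only; it emits MI_STORE_DATA_IMM writing one qword through the GGTT, relying on MI_SDI_NUM_QW() to encode the "total dwords minus two" length field described in xe_instr_defs.h.)

#include <linux/kernel.h>
#include <linux/types.h>

#include "instructions/xe_mi_commands.h"

/*
 * Illustrative only: 5 dwords total (header, address lo/hi, data lo/hi),
 * so the DW length field must be 3, which MI_SDI_NUM_QW(1) produces.
 */
static u32 *emit_store_qword_ggtt(u32 *cs, u64 ggtt_addr, u64 value)
{
	*cs++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_QW(1);
	*cs++ = lower_32_bits(ggtt_addr);
	*cs++ = upper_32_bits(ggtt_addr);
	*cs++ = lower_32_bits(value);
	*cs++ = upper_32_bits(value);

	return cs;
}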
+#include "regs/xe_reg_defs.h"
+
+/*
+ * These *_BASE values represent the MMIO offset where each hardware engine's
+ * registers start. The other definitions in this header are parameterized
+ * macros that will take one of these values as a parameter.
+ */
+#define RENDER_RING_BASE			0x02000
+#define BSD_RING_BASE				0x1c0000
+#define BSD2_RING_BASE				0x1c4000
+#define BSD3_RING_BASE				0x1d0000
+#define BSD4_RING_BASE				0x1d4000
+#define XEHP_BSD5_RING_BASE			0x1e0000
+#define XEHP_BSD6_RING_BASE			0x1e4000
+#define XEHP_BSD7_RING_BASE			0x1f0000
+#define XEHP_BSD8_RING_BASE			0x1f4000
+#define VEBOX_RING_BASE				0x1c8000
+#define VEBOX2_RING_BASE			0x1d8000
+#define XEHP_VEBOX3_RING_BASE			0x1e8000
+#define XEHP_VEBOX4_RING_BASE			0x1f8000
+#define COMPUTE0_RING_BASE			0x1a000
+#define COMPUTE1_RING_BASE			0x1c000
+#define COMPUTE2_RING_BASE			0x1e000
+#define COMPUTE3_RING_BASE			0x26000
+#define BLT_RING_BASE				0x22000
+#define XEHPC_BCS1_RING_BASE			0x3e0000
+#define XEHPC_BCS2_RING_BASE			0x3e2000
+#define XEHPC_BCS3_RING_BASE			0x3e4000
+#define XEHPC_BCS4_RING_BASE			0x3e6000
+#define XEHPC_BCS5_RING_BASE			0x3e8000
+#define XEHPC_BCS6_RING_BASE			0x3ea000
+#define XEHPC_BCS7_RING_BASE			0x3ec000
+#define XEHPC_BCS8_RING_BASE			0x3ee000
+#define GSCCS_RING_BASE				0x11a000
+
+#define RING_TAIL(base)				XE_REG((base) + 0x30)
+
+#define RING_HEAD(base)				XE_REG((base) + 0x34)
+#define HEAD_ADDR				0x001FFFFC
+
+#define RING_START(base)			XE_REG((base) + 0x38)
+
+#define RING_CTL(base)				XE_REG((base) + 0x3c)
+#define RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+
+#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
+#define RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
+#define WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+#define IDLE_MSG_DISABLE			REG_BIT(0)
+
+#define RING_PWRCTX_MAXCNT(base)		XE_REG((base) + 0x54)
+#define IDLE_WAIT_TIME				REG_GENMASK(19, 0)
+
+#define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
+#define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
+#define RING_IPEHR(base)			XE_REG((base) + 0x68)
+#define RING_ACTHD(base)			XE_REG((base) + 0x74)
+#define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
+#define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
+#define RING_HWSTAM(base)			XE_REG((base) + 0x98)
+#define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
+#define RING_NOPID(base)			XE_REG((base) + 0x94)
+
+#define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
+#define FF_TESSELATION_DOP_GATE_DISABLE		BIT(19)
+
+#define RING_IMR(base)				XE_REG((base) + 0xa8)
+
+#define RING_EIR(base)				XE_REG((base) + 0xb0)
+#define RING_EMR(base)				XE_REG((base) + 0xb4)
+#define RING_ESR(base)				XE_REG((base) + 0xb8)
+
+#define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads => 6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define CMD_CCTL_WRITE_OVERRIDE_MASK		REG_GENMASK(13, 8)
+#define CMD_CCTL_READ_OVERRIDE_MASK		REG_GENMASK(6, 1)
+
+#define CSFE_CHICKEN1(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
+#define GHWSP_CSB_REPORT_DIS			REG_BIT(15)
+#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN1(base)		XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
+#define FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN2(base)		XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
+#define PERF_FIX_BALANCING_CFE_DISABLE		REG_BIT(15)
+
+#define CS_DEBUG_MODE1(base)			XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
+#define FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define REPLAY_MODE_GRANULARITY			REG_BIT(0)
+
+#define RING_BBADDR(base)			XE_REG((base) + 0x140)
+#define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
+
+#define BCS_SWCTRL(base)			XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
+#define BCS_SWCTRL_DISABLE_256B			REG_BIT(2)
+
+/* Handling MOCS value in BLIT_CCTL like it was done for CMD_CCTL */
+#define BLIT_CCTL(base)				XE_REG((base) + 0x204)
+#define BLIT_CCTL_DST_MOCS_MASK			REG_GENMASK(14, 9)
+#define BLIT_CCTL_SRC_MOCS_MASK			REG_GENMASK(6, 1)
+
+#define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
+
+#define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244)
+#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH		REG_BIT(3)
+#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
+
+#define RING_MODE(base)				XE_REG((base) + 0x29c)
+#define GFX_DISABLE_LEGACY_MODE			REG_BIT(3)
+
+#define RING_TIMESTAMP(base)			XE_REG((base) + 0x358)
+
+#define RING_TIMESTAMP_UDW(base)		XE_REG((base) + 0x358 + 4)
+#define RING_VALID_MASK				0x00000001
+#define RING_VALID				0x00000001
+#define STOP_RING				REG_BIT(8)
+#define TAIL_ADDR				0x001FFFF8
+
+#define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
+
+#define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
+#define RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
+#define RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
+#define RING_FORCE_TO_NONPRIV_ACCESS_RW	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0)
+#define RING_FORCE_TO_NONPRIV_ACCESS_RD	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 1)
+#define RING_FORCE_TO_NONPRIV_ACCESS_WR	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 2)
+#define RING_FORCE_TO_NONPRIV_ACCESS_INVALID	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 3)
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
+#define RING_FORCE_TO_NONPRIV_RANGE_MASK	REG_GENMASK(1, 0)
+#define RING_FORCE_TO_NONPRIV_RANGE_1	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 0)
+#define RING_FORCE_TO_NONPRIV_RANGE_4	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 1)
+#define RING_FORCE_TO_NONPRIV_RANGE_16	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 2)
+#define RING_FORCE_TO_NONPRIV_RANGE_64	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 3)
+#define RING_FORCE_TO_NONPRIV_MASK_VALID	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+						 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+						 RING_FORCE_TO_NONPRIV_DENY)
+#define RING_MAX_NONPRIV_SLOTS			12
+
+#define RING_EXECLIST_SQ_CONTENTS_LO(base)	XE_REG((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_HI(base)	XE_REG((base) + 0x510 + 4)
+
+#define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
+#define EL_CTRL_LOAD				REG_BIT(0)
+
+#define CS_CHICKEN1(base)			XE_REG((base) + 0x580, XE_REG_OPTION_MASKED)
+#define PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
+#define PREEMPT_GPGPU_MID_THREAD_LEVEL		PREEMPT_GPGPU_LEVEL(0, 0)
+#define
PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) +#define PREEMPT_GPGPU_COMMAND_LEVEL PREEMPT_GPGPU_LEVEL(1, 0) +#define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) +#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) + +#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) +#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) + +#define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10) +#define IECPUNIT_CLKGATE_DIS REG_BIT(22) + +#define VDBOX_CGCTL3F18(base) XE_REG((base) + 0x3f18) +#define ALNUNIT_CLKGATE_DIS REG_BIT(13) + +#define VDBOX_CGCTL3F1C(base) XE_REG((base) + 0x3f1c) +#define MFXPIPE_CLKGATE_DIS REG_BIT(3) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h new file mode 100644 index 000000000000..a255946b6f77 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_COMMANDS_H_ +#define _XE_GPU_COMMANDS_H_ + +#include "regs/xe_reg_defs.h" + +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT 21 +#define DST_ACCESS_TYPE_SHIFT 20 +#define CCS_SIZE_MASK GENMASK(17, 8) +#define XE2_CCS_SIZE_MASK GENMASK(18, 9) +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) +#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) + +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) +#define XY_FAST_COLOR_BLT_DW 16 +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) +#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 + +#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) +#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) +#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) +#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) + +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MEM_SET_MATRIX REG_BIT(17) +#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) +#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) + +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) + +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ + +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) +#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) +#define PIPE_CONTROL_PSD_SYNC (1<<17) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) 
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h new file mode 100644 index 000000000000..9886ec9cb08e --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_REGS_H_ +#define _XE_GSC_REGS_H_ + +#include <linux/compiler.h> +#include <linux/types.h> + +#include "regs/xe_reg_defs.h" + +/* Definitions of GSC H/W registers, bits, etc */ + +#define MTL_GSC_HECI1_BASE 0x00116000 +#define MTL_GSC_HECI2_BASE 0x00117000 + +#define HECI_H_CSR(base) XE_REG((base) + 0x4) +#define HECI_H_CSR_IE REG_BIT(0) +#define HECI_H_CSR_IS REG_BIT(1) +#define HECI_H_CSR_IG REG_BIT(2) +#define HECI_H_CSR_RDY REG_BIT(3) +#define HECI_H_CSR_RST REG_BIT(4) + +/* + * The FWSTS register values are FW defined and can be different between + * HECI1 and HECI2 + */ +#define HECI_FWSTS1(base) XE_REG((base) + 0xc40) +#define HECI1_FWSTS1_CURRENT_STATE REG_GENMASK(3, 0) +#define HECI1_FWSTS1_CURRENT_STATE_RESET 0 +#define HECI1_FWSTS1_PROXY_STATE_NORMAL 5 +#define HECI1_FWSTS1_INIT_COMPLETE REG_BIT(9) +#define HECI_FWSTS5(base) XE_REG((base) + 0xc68) +#define HECI1_FWSTS5_HUC_AUTH_DONE REG_BIT(19) + +#define HECI_H_GS1(base) XE_REG((base) + 0xc4c) +#define HECI_H_GS1_ER_PREP REG_BIT(0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h new file mode 100644 index 000000000000..1dd361046b5d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_REGS_H_ +#define _XE_GT_REGS_H_ + +#include "regs/xe_reg_defs.h" + +/* + * The GSI register range [0x0 - 0x40000) is replicated at a higher offset + * for the media GT. xe_mmio and xe_gt_mcr functions will automatically + * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT. 
+ */ +#define MEDIA_GT_GSI_OFFSET 0x380000 +#define MEDIA_GT_GSI_LENGTH 0x40000 + +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) +#define MTL_CAGF_MASK REG_GENMASK(8, 0) +#define MTL_CC_MASK REG_GENMASK(12, 9) + +/* RPM unit config (Gen8+) */ +#define RPM_CONFIG0 XE_REG(0xd00) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 +#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) + +#define FORCEWAKE_ACK_MEDIA_VDBOX(n) XE_REG(0xd50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX(n) XE_REG(0xd70 + (n) * 4) +#define FORCEWAKE_ACK_RENDER XE_REG(0xd84) + +#define GMD_ID XE_REG(0xd8c) +#define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) +#define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) +#define GMD_ID_REVID REG_GENMASK(5, 0) + +#define FORCEWAKE_ACK_GSC XE_REG(0xdf8) +#define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) + +#define MCFG_MCR_SELECTOR XE_REG(0xfd0) +#define MTL_MCR_SELECTOR XE_REG(0xfd4) +#define SF_MCR_SELECTOR XE_REG(0xfd8) +#define MCR_SELECTOR XE_REG(0xfdc) +#define GAM_MCR_SELECTOR XE_REG(0xfe0) +#define MCR_MULTICAST REG_BIT(31) +#define MCR_SLICE_MASK REG_GENMASK(30, 27) +#define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) +#define MCR_SUBSLICE_MASK REG_GENMASK(26, 24) +#define MCR_SUBSLICE(subslice) REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice) +#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) + +#define PS_INVOCATION_COUNT XE_REG(0x2348) + +#define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) +#define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) +#define CCS_AUX_INV XE_REG(0x4208) + +#define VD0_AUX_INV XE_REG(0x4218) +#define VE0_AUX_INV XE_REG(0x4238) + +#define VE1_AUX_INV XE_REG(0x42b8) +#define AUX_INV REG_BIT(0) + +#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) +#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) + +#define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) +#define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) + +#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) +#define TBIMR_FAST_CLIP REG_BIT(5) + +#define FF_MODE XE_REG_MCR(0x6210) +#define DIS_TE_AUTOSTRIP REG_BIT(31) +#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) +#define DIS_MESH_AUTOSTRIP REG_BIT(15) + +#define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) +#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9) +#define DIS_AUTOSTRIP REG_BIT(6) +#define DIS_OVER_FETCH_CACHE REG_BIT(1) +#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) + +#define FF_MODE2 XE_REG(0x6604) +#define XEHP_FF_MODE2 XE_REG_MCR(0x6604) +#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) +#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) +#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) +#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) + +#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) +#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) + +#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) + +#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) +#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) + +#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED) +#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) + +#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, 
XE_REG_OPTION_MASKED)
+#define FLSH_IGNORES_PSD			REG_BIT(10)
+#define FD_END_COLLECT				REG_BIT(5)
+
+#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define DISABLE_TDC_LOAD_BALANCING_CALC		REG_BIT(6)
+
+#define COMMON_SLICE_CHICKEN3			XE_REG(0x7304, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN3		XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED)
+#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
+#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE	REG_BIT(12)
+#define BLEND_EMB_FIX_DISABLE_IN_RCC		REG_BIT(11)
+#define DISABLE_CPS_AWARE_COLOR_PIPE		REG_BIT(9)
+
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
+#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+
+#define VF_PREEMPTION				XE_REG(0x83a4, XE_REG_OPTION_MASKED)
+#define PREEMPTION_VERTEX_COUNT			REG_GENMASK(15, 0)
+
+#define VF_SCRATCHPAD				XE_REG(0x83a8, XE_REG_OPTION_MASKED)
+#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE	REG_BIT(13)
+
+#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
+#define POLYGON_TRIFAN_LINELOOP_DISABLE		REG_BIT(4)
+
+#define SQCNT1					XE_REG_MCR(0x8718)
+#define XELPMP_SQCNT1				XE_REG(0x8718)
+#define ENFORCE_RAR				REG_BIT(23)
+
+#define XEHP_SQCM				XE_REG_MCR(0x8724)
+#define EN_32B_ACCESS				REG_BIT(30)
+
+#define XE2_FLAT_CCS_BASE_RANGE_LOWER		XE_REG_MCR(0x8800)
+#define XE2_FLAT_CCS_ENABLE			REG_BIT(0)
+
+#define GSCPSMI_BASE				XE_REG(0x880c)
+
+/* Fuse readout registers for GT */
+#define XEHP_FUSE4				XE_REG(0x9114)
+#define CCS_EN_MASK				REG_GENMASK(19, 16)
+#define GT_L3_EXC_MASK				REG_GENMASK(6, 4)
+
+#define MIRROR_FUSE3				XE_REG(0x9118)
+#define XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
+#define L3BANK_PAIR_COUNT			4
+#define L3BANK_MASK				REG_GENMASK(3, 0)
+/* on Xe_HP the same fuses indicate mslices instead of L3 banks */
+#define MAX_MSLICES				4
+#define MEML3_EN_MASK				REG_GENMASK(3, 0)
+
+#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
+#define XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
+
+#define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
+#define GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
+#define GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
+
+#define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
+#define XE2_GT_COMPUTE_DSS_2			XE_REG(0x914c)
+#define XE2_GT_GEOMETRY_DSS_1			XE_REG(0x9150)
+#define XE2_GT_GEOMETRY_DSS_2			XE_REG(0x9154)
+
+#define GDRST					XE_REG(0x941c)
+#define GRDOM_GUC				REG_BIT(3)
+#define GRDOM_FULL				REG_BIT(0)
+
+#define MISCCPCTL				XE_REG(0x9424)
+#define DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
+
+#define UNSLCGCTL9430				XE_REG(0x9430)
+#define MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE		XE_REG(0x9434)
+#define VFUNIT_CLKGATE_DIS			REG_BIT(20)
+#define TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
+#define CG3DDISCFEG_CLKGATE_DIS			REG_BIT(17) /* DG2 */
+#define GAMEDIA_CLKGATE_DIS			REG_BIT(11)
+#define HSUNIT_CLKGATE_DIS			REG_BIT(8)
+#define VSUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLCGCTL9440				XE_REG(0x9440)
+#define GAMTLBOACS_CLKGATE_DIS			REG_BIT(28)
+#define GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
+#define GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
+#define GAMTLBVDBOX3_CLKGATE_DIS		REG_BIT(24)
+#define GAMTLBVDBOX4_CLKGATE_DIS		REG_BIT(23)
+#define GAMTLBVDBOX7_CLKGATE_DIS		REG_BIT(22)
+#define GAMTLBVDBOX2_CLKGATE_DIS		REG_BIT(21)
+#define GAMTLBVDBOX0_CLKGATE_DIS		REG_BIT(17)
+#define GAMTLBKCR_CLKGATE_DIS			REG_BIT(16)
+#define GAMTLBGUC_CLKGATE_DIS			REG_BIT(15)
+#define GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
+#define GAMTLBVDBOX1_CLKGATE_DIS
REG_BIT(6) + +#define UNSLCGCTL9444 XE_REG(0x9444) +#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) +#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) +#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) +#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) +#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) +#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) +#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) +#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) +#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) +#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) +#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) +#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) +#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) +#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) +#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) +#define LTCDD_CLKGATE_DIS REG_BIT(10) + +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x94d4) +#define L3_CR2X_CLKGATE_DIS REG_BIT(17) +#define L3_CLKGATE_DIS REG_BIT(16) +#define NODEDSS_CLKGATE_DIS REG_BIT(12) +#define MSCUNIT_CLKGATE_DIS REG_BIT(10) +#define RCCUNIT_CLKGATE_DIS REG_BIT(7) +#define SARBUNIT_CLKGATE_DIS REG_BIT(5) +#define SBEUNIT_CLKGATE_DIS REG_BIT(4) + +#define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4) +#define VSUNIT_CLKGATE2_DIS REG_BIT(19) + +#define SUBSLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x9524) +#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) +#define GWUNIT_CLKGATE_DIS REG_BIT(16) + +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 XE_REG_MCR(0x9528) +#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) + +#define SSMCGCTL9530 XE_REG_MCR(0x9530) +#define RTFUNIT_CLKGATE_DIS REG_BIT(18) + +#define DFR_RATIO_EN_AND_CHICKEN XE_REG_MCR(0x9550) +#define DFR_DISABLE REG_BIT(9) + +#define RPNSWREQ XE_REG(0xa008) +#define REQ_RATIO_MASK REG_GENMASK(31, 23) + +#define RP_CONTROL XE_REG(0xa024) +#define RPSWCTL_MASK REG_GENMASK(10, 9) +#define RPSWCTL_ENABLE REG_FIELD_PREP(RPSWCTL_MASK, 2) +#define RPSWCTL_DISABLE REG_FIELD_PREP(RPSWCTL_MASK, 0) +#define RC_CONTROL XE_REG(0xa090) +#define RC_CTL_HW_ENABLE REG_BIT(31) +#define RC_CTL_TO_MODE REG_BIT(28) +#define RC_CTL_RC6_ENABLE REG_BIT(18) +#define RC_STATE XE_REG(0xa094) +#define RC_IDLE_HYSTERSIS XE_REG(0xa0ac) + +#define PMINTRMSK XE_REG(0xa168) +#define PMINTR_DISABLE_REDIRECT_TO_GUC REG_BIT(31) +#define ARAT_EXPIRED_INTRMSK REG_BIT(9) + +#define FORCEWAKE_GT XE_REG(0xa188) + +#define PG_ENABLE XE_REG(0xa210) + +#define CTC_MODE XE_REG(0xa26c) +#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) +#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) + +#define FORCEWAKE_RENDER XE_REG(0xa278) +#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) +#define FORCEWAKE_GSC XE_REG(0xa618) + +#define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) +#define XEHPC_OVRLSCCC REG_BIT(0) + +/* L3 Cache Control */ +#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) +#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) +#define LNCFCMOCS_REG_COUNT 32 + +#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) +#define XEHP_LNESPARE REG_BIT(19) + +#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) +#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) + +#define XEHP_L3SCQREG7 XE_REG_MCR(0xb188) +#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) + +#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) + +#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) + +#define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28) +#define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) +#define COMP_MOD_CTRL XE_REG_MCR(0xcf30) +#define XEHP_VDBX_MOD_CTRL XE_REG_MCR(0xcf34) +#define XELPMP_VDBX_MOD_CTRL XE_REG(0xcf34) +#define 
XEHP_VEBX_MOD_CTRL XE_REG_MCR(0xcf38) +#define XELPMP_VEBX_MOD_CTRL XE_REG(0xcf38) +#define FORCE_MISS_FTLB REG_BIT(3) + +#define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) +#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) +#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) +#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) + +#define XEHP_GAMCNTRL_CTRL XE_REG_MCR(0xcf54) +#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) +#define GLOBAL_INVALIDATION_MODE REG_BIT(2) + +#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) +#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) + +#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) +#define ENABLE_SMALLPL REG_BIT(15) +#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) +#define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) + +#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) +#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) + +#define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) +#define DISABLE_ECC REG_BIT(5) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) + +#define ROW_CHICKEN4 XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED) +#define DISABLE_GRF_CLEAR REG_BIT(13) +#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) +#define DISABLE_TDL_PUSH REG_BIT(9) +#define DIS_PICK_2ND_EU REG_BIT(7) +#define DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) + +#define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) +#define DIS_FIX_EOT1_FLUSH REG_BIT(9) + +#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) +#define UGM_BACKUP_MODE REG_BIT(13) +#define MDQ_ARBITRATION_MODE REG_BIT(12) +#define EARLY_EOT_DIS REG_BIT(1) + +#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) +#define DISABLE_READ_SUPPRESSION REG_BIT(15) +#define DISABLE_EARLY_READ REG_BIT(14) +#define ENABLE_LARGE_GRF_MODE REG_BIT(12) +#define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define DISABLE_DOP_GATING REG_BIT(0) + +#define RT_CTRL XE_REG_MCR(0xe530) +#define DIS_NULL_QUERY REG_BIT(10) + +#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) +#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) +#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) + +#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) +#define DISABLE_D8_D16_COASLESCE REG_BIT(30) +#define TGM_WRITE_EOM_FORCE REG_BIT(17) +#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) +#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13) + +#define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4) +#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) +#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) +#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32) +#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) +#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) +#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) +#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) + +#define SARB_CHICKEN1 XE_REG_MCR(0xe90c) +#define COMP_CKN_IN REG_GENMASK(30, 29) + +#define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) +#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) +#define RCU_MODE_CCS_ENABLE REG_BIT(0) + +/* + * Total of 4 cslices, where each cslice is in the form: + * [0-3] CCS ID + * [4-6] RSVD + * [7] Disabled + */ +#define CCS_MODE XE_REG(0x14804) +#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ +#define 
CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ +#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) +#define CCS_MODE_CSLICE(cslice, ccs) \ + ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) + +#define FORCEWAKE_ACK_GT XE_REG(0x130044) +#define FORCEWAKE_KERNEL BIT(0) +#define FORCEWAKE_USER BIT(1) +#define FORCEWAKE_KERNEL_FALLBACK BIT(15) + +#define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) +#define MTL_MEDIA_MC6 XE_REG(0x138048) + +#define GT_CORE_STATUS XE_REG(0x138060) +#define RCN_MASK REG_GENMASK(2, 0) +#define GT_C0 0 +#define GT_C6 3 + +#define GT_GFX_RC6_LOCKED XE_REG(0x138104) +#define GT_GFX_RC6 XE_REG(0x138108) + +#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 +#define PROCHOT_MASK REG_BIT(0) +#define THERMAL_LIMIT_MASK REG_BIT(1) +#define RATL_MASK REG_BIT(5) +#define VR_THERMALERT_MASK REG_BIT(6) +#define VR_TDC_MASK REG_BIT(7) +#define POWER_LIMIT_4_MASK REG_BIT(8) +#define POWER_LIMIT_1_MASK REG_BIT(10) +#define POWER_LIMIT_2_MASK REG_BIT(11) + +#define GT_PERF_STATUS XE_REG(0x1381b4) +#define VOLTAGE_MASK REG_GENMASK(10, 0) + +#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) + +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) +#define GUC_SG_INTR_ENABLE XE_REG(0x190038) +#define ENGINE1_MASK REG_GENMASK(31, 16) +#define ENGINE0_MASK REG_GENMASK(15, 0) +#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) + +#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4)) +#define INTR_DATA_VALID REG_BIT(31) +#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) +#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) +#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) +#define OTHER_GUC_INSTANCE 0 +#define OTHER_GSC_INSTANCE 6 + +#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) +#define RCS0_RSVD_INTR_MASK XE_REG(0x190090) +#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0) +#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8) +#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac) +#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) +#define GUC_SG_INTR_MASK XE_REG(0x1900e8) +#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) +#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4) +#define CCS0_CCS1_INTR_MASK XE_REG(0x190100) +#define CCS2_CCS3_INTR_MASK XE_REG(0x190104) +#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) +#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) +#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) +#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) +#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) +#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) +#define GT_RENDER_USER_INTERRUPT REG_BIT(0) + +#define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004) +#define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008) +#define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068) +#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) +#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h new file mode 100644 index 000000000000..92320bbc9d3d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_REGS_H_ +#define _XE_GUC_REGS_H_ + +#include <linux/compiler.h> +#include 
<linux/types.h> + +#include "regs/xe_reg_defs.h" + +/* Definitions of GuC H/W registers, bits, etc */ + +#define DIST_DBS_POPULATED XE_REG(0xd08) +#define DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) +#define SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) + +#define DRBREGL(x) XE_REG(0x1000 + (x) * 8) +#define DRB_VALID REG_BIT(0) +#define DRBREGU(x) XE_REG(0x1000 + (x) * 8 + 4) + +#define GTCR XE_REG(0x4274) +#define GTCR_INVALIDATE REG_BIT(0) + +#define GUC_ARAT_C6DIS XE_REG(0xa178) + +#define GUC_STATUS XE_REG(0xc000) +#define GS_AUTH_STATUS_MASK REG_GENMASK(31, 30) +#define GS_AUTH_STATUS_BAD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1) +#define GS_AUTH_STATUS_GOOD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2) +#define GS_MIA_MASK REG_GENMASK(18, 16) +#define GS_MIA_CORE_STATE REG_FIELD_PREP(GS_MIA_MASK, 0x1) +#define GS_MIA_HALT_REQUESTED REG_FIELD_PREP(GS_MIA_MASK, 0x2) +#define GS_MIA_ISR_ENTRY REG_FIELD_PREP(GS_MIA_MASK, 0x4) +#define GS_UKERNEL_MASK REG_GENMASK(15, 8) +#define GS_BOOTROM_MASK REG_GENMASK(7, 1) +#define GS_BOOTROM_RSA_FAILED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50) +#define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) +#define GS_MIA_IN_RESET REG_BIT(0) + +#define GUC_WOPCM_SIZE XE_REG(0xc050) +#define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) +#define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) + +#define GUC_SHIM_CONTROL XE_REG(0xc064) +#define GUC_MOCS_INDEX_MASK REG_GENMASK(27, 24) +#define GUC_SHIM_WC_ENABLE REG_BIT(21) +#define GUC_ENABLE_MIA_CLOCK_GATING REG_BIT(15) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA REG_BIT(10) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA REG_BIT(9) +#define GUC_MSGCH_ENABLE REG_BIT(4) +#define GUC_ENABLE_MIA_CACHING REG_BIT(2) +#define GUC_ENABLE_READ_CACHE_LOGIC REG_BIT(1) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES REG_BIT(0) + +#define SOFT_SCRATCH(n) XE_REG(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define HUC_KERNEL_LOAD_INFO XE_REG(0xc1dc) +#define HUC_LOAD_SUCCESSFUL REG_BIT(0) + +#define UOS_RSA_SCRATCH(i) XE_REG(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW XE_REG(0xc300) +#define DMA_ADDR_0_HIGH XE_REG(0xc304) +#define DMA_ADDR_1_LOW XE_REG(0xc308) +#define DMA_ADDR_1_HIGH XE_REG(0xc30c) +#define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) +#define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) +#define DMA_ADDRESS_SPACE_GGTT REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 8) +#define DMA_COPY_SIZE XE_REG(0xc310) +#define DMA_CTRL XE_REG(0xc314) +#define HUC_UKERNEL REG_BIT(9) +#define UOS_MOVE REG_BIT(4) +#define START_DMA REG_BIT(0) +#define DMA_GUC_WOPCM_OFFSET XE_REG(0xc340) +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) +#define HUC_LOADING_AGENT_GUC REG_BIT(1) +#define GUC_WOPCM_OFFSET_VALID REG_BIT(0) +#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) + +#define GUC_SEND_INTERRUPT XE_REG(0xc4c8) +#define GUC_SEND_TRIGGER REG_BIT(0) + +#define GUC_BCS_RCS_IER XE_REG(0xc550) +#define GUC_VCS2_VCS1_IER XE_REG(0xc554) +#define GUC_WD_VECS_IER XE_REG(0xc558) +#define GUC_PM_P24C_IER XE_REG(0xc55c) + +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) + +#define HUC_STATUS2 XE_REG(0xd3b0) +#define HUC_FW_VERIFIED REG_BIT(7) + +#define GT_PM_CONFIG XE_REG(0x13816c) +#define GT_DOORBELL_ENABLE REG_BIT(0) + +#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) + +#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) +#define VF_SW_FLAG_COUNT 4 + +#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304) + +#define MED_VF_SW_FLAG(n) 
XE_REG(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG_COUNT 4 + +/* GuC Interrupt Vector */ +#define GUC_INTR_GUC2HOST REG_BIT(15) +#define GUC_INTR_EXEC_ERROR REG_BIT(14) +#define GUC_INTR_DISPLAY_EVENT REG_BIT(13) +#define GUC_INTR_SEM_SIG REG_BIT(12) +#define GUC_INTR_IOMMU2GUC REG_BIT(11) +#define GUC_INTR_DOORBELL_RANG REG_BIT(10) +#define GUC_INTR_DMA_DONE REG_BIT(9) +#define GUC_INTR_FATAL_ERROR REG_BIT(8) +#define GUC_INTR_NOTIF_ERROR REG_BIT(7) +#define GUC_INTR_SW_INT_6 REG_BIT(6) +#define GUC_INTR_SW_INT_5 REG_BIT(5) +#define GUC_INTR_SW_INT_4 REG_BIT(4) +#define GUC_INTR_SW_INT_3 REG_BIT(3) +#define GUC_INTR_SW_INT_2 REG_BIT(2) +#define GUC_INTR_SW_INT_1 REG_BIT(1) +#define GUC_INTR_SW_INT_0 REG_BIT(0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h new file mode 100644 index 000000000000..4be81abc86ad --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LRC_LAYOUT_H_ +#define _XE_LRC_LAYOUT_H_ + +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_START (0x08 + 1) +#define CTX_RING_CTL (0x0a + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h new file mode 100644 index 000000000000..519dd1067a19 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MCHBAR_REGS_H_ +#define _XE_MCHBAR_REGS_H_ + +#include "regs/xe_reg_defs.h" + +/* + * MCHBAR mirror. + * + * This mirrors the MCHBAR MMIO space whose location is determined by + * device 0 function 0's pci config register 0x44 or 0x48 and matches it in + * every way. 
+ */ + +#define MCHBAR_MIRROR_BASE_SNB 0x140000 + +#define PCU_CR_PACKAGE_POWER_SKU XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930) +#define PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) +#define PKG_MAX_WIN GENMASK_ULL(54, 48) +#define PKG_MAX_WIN_X GENMASK_ULL(54, 53) +#define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) + + +#define PCU_CR_PACKAGE_POWER_SKU_UNIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938) +#define PKG_PWR_UNIT REG_GENMASK(3, 0) +#define PKG_ENERGY_UNIT REG_GENMASK(12, 8) +#define PKG_TIME_UNIT REG_GENMASK(19, 16) + +#define PCU_CR_PACKAGE_ENERGY_STATUS XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c) + +#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) +#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) +#define PKG_PWR_LIM_1_EN REG_BIT(15) +#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) +#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) +#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) + +#endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h new file mode 100644 index 000000000000..c50e7650c09a --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_REG_DEFS_H_ +#define _XE_REG_DEFS_H_ + +#include "compat-i915-headers/i915_reg_defs.h" + +/** + * struct xe_reg - Register definition + * + * Register definition to be used by the individual register. Although the same + * definition is used for xe_reg and xe_reg_mcr, they use different internal + * APIs for accesses. + */ +struct xe_reg { + union { + struct { + /** @addr: address */ + u32 addr:28; + /** + * @masked: register is "masked", with upper 16 bits used + * to identify the bits that are updated on the lower + * bits + */ + u32 masked:1; + /** + * @mcr: register is multicast/replicated in the + * hardware and needs special handling. Any register + * with this set should also use a type of xe_reg_mcr_t. + * It's only here so the few places that deal with MCR + * registers specially (xe_sr.c) and tests using the raw + * value can inspect it. + */ + u32 mcr:1; + /** + * @ext: access MMIO extension space for current register. + */ + u32 ext:1; + }; + /** @raw: Raw value with both address and options */ + u32 raw; + }; +}; + +/** + * struct xe_reg_mcr - MCR register definition + * + * MCR register is the same as a regular register, but uses another type since + * the internal API used for accessing them is different: it's never correct to + * use regular MMIO access. + */ +struct xe_reg_mcr { + /** @__reg: The register */ + struct xe_reg __reg; +}; + + +/** + * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the + * written bits on the lower 16 bits. + * + * It only applies to registers explicitly marked in bspec with + * "Access: Masked". Registers with this option can have write operations to + * specific lower bits by setting the corresponding upper bits. Other bits will + * not be affected. This allows register writes without needing a RMW cycle and + * without caching in software the register value. + * + * Example: a write with value 0x00010001 will set bit 0 and all other bits + * retain their previous values. + * + * To be used with XE_REG(), XE_REG_MCR() and XE_REG_INITIALIZER(). + */ +#define XE_REG_OPTION_MASKED .masked = 1 + +/** + * XE_REG_INITIALIZER - Initializer for xe_reg_t. + * @r_: Register offset + * @...: Additional options like access mode. 
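To make the masked-write convention documented for XE_REG_OPTION_MASKED above concrete, a small sketch (the helper names are invented for illustration; xe inherits comparable helpers such as _MASKED_BIT_ENABLE() via the compat i915 headers):

#include <linux/types.h>

/* Upper 16 bits select which of the lower 16 bits the write touches. */
static inline u32 example_masked_set(u16 bits)
{
        return ((u32)bits << 16) | bits;        /* write 1 to @bits */
}

static inline u32 example_masked_clear(u16 bits)
{
        return (u32)bits << 16;                 /* write 0 to @bits */
}

With these, example_masked_set(0x0001) yields 0x00010001, matching the example in the comment: bit 0 is set and every other bit keeps its previous value.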
See struct xe_reg for available + * options. + * + * Register field is mandatory, and additional options may be passed as + * arguments. Usually ``XE_REG()`` should be preferred since it creates an + * object of the right type. However when initializing static const storage, + * where a compound statement is not allowed, this can be used instead. + */ +#define XE_REG_INITIALIZER(r_, ...) { .addr = r_, __VA_ARGS__ } + + +/** + * XE_REG - Create a struct xe_reg from offset and additional flags + * @r_: Register offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + */ +#define XE_REG(r_, ...) ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__)) + +/** + * XE_REG_EXT - Create a struct xe_reg from extension offset and additional + * flags + * @r_: Register extension offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + */ +#define XE_REG_EXT(r_, ...) \ + ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1)) + +/** + * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags + * @r_: Register offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + */ +#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \ + .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ + }) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h new file mode 100644 index 000000000000..2c214bb9b671 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ +#ifndef _XE_REGS_H_ +#define _XE_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define TIMESTAMP_OVERRIDE XE_REG(0x44074) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) + +#define PCU_IRQ_OFFSET 0x444e0 +#define GU_MISC_IRQ_OFFSET 0x444f0 +#define GU_MISC_GSE REG_BIT(27) + +#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) + +#define GU_CNTL_PROTECTED XE_REG(0x10100C) +#define DRIVERINT_FLR_DIS REG_BIT(31) + +#define GU_CNTL XE_REG(0x101010) +#define LMEM_INIT REG_BIT(7) +#define DRIVERFLR REG_BIT(31) + +#define GU_DEBUG XE_REG(0x101018) +#define DRIVERFLR_STATUS REG_BIT(31) + +#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) +#define SGSI_SIDECLK_DIS REG_BIT(17) + +#define GGC XE_REG(0x108040) +#define GMS_MASK REG_GENMASK(15, 8) +#define GGMS_MASK REG_GENMASK(7, 6) + +#define DSMBASE XE_REG(0x1080C0) +#define BDSM_MASK REG_GENMASK64(63, 20) + +#define GSMBASE XE_REG(0x108100) + +#define STOLEN_RESERVED XE_REG(0x1082c0) +#define WOPCM_SIZE_MASK REG_GENMASK64(9, 7) + +#define MTL_RP_STATE_CAP XE_REG(0x138000) + +#define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) + +#define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) +#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) +#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) + +#define MTL_MPE_FREQUENCY XE_REG(0x13802c) +#define MTL_RPE_MASK REG_GENMASK(8, 0) + +#define DG1_MSTR_TILE_INTR XE_REG(0x190008) +#define DG1_MSTR_IRQ REG_BIT(31) +#define DG1_MSTR_TILE(t) REG_BIT(t) + +#define GFX_MSTR_IRQ XE_REG(0x190010) +#define MASTER_IRQ REG_BIT(31) +#define GU_MISC_IRQ REG_BIT(29) +#define DISPLAY_IRQ REG_BIT(16) +#define GT_DW_IRQ(x) REG_BIT(x) + +#define PVC_RP_STATE_CAP XE_REG(0x281014) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h new file mode 100644 index 000000000000..58a4e0fad1e1 --- 
/dev/null +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _REGS_XE_SRIOV_REGS_H_ +#define _REGS_XE_SRIOV_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define XE2_LMEM_CFG XE_REG(0x48b0) + +#define LMEM_CFG XE_REG(0xcf58) +#define LMEM_EN REG_BIT(31) +#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ + +#endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile new file mode 100644 index 000000000000..39d8a0892274 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ + xe_bo_test.o \ + xe_dma_buf_test.o \ + xe_migrate_test.o \ + xe_mocs_test.o \ + xe_pci_test.o \ + xe_rtp_test.o \ + xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c new file mode 100644 index 000000000000..412b2e7ce40c --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_bo_test.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_bo_evict.h" +#include "xe_pci.h" +#include "xe_pm.h" + +static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, + bool clear, u64 get_val, u64 assign_val, + struct kunit *test) +{ + struct dma_fence *fence; + struct ttm_tt *ttm; + struct page *page; + pgoff_t ccs_page; + long timeout; + u64 *cpu_map; + int ret; + u32 offset; + + /* Move bo to VRAM if not already there. */ + ret = xe_bo_validate(bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate bo.\n"); + return ret; + } + + /* Optionally clear bo *and* CCS data in VRAM. */ + if (clear) { + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to submit bo clear.\n"); + return PTR_ERR(fence); + } + dma_fence_put(fence); + } + + /* Evict to system. CCS data should be copied. */ + ret = xe_bo_evict(bo, true); + if (ret) { + KUNIT_FAIL(test, "Failed to evict bo.\n"); + return ret; + } + + /* Sync all migration blits */ + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, + 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); + return -ETIME; + } + + /* + * Bo with CCS data is now in system memory. Verify backing store + * and data integrity. Then assign for the next testing round while + * we still have a CPU map. + */ + ttm = bo->ttm.ttm; + if (!ttm || !ttm_tt_is_populated(ttm)) { + KUNIT_FAIL(test, "Bo was not in expected placement.\n"); + return -EINVAL; + } + + ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; + if (ccs_page >= ttm->num_pages) { + KUNIT_FAIL(test, "No TTM CCS pages present.\n"); + return -EINVAL; + } + + page = ttm->pages[ccs_page]; + cpu_map = kmap_local_page(page); + + /* Check first CCS value */ + if (cpu_map[0] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[0]); + ret = -EINVAL; + } + + /* Check last CCS value, or at least last value in page. 
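Since LMTT_DIR_PTR above stores the directory pointer in multiples of 64KB, programming LMEM_CFG could look like the following sketch (a hypothetical helper, assuming a 64KB-aligned address that fits the 31-bit field; the real programming belongs to the xe LMTT/SR-IOV code):

#include <linux/types.h>

static inline u32 example_lmem_cfg(u64 lmtt_dir_addr)
{
        /* The field is in 64KB units, hence the shift by 16. */
        return LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, lmtt_dir_addr >> 16);
}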
*/ + offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; + if (cpu_map[offset] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[offset]); + ret = -EINVAL; + } + + cpu_map[0] = assign_val; + cpu_map[offset] = assign_val; + kunmap_local(cpu_map); + + return ret; +} + +static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, + struct kunit *test) +{ + struct xe_bo *bo; + + int ret; + + /* TODO: Sanity check */ + unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + + if (IS_DGFX(xe)) + kunit_info(test, "Testing vram id %u\n", tile->id); + else + kunit_info(test, "Testing system memory\n"); + + bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, bo_flags); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "Failed to create bo.\n"); + return; + } + + xe_bo_lock(bo, false); + + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); + ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, + test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data survives migration.\n"); + ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, + 0xdeadbeefdeadbeefULL, test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); + +out_unlock: + xe_bo_unlock(bo); + xe_bo_put(bo); +} + +static int ccs_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_tile *tile; + int id; + + if (!xe_device_has_flat_ccs(xe)) { + kunit_info(test, "Skipping non-flat-ccs device.\n"); + return 0; + } + + xe_device_mem_access_get(xe); + + for_each_tile(tile, xe, id) { + /* For igfx run only for primary tile */ + if (!IS_DGFX(xe) && id > 0) + continue; + ccs_test_run_tile(xe, tile, test); + } + + xe_device_mem_access_put(xe); + + return 0; +} + +void xe_ccs_migrate_kunit(struct kunit *test) +{ + xe_call_for_each_device(ccs_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); + +static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) +{ + struct xe_bo *bo, *external; + unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); + struct xe_gt *__gt; + int err, i, id; + + kunit_info(test, "Testing device %s vram id %u\n", + dev_name(xe->drm.dev), tile->id); + + for (i = 0; i < 2; ++i) { + xe_vm_lock(vm, false); + bo = xe_bo_create_user(xe, NULL, vm, 0x10000, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, + bo_flags); + xe_vm_unlock(vm); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "bo create err=%pe\n", bo); + break; + } + + external = xe_bo_create_user(xe, NULL, NULL, 0x10000, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, bo_flags); + if (IS_ERR(external)) { + KUNIT_FAIL(test, "external bo create err=%pe\n", external); + goto cleanup_bo; + } + + xe_bo_lock(external, false); + err = xe_bo_pin_external(external); + xe_bo_unlock(external); + if (err) { + KUNIT_FAIL(test, "external bo pin err=%pe\n", + ERR_PTR(err)); + goto cleanup_external; + } + + err = xe_bo_evict_all(xe); + if (err) { + KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + for_each_gt(__gt, xe, id) + xe_gt_sanitize(__gt); + err = xe_bo_restore_kernel(xe); + /* + * Snapshotting the CTB and copying back a potentially old + * version seems risky, depending on what might have been + * in flight. Also it seems snapshotting the ADS object and + * copying back results in serious breakage. Normally when + * calling xe_bo_restore_kernel() we always fully restart the + * GT, which re-initializes such things. We could potentially + * skip saving and restoring such objects in xe_bo_evict_all(); + * however, it seems quite fragile to not also restart the GT. Try + * to do that here by triggering a GT reset. + */ + for_each_gt(__gt, xe, id) { + xe_gt_reset_async(__gt); + flush_work(&__gt->reset.worker); + } + if (err) { + KUNIT_FAIL(test, "restore kernel err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + + err = xe_bo_restore_user(xe); + if (err) { + KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + if (!xe_bo_is_vram(external)) { + KUNIT_FAIL(test, "external bo is not vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (xe_bo_is_vram(bo)) { + KUNIT_FAIL(test, "bo is vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (i) { + down_read(&vm->lock); + xe_vm_lock(vm, false); + err = xe_bo_validate(bo, bo->vm, false); + xe_vm_unlock(vm); + up_read(&vm->lock); + if (err) { + KUNIT_FAIL(test, "bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + xe_bo_lock(external, false); + err = xe_bo_validate(external, NULL, false); + xe_bo_unlock(external); + if (err) { + KUNIT_FAIL(test, "external bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + } + + xe_bo_lock(external, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external); + + xe_bo_put(external); + + xe_bo_lock(bo, false); + __xe_bo_unset_bulk_move(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); + continue; + +cleanup_all: + xe_bo_lock(external, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external); +cleanup_external: + xe_bo_put(external); +cleanup_bo: + xe_bo_lock(bo, false); + __xe_bo_unset_bulk_move(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); + break; + } + + xe_vm_put(vm); + + return 0; +} + +static int evict_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_tile *tile; + int id; + + if (!IS_DGFX(xe)) { + kunit_info(test, "Skipping non-discrete device %s.\n", + dev_name(xe->drm.dev)); + return 0; + } + + xe_device_mem_access_get(xe); + + for_each_tile(tile, xe, id) + evict_test_run_tile(xe, tile, test); + + xe_device_mem_access_put(xe); + + return 0; +} + +void xe_bo_evict_kunit(struct kunit *test) +{ + xe_call_for_each_device(evict_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c new file mode 100644 index 000000000000..f408f17f2164 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo_test.h" + +#include <kunit/test.h> + +static struct kunit_case xe_bo_tests[] = { + KUNIT_CASE(xe_ccs_migrate_kunit), + KUNIT_CASE(xe_bo_evict_kunit), + {} +}; + +static struct kunit_suite xe_bo_test_suite = { + .name = "xe_bo", + .test_cases = xe_bo_tests, +}; + +kunit_test_suite(xe_bo_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_bo kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h new file mode 100644 index 000000000000..0113ab45066a --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_BO_TEST_H_ +#define _XE_BO_TEST_H_ + +struct kunit; + +void xe_ccs_migrate_kunit(struct kunit *test); +void xe_bo_evict_kunit(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c new file mode 100644 index 000000000000..9f6d571d7fa9 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <drm/xe_drm.h> + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_dma_buf_test.h" +#include "tests/xe_pci_test.h" + +#include "xe_pci.h" + +static bool p2p_enabled(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops && + params->attach_ops->allow_peer2peer; +} + +static bool is_dynamic(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops && + params->attach_ops->move_notify; +} + +static void check_residency(struct kunit *test, struct xe_bo *exported, + struct xe_bo *imported, struct dma_buf *dmabuf) +{ + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + u32 mem_type; + int ret; + + xe_bo_assert_held(exported); + xe_bo_assert_held(imported); + + mem_type = XE_PL_VRAM0; + if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + /* No VRAM allowed */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !p2p_enabled(params)) + /* No P2P */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !is_dynamic(params) && + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + /* Pin migrated to TT */ + mem_type = XE_PL_TT; + + if (!xe_bo_is_mem_type(exported, mem_type)) { + KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n"); + return; + } + + if (xe_bo_is_pinned(exported)) + return; + + /* + * Evict exporter. Note that the gem object dma_buf member isn't + * set from xe_gem_prime_export(), and it's needed for the move_notify() + * functionality, so hack that up here. Evicting the exported bo will + * also evict the imported bo through the move_notify() functionality if + * the importer is on a different device. If they're on the same device, + * the exporter and the importer should be the same bo. + */ + swap(exported->ttm.base.dma_buf, dmabuf); + ret = xe_bo_evict(exported, true); + swap(exported->ttm.base.dma_buf, dmabuf); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", + ret); + return; + } + + /* Verify that the importer has also been evicted to SYSTEM */ + if (exported != imported && !xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { + KUNIT_FAIL(test, "Importer wasn't properly evicted.\n"); + return; + } + + /* Re-validate the importer. This should also move the exporter in. */ + ret = xe_bo_validate(imported, NULL, false); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", + ret); + return; + } + + /* + * If on different devices, the exporter is kept in system if + * possible, saving a migration step as the transfer is likely + * just as fast from system memory. 
+ */ + if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + + if (params->force_different_devices) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, exported == imported); +} + +static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct xe_bo *bo; + size_t size; + + /* No VRAM on this device? */ + if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && + (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + return; + + size = PAGE_SIZE; + if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) && + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + size = SZ_64K; + + kunit_info(test, "running %s\n", __func__); + bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(bo)); + return; + } + + dmabuf = xe_gem_prime_export(&bo->ttm.base, 0); + if (IS_ERR(dmabuf)) { + KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n", + PTR_ERR(dmabuf)); + goto out; + } + + import = xe_gem_prime_import(&xe->drm, dmabuf); + if (!IS_ERR(import)) { + struct xe_bo *import_bo = gem_to_xe_bo(import); + + /* + * Did import succeed when it shouldn't due to lack of p2p support? + */ + if (params->force_different_devices && + !p2p_enabled(params) && + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + KUNIT_FAIL(test, + "xe_gem_prime_import() succeeded when it shouldn't have\n"); + } else { + int err; + + /* Is everything where we expect it to be? */ + xe_bo_lock(import_bo, false); + err = xe_bo_validate(import_bo, NULL, false); + + /* Pinning in VRAM is not allowed. */ + if (!is_dynamic(params) && + params->force_different_devices && + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + KUNIT_EXPECT_EQ(test, err, -EINVAL); + /* Otherwise only expect interrupts or success. */ + else if (err && err != -EINTR && err != -ERESTARTSYS) + KUNIT_EXPECT_TRUE(test, !err || err == -EINTR || + err == -ERESTARTSYS); + + if (!err) + check_residency(test, bo, import_bo, dmabuf); + xe_bo_unlock(import_bo); + } + drm_gem_object_put(import); + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + /* Unexpected error code. */ + KUNIT_FAIL(test, + "xe_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + } else if (!params->force_different_devices || + p2p_enabled(params) || + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + /* Shouldn't fail if we can reuse same bo, use p2p or use system */ + KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", + PTR_ERR(import)); + } + dma_buf_put(dmabuf); +out: + drm_gem_object_put(&bo->ttm.base); +} + +static const struct dma_buf_attach_ops nop2p_attach_ops = { + .allow_peer2peer = false, + .move_notify = xe_dma_buf_move_notify +}; + +/* + * We test the implementation with bos of different residency and with + * importers with different capabilities; some lacking p2p support and some + * lacking dynamic capabilities (attach_ops == NULL). We also fake + * different devices avoiding the import shortcut that just reuses the same + * gem object. 
+ */ +static const struct dma_buf_test_params test_params[] = { + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {} +}; + +static int dma_buf_run_device(struct xe_device *xe) +{ + const struct dma_buf_test_params *params; + struct kunit *test = xe_cur_kunit(); + + for (params = test_params; params->mem_mask; ++params) { + struct dma_buf_test_params p = *params; + + p.base.id = XE_TEST_LIVE_DMA_BUF; + test->priv = &p; + xe_test_dmabuf_import_same_driver(xe); + } + + /* A non-zero return would halt iteration over driver devices */ + return 0; +} + +void xe_dma_buf_kunit(struct kunit *test) +{ + xe_call_for_each_device(dma_buf_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c new file mode 100644 index 000000000000..9f5a9cda8c0f --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_dma_buf_test.h" + +#include <kunit/test.h> + +static struct kunit_case xe_dma_buf_tests[] = { + KUNIT_CASE(xe_dma_buf_kunit), + {} +}; + +static struct kunit_suite xe_dma_buf_test_suite = { + .name = "xe_dma_buf", + .test_cases = xe_dma_buf_tests, +}; + +kunit_test_suite(xe_dma_buf_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_dma_buf kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h new file mode 100644 index 000000000000..e6b464ddd526 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DMA_BUF_TEST_H_ +#define _XE_DMA_BUF_TEST_H_ + +struct kunit; + +void xe_dma_buf_kunit(struct kunit *test); + 
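dma_buf_run_device() above relies on the convention spelled out in its final comment: a non-zero return from the per-device callback halts the walk. A contract sketch (illustrative only; the real xe_call_for_each_device() is provided by the xe_pci test support and discovers the devices itself):

typedef int (*example_device_fn)(struct xe_device *xe);

static int example_for_each_device(example_device_fn fn,
                                   struct xe_device **devices,
                                   unsigned int count)
{
        unsigned int i;
        int ret;

        for (i = 0; i < count; i++) {
                ret = fn(devices[i]);
                if (ret)
                        return ret;     /* non-zero stops the walk */
        }
        return 0;
}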
+#endif diff --git a/drivers/gpu/drm/xe/tests/xe_lmtt_test.c b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c new file mode 100644 index 000000000000..1f1557c45ae1 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <kunit/test.h> + +static const struct lmtt_ops_param { + const char *desc; + const struct xe_lmtt_ops *ops; +} lmtt_ops_params[] = { + { "2-level", &lmtt_2l_ops, }, + { "multi-level", &lmtt_ml_ops, }, +}; + +static void lmtt_ops_param_get_desc(const struct lmtt_ops_param *p, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", p->desc); +} + +KUNIT_ARRAY_PARAM(lmtt_ops, lmtt_ops_params, lmtt_ops_param_get_desc); + +static void test_ops(struct kunit *test) +{ + const struct lmtt_ops_param *p = test->param_value; + const struct xe_lmtt_ops *ops = p->ops; + unsigned int n; + + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_root_pd_level); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_num); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_size); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_shift); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_index); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_encode); + + KUNIT_EXPECT_NE(test, ops->lmtt_root_pd_level(), 0); + + for (n = 0; n <= ops->lmtt_root_pd_level(); n++) { + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_num(n), 0, + "level=%u", n); + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_size(n), 0, + "level=%u", n); + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_encode(0, n), LMTT_PTE_INVALID, + "level=%u", n); + } + + for (n = 0; n < ops->lmtt_root_pd_level(); n++) { + u64 addr = BIT_ULL(ops->lmtt_pte_shift(n)); + + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_shift(n), 0, + "level=%u", n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr - 1, n), 0, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr + 1, n), 1, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2 - 1, n), 1, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2, n), 2, + "addr=%#llx level=%u", addr, n); + } +} + +static struct kunit_case lmtt_test_cases[] = { + KUNIT_CASE_PARAM(test_ops, lmtt_ops_gen_params), + {} +}; + +static struct kunit_suite lmtt_suite = { + .name = "lmtt", + .test_cases = lmtt_test_cases, +}; + +kunit_test_suites(&lmtt_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c new file mode 100644 index 000000000000..7a32faa2f688 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2022 Intel Corporation + */ + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_migrate_test.h" +#include "tests/xe_pci_test.h" + +#include "xe_pci.h" + +static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, + const char *str, struct kunit *test) +{ + long ret; + + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str, + PTR_ERR(fence)); + return true; + } + if (!fence) + return true; + + ret = dma_fence_wait_timeout(fence, false, 5 * HZ); + if (ret <= 0) { + KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret); + return true; + } + + return false; +} + +static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, + struct xe_bb *bb, u32 second_idx, const char *str, + struct kunit *test) +{ + u64 batch_base = xe_migrate_batch_base(m, 
xe->info.has_usm); + struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb, + batch_base, + second_idx); + struct dma_fence *fence; + + if (IS_ERR(job)) { + KUNIT_FAIL(test, "Failed to create migration job: %li\n", + PTR_ERR(job)); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (sanity_fence_failed(xe, fence, str, test)) + return -ETIMEDOUT; + + dma_fence_put(fence); + kunit_info(test, "%s: Job completed\n", str); + return 0; +} + +static void +sanity_populate_cb(struct xe_migrate_pt_update *pt_update, + struct xe_tile *tile, struct iosys_map *map, void *dst, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct migrate_test_params *p = + to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE)); + int i; + u64 *ptr = dst; + u64 value; + + for (i = 0; i < num_qwords; i++) { + value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; + if (map) + xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * + sizeof(u64), u64, value); + else + ptr[i] = value; + } + + kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU"); + if (p->force_gpu && map) + KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n"); +} + +static const struct xe_migrate_pt_update_ops sanity_ops = { + .populate = sanity_populate_cb, +}; + +#define check(_retval, _expected, str, _test) \ + do { if ((_retval) != (_expected)) { \ + KUNIT_FAIL(_test, "Sanity check failed: " str \ + " expected %llx, got %llx\n", \ + (u64)(_expected), (u64)(_retval)); \ + } } while (0) + +static void test_copy(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test, u32 region) +{ + struct xe_device *xe = tile_to_xe(m->tile); + u64 retval, expected = 0; + bool big = bo->size >= SZ_2M; + struct dma_fence *fence; + const char *str = big ? "Copying big bo" : "Copying small bo"; + int err; + + struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, + bo->size, + ttm_bo_type_kernel, + region | + XE_BO_NEEDS_CPU_ACCESS); + if (IS_ERR(remote)) { + KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n", + str, PTR_ERR(remote)); + return; + } + + err = xe_bo_validate(remote, NULL, false); + if (err) { + KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", + str, err); + goto out_unlock; + } + + err = xe_bo_vmap(remote); + if (err) { + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %i\n", + str, err); + goto out_unlock; + } + + xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + fence = xe_migrate_clear(m, remote, remote->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : + "Clearing remote small bo", test)) { + retval = xe_map_rd(xe, &remote->vmap, 0, u64); + check(retval, expected, "remote first offset should be cleared", + test); + retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + check(retval, expected, "remote last offset should be cleared", + test); + } + dma_fence_put(fence); + + /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ + xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + + expected = 0xc0c0c0c0c0c0c0c0; + fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, + bo->ttm.resource, false); + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" : + "Copying small bo remote -> vram", test)) { + retval = xe_map_rd(xe, &bo->vmap, 0, u64); + check(retval, expected, + "remote -> vram bo first offset should be copied", test); + retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + check(retval, expected, + "remote -> vram bo last offset should be copied", test); + } + dma_fence_put(fence); + + /* And the other way around... slightly hacky... */ + xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + + fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, + remote->ttm.resource, false); + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" : + "Copying small bo vram -> remote", test)) { + retval = xe_map_rd(xe, &remote->vmap, 0, u64); + check(retval, expected, + "vram -> remote bo first offset should be copied", test); + retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + check(retval, expected, + "vram -> remote bo last offset should be copied", test); + } + dma_fence_put(fence); + + xe_bo_vunmap(remote); +out_unlock: + xe_bo_unlock(remote); + xe_bo_put(remote); +} + +static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT); +} + +static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + u32 region; + + if (bo->ttm.resource->mem_type == XE_PL_SYSTEM) + return; + + if (bo->ttm.resource->mem_type == XE_PL_VRAM0) + region = XE_BO_CREATE_VRAM1_BIT; + else + region = XE_BO_CREATE_VRAM0_BIT; + test_copy(m, bo, test, region); +} + +static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, + struct kunit *test, bool force_gpu) +{ + struct xe_device *xe = tile_to_xe(m->tile); + struct dma_fence *fence; + u64 retval, expected; + ktime_t then, now; + int i; + + struct xe_vm_pgtable_update update = { + .ofs = 1, + .qwords = 0x10, + .pt_bo = pt, + }; + struct xe_migrate_pt_update pt_update = { + .ops = &sanity_ops, + }; + struct migrate_test_params p = { + .base.id = XE_TEST_LIVE_MIGRATE, + .force_gpu = force_gpu, + }; + + test->priv = &p; + /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ + expected = 0xf0f0f0f0f0f0f0f0ULL; + xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); + + then = ktime_get(); + fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1, + NULL, 0, &pt_update); + now = ktime_get(); + if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) + return; + + kunit_info(test, "Updating without syncing took %llu us.\n", + (unsigned long long)ktime_to_us(ktime_sub(now, then))); + + dma_fence_put(fence); + retval = xe_map_rd(xe, &pt->vmap, 0, u64); + check(retval, expected, "PTE[0] must stay untouched", test); + + for (i = 0; i < update.qwords; i++) { + retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); + check(retval, i * 0x1111111111111111ULL, "PTE update", test); + } + + retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), + u64); + check(retval, expected, "PTE[0x11] must stay untouched", test); +} + +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +{ + struct xe_tile *tile = m->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny; + struct xe_res_cursor src_it; + struct dma_fence *fence; + u64 retval, expected; + struct xe_bb *bb; + int err; + u8 id = tile->id; + + err = xe_bo_vmap(bo); + if (err) { + KUNIT_FAIL(test, "Failed to vmap our pagetables: %i\n", + err); + return; + } + + big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(big)) { + KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); + goto vunmap; + } + + pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(pt)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(pt)); + goto free_big; + } + + tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, + 2 * SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(tiny)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(tiny)); + goto free_pt; + } + + bb = xe_bb_new(tile->primary_gt, 32, xe->info.has_usm); + if (IS_ERR(bb)) { + KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", + PTR_ERR(bb)); + goto free_tiny; + } + + kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n", + (unsigned long)xe_bo_main_addr(m->q->vm->pt_root[id]->bo, XE_PAGE_SIZE), + (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE)); + + /* First part of the test, are we updating our pagetable bo with a new entry? */ + xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, + 0xdeaddeadbeefbeef); + expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, xe->pat.idx[XE_CACHE_WB], 0); + if (m->q->vm->flags & XE_VM_FLAG_64K) + expected |= XE_PTE_PS64; + if (xe_bo_is_vram(pt)) + xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + else + xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + + emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, + &src_it, XE_PAGE_SIZE, pt); + + run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); + + retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), + u64); + check(retval, expected, "PTE entry write", test); + + /* Now try to write data to our newly mapped 'pagetable', see if it succeeds */ + bb->len = 0; + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); + expected = 0; + + emit_clear(tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, + IS_DGFX(xe)); + run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", + test); + + retval = xe_map_rd(xe, &pt->vmap, 0, u32); + check(retval, expected, "Write to PT after adding PTE", test); + + /* Sanity checks passed, try the full ones! 
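As a worked example of the sanity_populate_cb() arithmetic that the page-table checks above rely on (restating the code, with the test's update.ofs == 1 and update.qwords == 0x10):

/*
 * Each qword written at absolute index qword_ofs receives
 *     (qword_ofs - update->ofs) * 0x1111111111111111ULL
 * so for update.ofs == 1:
 *     qword 0x01 (i == 0)  -> 0x0000000000000000
 *     qword 0x02 (i == 1)  -> 0x1111111111111111
 *     qword 0x10 (i == 15) -> 0xffffffffffffffff
 * which is exactly what the "PTE update" check() loop expects, while
 * PTE[0] and PTE[0x11] outside the updated range keep 0xf0f0f0f0f0f0f0f0.
 */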
*/ + + /* Clear a small bo */ + kunit_info(test, "Clearing small buffer object\n"); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + expected = 0; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &tiny->vmap, 0, u32); + check(retval, expected, "Command clear small first value", test); + retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + check(retval, expected, "Command clear small last value", test); + + kunit_info(test, "Copying small buffer object to system\n"); + test_copy_sysmem(m, tiny, test); + if (xe->info.tile_count > 1) { + kunit_info(test, "Copying small buffer object to other vram\n"); + test_copy_vram(m, tiny, test); + } + + /* Clear a big bo */ + kunit_info(test, "Clearing big buffer object\n"); + xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + expected = 0; + fence = xe_migrate_clear(m, big, big->ttm.resource); + if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &big->vmap, 0, u32); + check(retval, expected, "Command clear big first value", test); + retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + check(retval, expected, "Command clear big last value", test); + + kunit_info(test, "Copying big buffer object to system\n"); + test_copy_sysmem(m, big, test); + if (xe->info.tile_count > 1) { + kunit_info(test, "Copying big buffer object to other vram\n"); + test_copy_vram(m, big, test); + } + + kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); + test_pt_update(m, pt, test, false); + kunit_info(test, "Testing page table update using GPU\n"); + test_pt_update(m, pt, test, true); + +out: + xe_bb_free(bb, NULL); +free_tiny: + xe_bo_unpin(tiny); + xe_bo_put(tiny); +free_pt: + xe_bo_unpin(pt); + xe_bo_put(pt); +free_big: + xe_bo_unpin(big); + xe_bo_put(big); +vunmap: + xe_bo_vunmap(m->pt_bo); +} + +static int migrate_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + struct xe_migrate *m = tile->migrate; + + kunit_info(test, "Testing tile id %d.\n", id); + xe_vm_lock(m->q->vm, true); + xe_device_mem_access_get(xe); + xe_migrate_sanity_test(m, test); + xe_device_mem_access_put(xe); + xe_vm_unlock(m->q->vm); + } + + return 0; +} + +void xe_migrate_sanity_kunit(struct kunit *test) +{ + xe_call_for_each_device(migrate_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c new file mode 100644 index 000000000000..cf0c173b945f --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_migrate_test.h" + +#include <kunit/test.h> + +static struct kunit_case xe_migrate_tests[] = { + KUNIT_CASE(xe_migrate_sanity_kunit), + {} +}; + +static struct kunit_suite xe_migrate_test_suite = { + .name = "xe_migrate", + .test_cases = xe_migrate_tests, +}; + +kunit_test_suite(xe_migrate_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_migrate kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h new file mode 100644 index 000000000000..7c645c66824f --- /dev/null 
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MIGRATE_TEST_H_ +#define _XE_MIGRATE_TEST_H_ + +struct kunit; + +void xe_migrate_sanity_kunit(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c new file mode 100644 index 000000000000..7dd34f94e809 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_mocs_test.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_pci.h" +#include "xe_gt.h" +#include "xe_mocs.h" +#include "xe_device.h" + +struct live_mocs { + struct xe_mocs_info table; +}; + +static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) +{ + unsigned int flags; + struct kunit *test = xe_cur_kunit(); + + memset(arg, 0, sizeof(*arg)); + + flags = get_mocs_settings(gt_to_xe(gt), &arg->table); + + kunit_info(test, "table size %d", arg->table.size); + kunit_info(test, "table uc_index %d", arg->table.uc_index); + kunit_info(test, "table n_entries %d", arg->table.n_entries); + + return flags; +} + +static void read_l3cc_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + unsigned int i; + u32 l3cc; + u32 reg_val; + u32 ret; + + struct kunit *test = xe_cur_kunit(); + + xe_device_mem_access_get(gt_to_xe(gt)); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); + for (i = 0; + i < (info->n_entries + 1) / 2 ? + (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1))), 1 : 0; + i++) { + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, + XELP_LNCFCMOCS(i).addr, reg_val, l3cc); + if (reg_val != l3cc) + KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n", + XELP_LNCFCMOCS(i).addr); + } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_device_mem_access_put(gt_to_xe(gt)); +} + +static void read_mocs_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + struct xe_device *xe = gt_to_xe(gt); + + unsigned int i; + u32 mocs; + u32 reg_val; + u32 ret; + + struct kunit *test = xe_cur_kunit(); + + xe_device_mem_access_get(gt_to_xe(gt)); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); + drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, + "Unused entries index should have been defined\n"); + for (i = 0; + i < info->n_entries ? 
(mocs = get_entry_control(info, i)), 1 : 0; + i++) { + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, + XELP_GLOBAL_MOCS(i).addr, reg_val, mocs); + if (reg_val != mocs) + KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n", + XELP_GLOBAL_MOCS(i).addr); + } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_device_mem_access_put(gt_to_xe(gt)); +} + +static int mocs_kernel_test_run_device(struct xe_device *xe) +{ + /* Basic check the system is configured with the expected mocs table */ + + struct live_mocs mocs; + struct xe_gt *gt; + + unsigned int flags; + int id; + + for_each_gt(gt, xe, id) { + flags = live_mocs_init(&mocs, gt); + if (flags & HAS_GLOBAL_MOCS) + read_mocs_table(gt, &mocs.table); + if (flags & HAS_LNCF_MOCS) + read_l3cc_table(gt, &mocs.table); + } + return 0; +} + +void xe_live_mocs_kernel_kunit(struct kunit *test) +{ + xe_call_for_each_device(mocs_kernel_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c new file mode 100644 index 000000000000..ef56bd517b28 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_mocs_test.h" + +#include <kunit/test.h> + +static struct kunit_case xe_mocs_tests[] = { + KUNIT_CASE(xe_live_mocs_kernel_kunit), + {} +}; + +static struct kunit_suite xe_mocs_test_suite = { + .name = "xe_mocs", + .test_cases = xe_mocs_tests, +}; + +kunit_test_suite(xe_mocs_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h new file mode 100644 index 000000000000..7faa3575e6c3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MOCS_TEST_H_ +#define _XE_MOCS_TEST_H_ + +struct kunit; + +void xe_live_mocs_kernel_kunit(struct kunit *test); + +#endif
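The two readback loops in xe_mocs.c above fold the expected-value computation into the loop condition via a comma expression. That is compact but easy to misread; the L3CC walk is equivalent to this more conventional sketch (same identifiers as in the test):

	for (i = 0; i < (info->n_entries + 1) / 2; i++) {
		/* each LNCFCMOCS register packs two L3CC table entries */
		l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
				    get_entry_l3cc(info, 2 * i + 1));
		/* ... read back XELP_LNCFCMOCS(i) and compare against l3cc ... */
	}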
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c new file mode 100644 index 000000000000..602793644f61 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "tests/xe_pci_test.h" + +#include "tests/xe_test.h" + +#include <kunit/test-bug.h> +#include <kunit/test.h> +#include <kunit/visibility.h> + +struct kunit_test_data { + int ndevs; + xe_device_fn xe_fn; +}; + +static int dev_to_xe_device_fn(struct device *dev, void *__data) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct kunit_test_data *data = __data; + int ret = 0; + int idx; + + data->ndevs++; + + if (drm_dev_enter(drm, &idx)) + ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); + drm_dev_exit(idx); + + return ret; +} + +/** + * xe_call_for_each_device - Iterate over all devices this driver binds to + * @xe_fn: Function to call for each device. + * + * This function iterates over all devices this driver binds to, and calls + * @xe_fn for each one of them. If the called function returns anything other + * than 0, iteration is stopped and the return value is returned by this + * function. Across each function call, drm_dev_enter() / drm_dev_exit() is + * called for the corresponding drm device. + * + * Return: Number of devices iterated, or + * the error code of the first call to @xe_fn that returned an error. + */ +int xe_call_for_each_device(xe_device_fn xe_fn) +{ + int ret; + struct kunit_test_data data = { + .xe_fn = xe_fn, + .ndevs = 0, + }; + + ret = driver_for_each_device(&xe_pci_driver.driver, NULL, + &data, dev_to_xe_device_fn); + + if (!data.ndevs) + kunit_skip(current->kunit_test, "test runs only on hardware\n"); + + return ret ?: data.ndevs; +} + +/** + * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs + * @xe_fn: Function to call for each device. + * + * This function iterates over the descriptors for all graphics IPs recognized + * by the driver and calls @xe_fn for each one of them. + */ +void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) +{ + const struct xe_graphics_desc *ip, *last = NULL; + + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { + ip = graphics_ip_map[i].ip; + if (ip == last) + continue; + + xe_fn(ip); + last = ip; + } +} +EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); + +/** + * xe_call_for_each_media_ip - Iterate over all recognized media IPs + * @xe_fn: Function to call for each device. + * + * This function iterates over the descriptors for all media IPs recognized + * by the driver and calls @xe_fn for each one of them. + */ +void xe_call_for_each_media_ip(xe_media_fn xe_fn) +{ + const struct xe_media_desc *ip, *last = NULL; + + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { + ip = media_ip_map[i].ip; + if (ip == last) + continue; + + xe_fn(ip); + last = ip; + } +} +EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); + +static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, + u32 *ver, u32 *revid) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_pci_fake_data *data = test->priv; + + if (type == GMDID_MEDIA) { + *ver = data->media_verx100; + *revid = xe_step_to_gmdid(data->media_step); + } else { + *ver = data->graphics_verx100; + *revid = xe_step_to_gmdid(data->graphics_step); + } +} + +int xe_pci_fake_device_init(struct xe_device *xe) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_pci_fake_data *data = test->priv; + const struct pci_device_id *ent = pciidlist; + const struct xe_device_desc *desc; + const struct xe_subplatform_desc *subplatform_desc; + + if (!data) { + desc = (const void *)ent->driver_data; + subplatform_desc = NULL; + goto done; + } + + for (ent = pciidlist; ent->device; ent++) { + desc = (const void *)ent->driver_data; + if (desc->platform == data->platform) + break; + } + + if (!ent->device) + return -ENODEV; + + for (subplatform_desc = desc->subplatforms; + subplatform_desc && subplatform_desc->subplatform; + subplatform_desc++) + if (subplatform_desc->subplatform == data->subplatform) + break; + + if (data->subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc) + return -ENODEV; + +done: + kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); + + xe_info_init_early(xe, desc, subplatform_desc); + xe_info_init(xe, desc->graphics, desc->media); + + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
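The live tests in this series all funnel through xe_call_for_each_device(); a minimal sketch of a new live test built on it (the function names here are hypothetical):

	static int check_device(struct xe_device *xe)
	{
		/* runs once per bound device, inside drm_dev_enter()/exit() */
		return xe->info.tile_count ? 0 : -ENODEV;
	}

	static void xe_sample_live_kunit(struct kunit *test)
	{
		/* skips (rather than fails) when no devices are bound */
		xe_call_for_each_device(check_device);
	}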
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c new file mode 100644 index 000000000000..171e4180f1aa --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_drv.h> +#include <drm/drm_kunit_helpers.h> + +#include <kunit/test.h> + +#include "tests/xe_test.h" + +#include "xe_device.h" +#include "xe_pci_test.h" +#include "xe_pci_types.h" + +static void check_graphics_ip(const struct xe_graphics_desc *graphics) +{ + struct kunit *test = xe_cur_kunit(); + u64 mask = graphics->hw_engine_mask; + + /* RCS, CCS, and BCS engines are allowed on the graphics IP */ + mask &= ~(XE_HW_ENGINE_RCS_MASK | + XE_HW_ENGINE_CCS_MASK | + XE_HW_ENGINE_BCS_MASK); + + /* Any remaining engines are an error */ + KUNIT_ASSERT_EQ(test, mask, 0); +} + +static void check_media_ip(const struct xe_media_desc *media) +{ + struct kunit *test = xe_cur_kunit(); + u64 mask = media->hw_engine_mask; + + /* VCS, VECS and GSCCS engines are allowed on the media IP */ + mask &= ~(XE_HW_ENGINE_VCS_MASK | + XE_HW_ENGINE_VECS_MASK | + XE_HW_ENGINE_GSCCS_MASK); + + /* Any remaining engines are an error */ + KUNIT_ASSERT_EQ(test, mask, 0); +} + +static void xe_gmdid_graphics_ip(struct kunit *test) +{ + xe_call_for_each_graphics_ip(check_graphics_ip); +} + +static void xe_gmdid_media_ip(struct kunit *test) +{ + xe_call_for_each_media_ip(check_media_ip); +} + +static struct kunit_case xe_pci_tests[] = { + KUNIT_CASE(xe_gmdid_graphics_ip), + KUNIT_CASE(xe_gmdid_media_ip), + {} +}; + +static struct kunit_suite xe_pci_test_suite = { + .name = "xe_pci", + .test_cases = xe_pci_tests, +}; + +kunit_test_suite(xe_pci_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_pci kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h new file mode 100644 index 000000000000..811ffe5bd9fd --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PCI_TEST_H_ +#define _XE_PCI_TEST_H_ + +#include <linux/types.h> + +#include "xe_platform_types.h" + +struct xe_device; +struct xe_graphics_desc; +struct xe_media_desc; + +typedef int (*xe_device_fn)(struct xe_device *); +typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); +typedef void (*xe_media_fn)(const struct xe_media_desc *); + +int xe_call_for_each_device(xe_device_fn xe_fn); +void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); +void xe_call_for_each_media_ip(xe_media_fn xe_fn); + +struct xe_pci_fake_data { + enum xe_platform platform; + enum xe_subplatform subplatform; + u32 graphics_verx100; + u32 media_verx100; + u32 graphics_step; + u32 media_step; +}; + +int xe_pci_fake_device_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c new file mode 100644 index 000000000000..4a6972897675 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/string.h> +#include <linux/xarray.h> + +#include <drm/drm_drv.h> +#include <drm/drm_kunit_helpers.h> + +#include <kunit/test.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_reg_defs.h" +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_pci_test.h" +#include "xe_reg_sr.h" +#include "xe_rtp.h" + +#define REGULAR_REG1 XE_REG(1) +#define REGULAR_REG2 XE_REG(2) 
+#define REGULAR_REG3 XE_REG(3) +#define MCR_REG1 XE_REG_MCR(1) +#define MCR_REG2 XE_REG_MCR(2) +#define MCR_REG3 XE_REG_MCR(3) +#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED) + +#undef XE_REG_MCR +#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) + +struct rtp_test_case { + const char *name; + struct xe_reg expected_reg; + u32 expected_set_bits; + u32 expected_clr_bits; + unsigned long expected_count; + unsigned int expected_sr_errors; + const struct xe_rtp_entry_sr *entries; +}; + +static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +{ + return true; +} + +static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +{ + return false; +} + +static const struct rtp_test_case cases[] = { + { + .name = "coalesce-same-reg", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0) | REG_BIT(1), + .expected_clr_bits = REG_BIT(0) | REG_BIT(1), + .expected_count = 1, + /* Different bits on the same register: create a single entry */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "no-match-no-add", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + /* Don't coalesce second entry since rules don't match */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "no-match-no-add-multiple-rules", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + /* Don't coalesce second entry due to one of the rules */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes), FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "two-regs-two-entries", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 2, + /* Same bits on different registers are not coalesced */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0))) + }, + {} + }, + }, + { + .name = "clr-one-set-other", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(1) | REG_BIT(0), + .expected_count = 1, + /* Check clr vs set actions on different bits */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { +#define TEMP_MASK REG_GENMASK(10, 8) +#define TEMP_FIELD REG_FIELD_PREP(TEMP_MASK, 2) + .name = "set-field", + .expected_reg = REGULAR_REG1, + .expected_set_bits = TEMP_FIELD, + 
.expected_clr_bits = TEMP_MASK, + .expected_count = 1, + /* Check FIELD_SET works */ + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, + TEMP_MASK, TEMP_FIELD)) + }, + {} + }, +#undef TEMP_MASK +#undef TEMP_FIELD + }, + { + .name = "conflict-duplicate", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 1, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: setting same values twice */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + {} + }, + }, + { + .name = "conflict-not-disjoint", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 1, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: bits are not disjoint with previous entries */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0))) + }, + {} + }, + }, + { + .name = "conflict-reg-type", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 2, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: regular vs MCR */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(MCR_REG1, REG_BIT(1))) + }, + /* drop: regular vs masked */ + { XE_RTP_NAME("basic-3"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0))) + }, + {} + }, + }, +}; + +static void xe_rtp_process_tests(struct kunit *test) +{ + const struct rtp_test_case *param = test->param_value; + struct xe_device *xe = test->priv; + struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt; + struct xe_reg_sr *reg_sr = &gt->reg_sr; + const struct xe_reg_sr_entry *sre, *sr_entry = NULL; + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + unsigned long idx, count = 0; + + xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); + xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); + + xa_for_each(&reg_sr->xa, idx, sre) { + if (idx == param->expected_reg.addr) + sr_entry = sre; + + count++; + } + + KUNIT_EXPECT_EQ(test, count, param->expected_count); + KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); + KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); + KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); + KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); +} + +static void rtp_desc(const struct rtp_test_case *t, char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc);
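For reference, the KUNIT_ARRAY_PARAM() invocation above generates the rtp_gen_params() iterator named in the test-case table near the end of this file; it expands to roughly the following (a sketch of the kunit macro, not code from this patch):

	static const void *rtp_gen_params(const void *prev, char *desc)
	{
		const struct rtp_test_case *next = prev ?
			(const struct rtp_test_case *)prev + 1 : cases;

		if (next < cases + ARRAY_SIZE(cases)) {
			rtp_desc(next, desc);	/* fill the human-readable name */
			return next;
		}
		return NULL;			/* no more parameters */
	}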
 + +static int xe_rtp_test_init(struct kunit *test) +{ + struct xe_device *xe; + struct device *dev; + int ret; + + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + + /* Initialize an empty device */ + test->priv = NULL; + ret = xe_pci_fake_device_init(xe); + KUNIT_ASSERT_EQ(test, ret, 0); + + xe->drm.dev = dev; + test->priv = xe; + + return 0; +} + +static void xe_rtp_test_exit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + drm_kunit_helper_free_device(test, xe->drm.dev); +} + +static struct kunit_case xe_rtp_tests[] = { + KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params), + {} +}; + +static struct kunit_suite xe_rtp_test_suite = { + .name = "xe_rtp", + .init = xe_rtp_test_init, + .exit = xe_rtp_test_exit, + .test_cases = xe_rtp_tests, +}; + +kunit_test_suite(xe_rtp_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_rtp kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h new file mode 100644 index 000000000000..7a1ae213e750 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TEST_H_ +#define _XE_TEST_H_ + +#include <linux/types.h> + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include <linux/sched.h> +#include <kunit/test.h> + +/* + * For each test that provides a kunit private test structure, place a test id + * here and point kunit->priv to an embedded struct xe_test_priv. + */ +enum xe_test_priv_id { + XE_TEST_LIVE_DMA_BUF, + XE_TEST_LIVE_MIGRATE, +}; + +/** + * struct xe_test_priv - Base class for test private info + * @id: enum xe_test_priv_id to identify the subclass. + */ +struct xe_test_priv { + enum xe_test_priv_id id; +}; + +#define XE_TEST_DECLARE(x) x +#define XE_TEST_ONLY(x) unlikely(x) +#define XE_TEST_EXPORT +#define xe_cur_kunit() current->kunit_test + +/** + * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by + * current->kunit->priv if it exists and is embedded in the expected subclass. + * @id: Id of the expected subclass. + * + * Return: NULL if the process is not a kunit test, or if the + * current kunit->priv pointer does not point to an object of the expected + * subclass. A pointer to the embedded struct xe_test_priv otherwise. + */ +static inline struct xe_test_priv * +xe_cur_kunit_priv(enum xe_test_priv_id id) +{ + struct xe_test_priv *priv; + + if (!xe_cur_kunit()) + return NULL; + + priv = xe_cur_kunit()->priv; + return priv->id == id ? 
priv : NULL; +} + +#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */ + +#define XE_TEST_DECLARE(x) +#define XE_TEST_ONLY(x) 0 +#define XE_TEST_EXPORT static +#define xe_cur_kunit() NULL +#define xe_cur_kunit_priv(_id) NULL + +#endif +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c new file mode 100644 index 000000000000..a53c22a19582 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_drv.h> +#include <drm/drm_kunit_helpers.h> + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_pci_test.h" +#include "xe_reg_sr.h" +#include "xe_tuning.h" +#include "xe_wa.h" + +struct platform_test_case { + const char *name; + enum xe_platform platform; + enum xe_subplatform subplatform; + u32 graphics_verx100; + u32 media_verx100; + struct xe_step_info step; +}; + +#define PLATFORM_CASE(platform__, graphics_step__) \ + { \ + .name = #platform__ " (" #graphics_step__ ")", \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + + +#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ + { \ + .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ + media_verx100__, media_step__) \ + { \ + .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .graphics_verx100 = graphics_verx100__, \ + .media_verx100 = media_verx100__, \ + .step = { .graphics = STEP_ ## graphics_step__, \ + .media = STEP_ ## media_step__ } \ + } + +static const struct platform_test_case cases[] = { + PLATFORM_CASE(TIGERLAKE, B0), + PLATFORM_CASE(DG1, A0), + PLATFORM_CASE(DG1, B0), + PLATFORM_CASE(ALDERLAKE_S, A0), + PLATFORM_CASE(ALDERLAKE_S, B0), + PLATFORM_CASE(ALDERLAKE_S, C0), + PLATFORM_CASE(ALDERLAKE_S, D0), + PLATFORM_CASE(ALDERLAKE_P, A0), + PLATFORM_CASE(ALDERLAKE_P, B0), + PLATFORM_CASE(ALDERLAKE_P, C0), + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), + SUBPLATFORM_CASE(DG2, G10, A0), + SUBPLATFORM_CASE(DG2, G10, A1), + SUBPLATFORM_CASE(DG2, G10, B0), + SUBPLATFORM_CASE(DG2, G10, C0), + SUBPLATFORM_CASE(DG2, G11, A0), + SUBPLATFORM_CASE(DG2, G11, B0), + SUBPLATFORM_CASE(DG2, G11, B1), + SUBPLATFORM_CASE(DG2, G12, A0), + SUBPLATFORM_CASE(DG2, G12, A1), + PLATFORM_CASE(PVC, B0), + PLATFORM_CASE(PVC, B1), + PLATFORM_CASE(PVC, C0), + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), +}; + +static void platform_desc(const struct platform_test_case *t, char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
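As a worked expansion of the table above, GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0) produces an entry equivalent to:

	{
		.name = "METEORLAKE (g:A0, m:A0)",
		.platform = XE_METEORLAKE,
		.subplatform = XE_SUBPLATFORM_NONE,
		.graphics_verx100 = 1270,
		.media_verx100 = 1300,
		.step = { .graphics = STEP_A0, .media = STEP_A0 },
	}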
 + +static int xe_wa_test_init(struct kunit *test) +{ + const struct platform_test_case *param = test->param_value; + struct xe_pci_fake_data data = { + .platform = param->platform, + .subplatform = param->subplatform, + .graphics_verx100 = param->graphics_verx100, + .media_verx100 = param->media_verx100, + .graphics_step = param->step.graphics, + .media_step = param->step.media, + }; + struct xe_device *xe; + struct device *dev; + int ret; + + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + + test->priv = &data; + ret = xe_pci_fake_device_init(xe); + KUNIT_ASSERT_EQ(test, ret, 0); + + if (!param->graphics_verx100) + xe->info.step = param->step; + + /* TODO: init hw engines for engine/LRC WAs */ + xe->drm.dev = dev; + test->priv = xe; + + return 0; +} + +static void xe_wa_test_exit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + drm_kunit_helper_free_device(test, xe->drm.dev); +} + +static void xe_wa_gt(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_gt *gt; + int id; + + for_each_gt(gt, xe, id) { + xe_reg_sr_init(&gt->reg_sr, "GT", xe); + + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); + + KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0); + } +} + +static struct kunit_case xe_wa_tests[] = { + KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), + {} +}; + +static struct kunit_suite xe_wa_test_suite = { + .name = "xe_wa", + .init = xe_wa_test_init, + .exit = xe_wa_test_exit, + .test_cases = xe_wa_tests, +}; + +kunit_test_suite(xe_wa_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_wa kunit test"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h new file mode 100644 index 000000000000..34c142e6cfb0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ASSERT_H_ +#define _XE_ASSERT_H_ + +#include <linux/string_helpers.h> + +#include <drm/drm_print.h> + +#include "xe_device_types.h" +#include "xe_step.h" + +/** + * DOC: Xe ASSERTs + * + * While the Xe driver aims to be simpler than the legacy i915 driver, it is + * still complex enough that changes introduced while adding new functionality + * could break the existing code. + * + * Adding &drm_WARN or &drm_err to catch unwanted programming usage could lead + * to an undesired increase in driver footprint and may impact production driver + * performance, as this additional code will always be present. + * + * To allow annotating functions with additional detailed debug checks, + * asserting that all prerequisites are satisfied, without worrying about + * footprint or performance penalty on production builds (where all potential + * misuses introduced during code integration were already fixed), we introduce + * a family of Xe assert macros that try to follow the classic assert() + * utility: + * + * * xe_assert() + * * xe_tile_assert() + * * xe_gt_assert() + * + * These macros are implemented on top of &drm_WARN, but unlike the original, + * a warning is triggered when the provided condition is false. Additionally, + * none of the above assert macros can be used in expressions or as a + * condition, since the underlying code will be compiled out on non-debug + * builds. + * + * Note that these macros are not intended to cover known gaps in the + * implementation; for such cases use regular &drm_WARN or &drm_err and provide + * a valid safe fallback. + * + * Also, in cases where performance or footprint is not an issue, developers + * should continue to use the regular &drm_WARN or &drm_err to ensure that bug + * reports from production builds contain meaningful diagnostics data. 
 + * + * Below code shows how asserts could help in debug to catch unplanned use:: + * + * static void one_igfx(struct xe_device *xe) + * { + * xe_assert(xe, xe->info.is_dgfx == false); + * xe_assert(xe, xe->info.tile_count == 1); + * } + * + * static void two_dgfx(struct xe_device *xe) + * { + * xe_assert(xe, xe->info.is_dgfx); + * xe_assert(xe, xe->info.tile_count == 2); + * } + * + * void foo(struct xe_device *xe) + * { + * if (xe->info.is_dgfx) + * return two_dgfx(xe); + * return one_igfx(xe); + * } + * + * void bar(struct xe_device *xe) + * { + * if (drm_WARN_ON(&xe->drm, xe->info.tile_count > 2)) + * return; + * + * if (xe->info.tile_count == 2) + * return two_dgfx(xe); + * return one_igfx(xe); + * } + */ + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define __xe_assert_msg(xe, condition, msg, arg...) ({ \ + (void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg, \ + __stringify(condition), ## arg); \ +}) +#else +#define __xe_assert_msg(xe, condition, msg, arg...) ({ \ + typecheck(const struct xe_device *, xe); \ + BUILD_BUG_ON_INVALID(condition); \ +}) +#endif + +/** + * xe_assert - warn if condition is false when debugging. + * @xe: the &struct xe_device pointer to which &condition applies + * @condition: condition to check + * + * xe_assert() uses &drm_WARN to emit a warning and print additional information + * that could be read from the &xe pointer if the provided &condition is false. + * + * Contrary to &drm_WARN, xe_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") +#define xe_assert_msg(xe, condition, msg, arg...) ({ \ + const struct xe_device *__xe = (xe); \ + __xe_assert_msg(__xe, condition, \ + "platform: %d subplatform: %d\n" \ + "graphics: %s %u.%02u step %s\n" \ + "media: %s %u.%02u step %s\n" \ + msg, \ + __xe->info.platform, __xe->info.subplatform, \ + __xe->info.graphics_name, \ + __xe->info.graphics_verx100 / 100, \ + __xe->info.graphics_verx100 % 100, \ + xe_step_name(__xe->info.step.graphics), \ + __xe->info.media_name, \ + __xe->info.media_verx100 / 100, \ + __xe->info.media_verx100 % 100, \ + xe_step_name(__xe->info.step.media), \ + ## arg); \ +}) + +/** + * xe_tile_assert - warn if condition is false when debugging. + * @tile: the &struct xe_tile pointer to which &condition applies + * @condition: condition to check + * + * xe_tile_assert() uses &drm_WARN to emit a warning and print additional + * information that could be read from the &tile pointer if the provided + * &condition is false. + * + * Contrary to &drm_WARN, xe_tile_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") +#define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ + const struct xe_tile *__tile = (tile); \ + char __buf[10] __maybe_unused; \ + xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ + __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ +}) + +/** + * xe_gt_assert - warn if condition is false when debugging. 
 + * @gt: the &struct xe_gt pointer to which &condition applies + * @condition: condition to check + * + * xe_gt_assert() uses &drm_WARN to emit a warning and print additional + * information that could be safely read from the &gt pointer if the provided + * &condition is false. + * + * Contrary to &drm_WARN, xe_gt_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") +#define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \ + const struct xe_gt *__gt = (gt); \ + xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg, \ + __gt->info.id, __gt->info.type, ## arg); \ +}) + +#endif
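A minimal usage sketch for the tile-level variant; the invariant checked here is illustrative only:

	static void tile_vram_sanity(struct xe_tile *tile)
	{
		/* compiled out entirely unless CONFIG_DRM_XE_DEBUG is enabled */
		xe_tile_assert_msg(tile, tile->mem.vram.usable_size,
				   "VRAM not probed yet\n");
	}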
 diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c new file mode 100644 index 000000000000..7c124475c428 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bb.h" + +#include "instructions/xe_mi_commands.h" +#include "regs/xe_gpu_commands.h" +#include "xe_device.h" +#include "xe_exec_queue_types.h" +#include "xe_gt.h" +#include "xe_hw_fence.h" +#include "xe_sa.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +static int bb_prefetch(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + /* + * RCS and CCS require 1K, although other engines would be + * okay with 512. + */ + return SZ_1K; + else + return SZ_512; +} + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) +{ + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + + /* + * We need to allocate space for the requested number of dwords, + * one additional MI_BATCH_BUFFER_END dword, and additional buffer + * space to accommodate the platform-specific hardware prefetch + * requirements. + */ + bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool, + 4 * (dwords + 1) + bb_prefetch(gt)); + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + +static struct xe_sched_job * +__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) +{ + u32 size = drm_suballoc_size(bb->bo); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + + xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); + + xe_sa_bo_flush_write(bb->bo); + + return xe_sched_job_create(q, addr); +} + +struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, + struct xe_bb *bb, + u64 batch_base_ofs, + u32 second_idx) +{ + u64 addr[2] = { + batch_base_ofs + drm_suballoc_soffset(bb->bo), + batch_base_ofs + drm_suballoc_soffset(bb->bo) + + 4 * second_idx, + }; + + xe_gt_assert(q->gt, second_idx <= bb->len); + xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION); + + return __xe_bb_create_job(q, bb, addr); +} + +struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, + struct xe_bb *bb) +{ + u64 addr = xe_sa_bo_gpu_addr(bb->bo); + + xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); + return __xe_bb_create_job(q, bb, &addr); +} + +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence) +{ + if (!bb) + return; + + xe_sa_bo_free(bb->bo, fence); + kfree(bb); +} diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h new file mode 100644 index 000000000000..fafacd73dcc3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_H_ +#define _XE_BB_H_ + +#include "xe_bb_types.h" + +struct dma_fence; + +struct xe_gt; +struct xe_exec_queue; +struct xe_sched_job; + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, + struct xe_bb *bb); +struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, + struct xe_bb *bb, u64 batch_ofs, + u32 second_idx); +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h new file mode 100644 index 000000000000..b7d30308cf90 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_TYPES_H_ +#define _XE_BB_TYPES_H_ + +#include <linux/types.h> + +struct drm_suballoc; + +struct xe_bb { + struct drm_suballoc *bo; + + u32 *cs; + u32 len; /* in dwords */ +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c new file mode 100644 index 000000000000..8e4a3b1f6b93 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -0,0 +1,2269 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_bo.h" + +#include <linux/dma-buf.h> + +#include <drm/drm_drv.h> +#include <drm/drm_gem_ttm_helper.h> +#include <drm/drm_managed.h> +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_tt.h> +#include <drm/xe_drm.h> + +#include "xe_device.h" +#include "xe_dma_buf.h" +#include "xe_drm_client.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_migrate.h" +#include "xe_preempt_fence.h" +#include "xe_res_cursor.h" +#include "xe_trace.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_vm.h" + +static const struct 
ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +static const struct ttm_place tt_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_TT, + .flags = 0, +}; + +static struct ttm_placement tt_placement = { + .num_placement = 1, + .placement = &tt_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +bool mem_type_is_vram(u32 mem_type) +{ + return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; +} + +static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res) +{ + return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe); +} + +static bool resource_is_vram(struct ttm_resource *res) +{ + return mem_type_is_vram(res->mem_type); +} + +bool xe_bo_is_vram(struct xe_bo *bo) +{ + return resource_is_vram(bo->ttm.resource) || + resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource); +} + +bool xe_bo_is_stolen(struct xe_bo *bo) +{ + return bo->ttm.resource->mem_type == XE_PL_STOLEN; +} + +/** + * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR + * @bo: The BO + * + * The stolen memory is accessed through the PCI BAR for both DGFX and some + * integrated platforms that have a dedicated bit in the PTE for devmem (DM). + * + * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise. + */ +bool xe_bo_is_stolen_devmem(struct xe_bo *bo) +{ + return xe_bo_is_stolen(bo) && + GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270; +} + +static bool xe_bo_is_user(struct xe_bo *bo) +{ + return bo->flags & XE_BO_CREATE_USER_BIT; +} + +static struct xe_migrate * +mem_type_to_migrate(struct xe_device *xe, u32 mem_type) +{ + struct xe_tile *tile; + + xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type)); + tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 
0 : (mem_type - XE_PL_VRAM0)]; + return tile->migrate; +} + +static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) +{ + struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); + struct ttm_resource_manager *mgr; + + xe_assert(xe, resource_is_vram(res)); + mgr = ttm_manager_type(&xe->ttm, res->mem_type); + return to_xe_ttm_vram_mgr(mgr)->vram; +} + +static void try_add_system(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags, u32 *c) +{ + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + bo->placements[*c] = (struct ttm_place) { + .mem_type = XE_PL_TT, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_TT; + } +} + +static void add_vram(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) +{ + struct ttm_place place = { .mem_type = mem_type }; + struct xe_mem_region *vram; + u64 io_size; + + vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; + xe_assert(xe, vram && vram->usable_size); + io_size = vram->io_size; + + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + if (bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT)) + place.flags |= TTM_PL_FLAG_CONTIGUOUS; + + if (io_size < vram->usable_size) { + if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) { + place.fpfn = 0; + place.lpfn = io_size >> PAGE_SHIFT; + } else { + place.flags |= TTM_PL_FLAG_TOPDOWN; + } + } + places[*c] = place; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = mem_type; +} + +static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags, u32 *c) +{ + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + + if (bo->props.preferred_gt == XE_GT1) { + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + } else { + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); + } +} + +static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags, u32 *c) +{ + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + + if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + bo->placements[*c] = (struct ttm_place) { + .mem_type = XE_PL_STOLEN, + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? 
+ TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + } +} + +static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + u32 c = 0; + + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + + /* The order of placements should indicate preferred location */ + + if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { + try_add_system(xe, bo, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, &c); + } else { + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); + } + try_add_stolen(xe, bo, bo_flags, &c); + + if (!c) + return -EINVAL; + + bo->placement = (struct ttm_placement) { + .num_placement = c, + .placement = bo->placements, + .num_busy_placement = c, + .busy_placement = bo->placements, + }; + + return 0; +} + +int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + xe_bo_assert_held(bo); + return __xe_bo_placement_for_flags(xe, bo, bo_flags); +} + +static void xe_evict_flags(struct ttm_buffer_object *tbo, + struct ttm_placement *placement) +{ + if (!xe_bo_is_xe_bo(tbo)) { + /* Don't handle scatter gather BOs */ + if (tbo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + + *placement = sys_placement; + return; + } + + /* + * For xe, sg bos that are evicted to system just triggers a + * rebind of the sg list upon subsequent validation to XE_PL_TT. + */ + switch (tbo->resource->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + case XE_PL_STOLEN: + *placement = tt_placement; + break; + case XE_PL_TT: + default: + *placement = sys_placement; + break; + } +} + +struct xe_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table sgt; + struct sg_table *sg; +}; + +static int xe_tt_map_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + unsigned long num_pages = tt->num_pages; + int ret; + + XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); + + if (xe_tt->sg) + return 0; + + ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, + num_pages, 0, + (u64)num_pages << PAGE_SHIFT, + xe_sg_segment_size(xe_tt->dev), + GFP_KERNEL); + if (ret) + return ret; + + xe_tt->sg = &xe_tt->sgt; + ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (ret) { + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + return ret; + } + + return 0; +} + +struct sg_table *xe_bo_sg(struct xe_bo *bo) +{ + struct ttm_tt *tt = bo->ttm.ttm; + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + return xe_tt->sg; +} + +static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, + u32 page_flags) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = xe_bo_device(bo); + struct xe_ttm_tt *tt; + unsigned long extra_pages; + enum ttm_caching caching; + int err; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) + return NULL; + + tt->dev = xe->drm.dev; + + extra_pages = 0; + if (xe_bo_needs_ccs_pages(bo)) + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + PAGE_SIZE); + + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching); + + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and + * require a CPU:WC mapping. 
 + */ + if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) || + (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE)) + caching = ttm_write_combined; + + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); + if (err) { + kfree(tt); + return NULL; + } + + return &tt->ttm; +} + +static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, + struct ttm_operation_ctx *ctx) +{ + int err; + + /* + * dma-bufs are not populated with pages, and the dma- + * addresses are set up when moved to XE_PL_TT. + */ + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return 0; + + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); + if (err) + return err; + + /* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */ + err = xe_tt_map_sg(tt); + if (err) + ttm_pool_free(&ttm_dev->pool, tt); + + return err; +} + +static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return; + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + + return ttm_pool_free(&ttm_dev->pool, tt); +} + +static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + ttm_tt_fini(tt); + kfree(tt); +} + +static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, + struct ttm_resource *mem) +{ + struct xe_device *xe = ttm_to_xe_device(bdev); + + switch (mem->mem_type) { + case XE_PL_SYSTEM: + case XE_PL_TT: + return 0; + case XE_PL_VRAM0: + case XE_PL_VRAM1: { + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(mem); + struct xe_mem_region *vram = res_to_mem_region(mem); + + if (vres->used_visible_size < mem->size) + return -EINVAL; + + mem->bus.offset = mem->start << PAGE_SHIFT; + + if (vram->mapping && + mem->placement & TTM_PL_FLAG_CONTIGUOUS) + mem->bus.addr = (u8 *)vram->mapping + + mem->bus.offset; + + mem->bus.offset += vram->io_start; + mem->bus.is_iomem = true; + +#if !defined(CONFIG_X86) + mem->bus.caching = ttm_write_combined; +#endif + return 0; + } + case XE_PL_STOLEN: + return xe_ttm_stolen_io_mem_reserve(xe, mem); + default: + return -EINVAL; + } +} + +static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct drm_gem_object *obj = &bo->ttm.base; + struct drm_gpuvm_bo *vm_bo; + bool idle = false; + int ret = 0; + + dma_resv_assert_held(bo->ttm.base.resv); + + if (!list_empty(&bo->ttm.base.gpuva.list)) { + dma_resv_iter_begin(&cursor, bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + } + + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + struct drm_gpuva *gpuva; + + if (!xe_vm_in_fault_mode(vm)) { + drm_gpuvm_bo_evict(vm_bo, true); + continue; + } + + if (!idle) { + long timeout; + + if (ctx->no_wait_gpu && + !dma_resv_test_signaled(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + return -EBUSY; + + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP, + ctx->interruptible, + MAX_SCHEDULE_TIMEOUT); + if (!timeout) + return -ETIME; + if (timeout < 0) + return timeout; + + idle = true; + } + + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + trace_xe_vma_evict(vma); + 
ret = xe_vm_invalidate_vma(vma); + if (XE_WARN_ON(ret)) + return ret; + } + } + + return ret; +} + +/* + * The dma-buf map_attachment() / unmap_attachment() is hooked up here. + * Note that unmapping the attachment is deferred to the next + * map_attachment time, or to bo destroy (after idling) whichever comes first. + * This is to avoid syncing before unmap_attachment(), assuming that the + * caller relies on idling the reservation object before moving the + * backing store out. Should that assumption not hold, then we will be able + * to unconditionally call unmap_attachment() when moving out to system. + */ +static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, + struct ttm_resource *new_res) +{ + struct dma_buf_attachment *attach = ttm_bo->base.import_attach; + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, + ttm); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct sg_table *sg; + + xe_assert(xe, attach); + xe_assert(xe, ttm_bo->ttm); + + if (new_res->mem_type == XE_PL_SYSTEM) + goto out; + + if (ttm_bo->sg) { + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + + sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sg)) + return PTR_ERR(sg); + + ttm_bo->sg = sg; + xe_tt->sg = sg; + +out: + ttm_bo_move_null(ttm_bo, new_res); + + return 0; +} + +/** + * xe_bo_move_notify - Notify subsystems of a pending move + * @bo: The buffer object + * @ctx: The struct ttm_operation_ctx controlling locking and waits. + * + * This function notifies subsystems of an upcoming buffer move. + * Upon receiving such a notification, subsystems should schedule + * halting access to the underlying pages and optionally add a fence + * to the buffer object's dma_resv object, that signals when access is + * stopped. The caller will wait on all dma_resv fences before + * starting the move. + * + * A subsystem may commence access to the object after obtaining + * bindings to the new backing memory under the object lock. + * + * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode, + * negative error code on error. + */ +static int xe_bo_move_notify(struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + int ret; + + /* + * If this starts to call into many components, consider + * using a notification chain here. + */ + + if (xe_bo_is_pinned(bo)) + return -EINVAL; + + xe_bo_vunmap(bo); + ret = xe_bo_trigger_rebind(xe, bo, ctx); + if (ret) + return ret; + + /* Don't call move_notify() for imported dma-bufs. */ + if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) + dma_buf_move_notify(ttm_bo->base.dma_buf); + + return 0; +} + +static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *new_mem, + struct ttm_place *hop) +{ + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct ttm_resource *old_mem = ttm_bo->resource; + u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM; + struct ttm_tt *ttm = ttm_bo->ttm; + struct xe_migrate *migrate = NULL; + struct dma_fence *fence; + bool move_lacks_source; + bool tt_has_data; + bool needs_clear; + bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && + ttm && ttm_tt_is_populated(ttm)) ? true : false; + int ret = 0; + /* Bo creation path, moving to system or TT. 
 */
+ if ((!old_mem && ttm) && !handle_system_ccs) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ return 0;
+ }
+
+ if (ttm_bo->type == ttm_bo_type_sg) {
+ ret = xe_bo_move_notify(bo, ctx);
+ if (!ret)
+ ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
+ goto out;
+ }
+
+ tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
+ (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+
+ move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
+ (!mem_type_is_vram(old_mem_type) && !tt_has_data);
+
+ needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+ (!ttm && ttm_bo->type == ttm_bo_type_device);
+
+ if ((move_lacks_source && !needs_clear)) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ /*
+ * Failed multi-hop where the old_mem is still marked as
+ * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
+ */
+ if (old_mem_type == XE_PL_TT &&
+ new_mem->mem_type == XE_PL_TT) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+ ret = xe_bo_move_notify(bo, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (old_mem_type == XE_PL_TT &&
+ new_mem->mem_type == XE_PL_SYSTEM) {
+ long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0) {
+ ret = timeout;
+ goto out;
+ }
+
+ if (!handle_system_ccs) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+ }
+
+ if (!move_lacks_source &&
+ ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+ (mem_type_is_vram(old_mem_type) &&
+ new_mem->mem_type == XE_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = XE_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ ret = -EMULTIHOP;
+ goto out;
+ }
+
+ if (bo->tile)
+ migrate = bo->tile->migrate;
+ else if (resource_is_vram(new_mem))
+ migrate = mem_type_to_migrate(xe, new_mem->mem_type);
+ else if (mem_type_is_vram(old_mem_type))
+ migrate = mem_type_to_migrate(xe, old_mem_type);
+ else
+ migrate = xe->tiles[0].migrate;
+
+ xe_assert(xe, migrate);
+
+ trace_xe_bo_move(bo);
+ xe_device_mem_access_get(xe);
+
+ if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+ /*
+ * Kernel memory that is pinned should only be moved on suspend
+ * / resume; some of the pinned memory is required for the
+ * device to resume / use the GPU to move other evicted memory
+ * (user memory) around. This could likely be optimized a bit
+ * further by finding the minimum set of pinned memory
+ * required for resume, but for simplicity we do a memcpy for
+ * all pinned memory.
+ */
+ ret = xe_bo_vmap(bo);
+ if (!ret) {
+ ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+ /* Create a new VMAP once the kernel BO is back in VRAM */
+ if (!ret && resource_is_vram(new_mem)) {
+ struct xe_mem_region *vram = res_to_mem_region(new_mem);
+ void *new_addr = vram->mapping +
+ (new_mem->start << PAGE_SHIFT);
+
+ if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
+ ret = -EINVAL;
+ xe_device_mem_access_put(xe);
+ goto out;
+ }
+
+ xe_assert(xe, new_mem->start ==
+ bo->placements->fpfn);
+
+ iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+ }
+ }
+ } else {
+ if (move_lacks_source)
+ fence = xe_migrate_clear(migrate, bo, new_mem);
+ else
+ fence = xe_migrate_copy(migrate, bo, bo, old_mem,
+ new_mem, handle_system_ccs);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ xe_device_mem_access_put(xe);
+ goto out;
+ }
+ if (!move_lacks_source) {
+ ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
+ true, new_mem);
+ if (ret) {
+ dma_fence_wait(fence, false);
+ ttm_bo_move_null(ttm_bo, new_mem);
+ ret = 0;
+ }
+ } else {
+ /*
+ * ttm_bo_move_accel_cleanup() may blow up if
+ * bo->resource == NULL, so just attach the
+ * fence and set the new resource.
+ */
+ dma_resv_add_fence(ttm_bo->base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ ttm_bo_move_null(ttm_bo, new_mem);
+ }
+
+ dma_fence_put(fence);
+ }
+
+ xe_device_mem_access_put(xe);
+
+out:
+ return ret;
+
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+ struct ttm_place place = {
+ .mem_type = XE_PL_TT,
+ };
+ struct ttm_placement placement = {
+ .placement = &place,
+ .num_placement = 1,
+ };
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ };
+ struct ttm_resource *new_mem;
+ int ret;
+
+ xe_bo_assert_held(bo);
+
+ if (WARN_ON(!bo->ttm.resource))
+ return -EINVAL;
+
+ if (WARN_ON(!xe_bo_is_pinned(bo)))
+ return -EINVAL;
+
+ if (WARN_ON(!xe_bo_is_vram(bo)))
+ return -EINVAL;
+
+ ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
+ if (ret)
+ return ret;
+
+ if (!bo->ttm.ttm) {
+ bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
+ if (!bo->ttm.ttm) {
+ ret = -ENOMEM;
+ goto err_res_free;
+ }
+ }
+
+ ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ if (ret)
+ goto err_res_free;
+
+ ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ if (ret)
+ goto err_res_free;
+
+ ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+ if (ret)
+ goto err_res_free;
+
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ return 0;
+
+err_res_free:
+ ttm_resource_free(&bo->ttm, &new_mem);
+ return ret;
+}
+
+/**
+ * xe_bo_restore_pinned() - Restore a pinned VRAM object
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved back to VRAM.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
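+ *
+ * A minimal sketch of the expected pairing from a hypothetical suspend /
+ * resume caller (error handling elided; this mirrors what
+ * xe_bo_evict_all() does):
+ *
+ *	xe_bo_lock(bo, false);
+ *	ret = xe_bo_evict_pinned(bo);	(on suspend)
+ *	xe_bo_unlock(bo);
+ *
+ *	xe_bo_lock(bo, false);
+ *	ret = xe_bo_restore_pinned(bo);	(on resume)
+ *	xe_bo_unlock(bo);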
+ */
+int xe_bo_restore_pinned(struct xe_bo *bo)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ };
+ struct ttm_resource *new_mem;
+ int ret;
+
+ xe_bo_assert_held(bo);
+
+ if (WARN_ON(!bo->ttm.resource))
+ return -EINVAL;
+
+ if (WARN_ON(!xe_bo_is_pinned(bo)))
+ return -EINVAL;
+
+ if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+ return -EINVAL;
+
+ ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
+ if (ret)
+ return ret;
+
+ ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+ if (ret)
+ goto err_res_free;
+
+ ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ if (ret)
+ goto err_res_free;
+
+ ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+ if (ret)
+ goto err_res_free;
+
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ return 0;
+
+err_res_free:
+ ttm_resource_free(&bo->ttm, &new_mem);
+ return ret;
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
+ unsigned long page_offset)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_res_cursor cursor;
+ struct xe_mem_region *vram;
+
+ if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
+ return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
+
+ vram = res_to_mem_region(ttm_bo->resource);
+ xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+ return (vram->io_start + cursor.start) >> PAGE_SHIFT;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ bool locked;
+
+ xe_assert(xe, !kref_read(&ttm_bo->kref));
+
+ /*
+ * We can typically only race with TTM trylocking under the
+ * lru_lock, which will immediately be unlocked again since
+ * the ttm_bo refcount is zero at this point. So trylocking *should*
+ * always succeed here, as long as we hold the lru lock.
+ */
+ spin_lock(&ttm_bo->bdev->lru_lock);
+ locked = dma_resv_trylock(ttm_bo->base.resv);
+ spin_unlock(&ttm_bo->bdev->lru_lock);
+ xe_assert(xe, locked);
+
+ return locked;
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct dma_fence *replacement = NULL;
+ struct xe_bo *bo;
+
+ if (!xe_bo_is_xe_bo(ttm_bo))
+ return;
+
+ bo = ttm_to_xe_bo(ttm_bo);
+ xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
+
+ /*
+ * Corner case where TTM fails to allocate memory and this BO's resv
+ * still points to the VM's resv
+ */
+ if (ttm_bo->base.resv != &ttm_bo->base._resv)
+ return;
+
+ if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+ return;
+
+ /*
+ * Scrub the preempt fences, if any. The unbind fence is already
+ * attached to the resv.
+ * TODO: Don't do this for external bos once we scrub them after
+ * unbind.
+ */
+ dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (xe_fence_is_xe_preempt(fence) &&
+ !dma_fence_is_signaled(fence)) {
+ if (!replacement)
+ replacement = dma_fence_get_stub();
+
+ dma_resv_replace_fences(ttm_bo->base.resv,
+ fence->context,
+ replacement,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ }
+ dma_fence_put(replacement);
+
+ dma_resv_unlock(ttm_bo->base.resv);
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+ if (!xe_bo_is_xe_bo(ttm_bo))
+ return;
+
+ /*
+ * Object is idle and about to be destroyed. Release the
+ * dma-buf attachment.
+ */
+ if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+ struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
+ struct xe_ttm_tt, ttm);
+
+ dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+ DMA_BIDIRECTIONAL);
+ ttm_bo->sg = NULL;
+ xe_tt->sg = NULL;
+ }
+}
+
+struct ttm_device_funcs xe_ttm_funcs = {
+ .ttm_tt_create = xe_ttm_tt_create,
+ .ttm_tt_populate = xe_ttm_tt_populate,
+ .ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+ .ttm_tt_destroy = xe_ttm_tt_destroy,
+ .evict_flags = xe_evict_flags,
+ .move = xe_bo_move,
+ .io_mem_reserve = xe_ttm_io_mem_reserve,
+ .io_mem_pfn = xe_ttm_io_mem_pfn,
+ .release_notify = xe_ttm_bo_release_notify,
+ .eviction_valuable = ttm_bo_eviction_valuable,
+ .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+ if (bo->ttm.base.import_attach)
+ drm_prime_gem_destroy(&bo->ttm.base, NULL);
+ drm_gem_object_release(&bo->ttm.base);
+
+ xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
+
+ if (bo->ggtt_node.size)
+ xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+
+#ifdef CONFIG_PROC_FS
+ if (bo->client)
+ xe_drm_client_remove_bo(bo);
+#endif
+
+ if (bo->vm && xe_bo_is_user(bo))
+ xe_vm_put(bo->vm);
+
+ kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+ /* Our BO reference counting scheme works as follows:
+ *
+ * The gem object kref is typically used throughout the driver,
+ * and the gem object holds a ttm_buffer_object refcount, so
+ * that when the last gem object reference is put, which is when
+ * we end up in this function, we also put the ttm_buffer_object
+ * refcount. Anything using gem interfaces is then no longer
+ * allowed to access the object in a way that requires a gem
+ * refcount, including locking the object.
+ *
+ * Driver TTM callbacks are allowed to use the ttm_buffer_object
+ * refcount directly if needed.
+ */
+ __xe_bo_vunmap(gem_to_xe_bo(obj));
+ ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static void xe_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
+ xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
+
+ xe_bo_lock(bo, false);
+ ttm_bo_set_bulk_move(&bo->ttm, NULL);
+ xe_bo_unlock(bo);
+ }
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+ struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+ struct drm_device *ddev = tbo->base.dev;
+ vm_fault_t ret;
+ int idx, r = 0;
+
+ ret = ttm_bo_vm_reserve(tbo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(ddev, &idx)) {
+ struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+ trace_xe_bo_cpu_fault(bo);
+
+ if (should_migrate_to_system(bo)) {
+ r = xe_bo_migrate(bo, XE_PL_TT);
+ if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+ ret = VM_FAULT_NOPAGE;
+ else if (r)
+ ret = VM_FAULT_SIGBUS;
+ }
+ if (!ret)
+ ret = ttm_bo_vm_fault_reserved(vmf,
+ vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+ dma_resv_unlock(tbo->base.resv);
+ return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+ .fault = xe_gem_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+ .free = xe_gem_object_free,
+ .close = xe_gem_object_close,
+ .mmap = drm_gem_ttm_mmap,
+ .export = xe_gem_prime_export,
+ .vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This function is intended to allocate storage to be used for input
+ * to __xe_bo_create_locked(), in case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked() ends up never being called, then the
+ * storage allocated with this function needs to be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to an uninitialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+ struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+
+ return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
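+ *
+ * A minimal sketch of the intended pattern (error handling elided;
+ * setup_that_needs_the_pointer() is a hypothetical stand-in for whatever
+ * requires the pointer before creation):
+ *
+ *	bo = xe_bo_alloc();
+ *	if (IS_ERR(bo))
+ *		return bo;
+ *
+ *	err = setup_that_needs_the_pointer(bo);
+ *	if (err) {
+ *		xe_bo_free(bo);
+ *		return ERR_PTR(err);
+ *	}
+ *
+ *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
+ *				    cpu_caching, type, flags);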
+ */ +void xe_bo_free(struct xe_bo *bo) +{ + kfree(bo); +} + +struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement *placement; + uint32_t alignment; + size_t aligned_size; + int err; + + /* Only kernel objects should set GT */ + xe_assert(xe, !tile || type == ttm_bo_type_kernel); + + if (XE_WARN_ON(!size)) { + xe_bo_free(bo); + return ERR_PTR(-EINVAL); + } + + if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && + !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { + aligned_size = ALIGN(size, SZ_64K); + if (type != ttm_bo_type_device) + size = ALIGN(size, SZ_64K); + flags |= XE_BO_INTERNAL_64K; + alignment = SZ_64K >> PAGE_SHIFT; + + } else { + aligned_size = ALIGN(size, SZ_4K); + flags &= ~XE_BO_INTERNAL_64K; + alignment = SZ_4K >> PAGE_SHIFT; + } + + if (type == ttm_bo_type_device && aligned_size != size) + return ERR_PTR(-EINVAL); + + if (!bo) { + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return bo; + } + + bo->ccs_cleared = false; + bo->tile = tile; + bo->size = size; + bo->flags = flags; + bo->cpu_caching = cpu_caching; + bo->ttm.base.funcs = &xe_gem_object_funcs; + bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; + bo->props.preferred_gt = XE_BO_PROPS_INVALID; + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + bo->ttm.priority = XE_BO_PRIORITY_NORMAL; + INIT_LIST_HEAD(&bo->pinned_link); +#ifdef CONFIG_PROC_FS + INIT_LIST_HEAD(&bo->client_link); +#endif + + drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); + + if (resv) { + ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT); + ctx.resv = resv; + } + + if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) { + err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + if (WARN_ON(err)) { + xe_ttm_bo_destroy(&bo->ttm); + return ERR_PTR(err); + } + } + + /* Defer populating type_sg bos */ + placement = (type == ttm_bo_type_sg || + bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement : + &bo->placement; + err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, + placement, alignment, + &ctx, NULL, resv, xe_ttm_bo_destroy); + if (err) + return ERR_PTR(err); + + /* + * The VRAM pages underneath are potentially still being accessed by the + * GPU, as per async GPU clearing and async evictions. However TTM makes + * sure to add any corresponding move/clear fences into the objects + * dma-resv using the DMA_RESV_USAGE_KERNEL slot. + * + * For KMD internal buffers we don't care about GPU clearing, however we + * still need to handle async evictions, where the VRAM is still being + * accessed by the GPU. Most internal callers are not expecting this, + * since they are missing the required synchronisation before accessing + * the memory. To keep things simple just sync wait any kernel fences + * here, if the buffer is designated KMD internal. + * + * For normal userspace objects we should already have the required + * pipelining or sync waiting elsewhere, since we already have to deal + * with things like async GPU clearing. 
+ */
+ if (type == ttm_bo_type_kernel) {
+ long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ ctx.interruptible,
+ MAX_SCHEDULE_TIMEOUT);
+
+ if (timeout < 0) {
+ if (!resv)
+ dma_resv_unlock(bo->ttm.base.resv);
+ xe_bo_put(bo);
+ return ERR_PTR(timeout);
+ }
+ }
+
+ bo->created = true;
+ if (bulk)
+ ttm_bo_set_bulk_move(&bo->ttm, bulk);
+ else
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return bo;
+}
+
+static int __xe_bo_fixed_placement(struct xe_device *xe,
+ struct xe_bo *bo,
+ u32 flags,
+ u64 start, u64 end, u64 size)
+{
+ struct ttm_place *place = bo->placements;
+
+ if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+ return -EINVAL;
+
+ place->flags = TTM_PL_FLAG_CONTIGUOUS;
+ place->fpfn = start >> PAGE_SHIFT;
+ place->lpfn = end >> PAGE_SHIFT;
+
+ switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
+ case XE_BO_CREATE_VRAM0_BIT:
+ place->mem_type = XE_PL_VRAM0;
+ break;
+ case XE_BO_CREATE_VRAM1_BIT:
+ place->mem_type = XE_PL_VRAM1;
+ break;
+ case XE_BO_CREATE_STOLEN_BIT:
+ place->mem_type = XE_PL_STOLEN;
+ break;
+
+ default:
+ /* 0 or multiple of the above set */
+ return -EINVAL;
+ }
+
+ bo->placement = (struct ttm_placement) {
+ .num_placement = 1,
+ .placement = place,
+ .num_busy_placement = 1,
+ .busy_placement = place,
+ };
+
+ return 0;
+}
+
+static struct xe_bo *
+__xe_bo_create_locked(struct xe_device *xe,
+ struct xe_tile *tile, struct xe_vm *vm,
+ size_t size, u64 start, u64 end,
+ u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo = NULL;
+ int err;
+
+ if (vm)
+ xe_vm_assert_held(vm);
+
+ if (start || end != ~0ULL) {
+ bo = xe_bo_alloc();
+ if (IS_ERR(bo))
+ return bo;
+
+ flags |= XE_BO_FIXED_PLACEMENT_BIT;
+ err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
+ if (err) {
+ xe_bo_free(bo);
+ return ERR_PTR(err);
+ }
+ }
+
+ bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+ vm && !xe_vm_in_fault_mode(vm) &&
+ flags & XE_BO_CREATE_USER_BIT ?
+ &vm->lru_bulk_move : NULL, size,
+ cpu_caching, type, flags);
+ if (IS_ERR(bo))
+ return bo;
+
+ /*
+ * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
+ * to ensure the shared resv doesn't disappear under the bo, the bo
+ * will keep a reference to the vm, and avoid circular references
+ * by having all the vm's bo references released at vm close
+ * time.
+ */ + if (vm && xe_bo_is_user(bo)) + xe_vm_get(vm); + bo->vm = vm; + + if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) + tile = xe_device_get_root_tile(xe); + + xe_assert(xe, tile); + + if (flags & XE_BO_FIXED_PLACEMENT_BIT) { + err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, + start + bo->size, U64_MAX); + } else { + err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); + } + if (err) + goto err_unlock_put_bo; + } + + return bo; + +err_unlock_put_bo: + __xe_bo_unset_bulk_move(bo); + xe_bo_unlock_vm_held(bo); + xe_bo_put(bo); + return ERR_PTR(err); +} + +struct xe_bo * +xe_bo_create_locked_range(struct xe_device *xe, + struct xe_tile *tile, struct xe_vm *vm, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) +{ + return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags); +} + +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags); +} + +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + enum ttm_bo_type type, + u32 flags) +{ + struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, + cpu_caching, type, + flags | XE_BO_CREATE_USER_BIT); + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + + return bo; +} + +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); + + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + + return bo; +} + +struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo; + int err; + u64 start = offset == ~0ull ? 0 : offset; + u64 end = offset == ~0ull ? 
offset : start + size;
+
+ if (flags & XE_BO_CREATE_STOLEN_BIT &&
+ xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+ flags |= XE_BO_CREATE_GGTT_BIT;
+
+ bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
+ flags | XE_BO_NEEDS_CPU_ACCESS);
+ if (IS_ERR(bo))
+ return bo;
+
+ err = xe_bo_pin(bo);
+ if (err)
+ goto err_put;
+
+ err = xe_bo_vmap(bo);
+ if (err)
+ goto err_unpin;
+
+ xe_bo_unlock_vm_held(bo);
+
+ return bo;
+
+err_unpin:
+ xe_bo_unpin(bo);
+err_put:
+ xe_bo_unlock_vm_held(bo);
+ xe_bo_put(bo);
+ return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+ const void *data, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ type, flags);
+ if (IS_ERR(bo))
+ return bo;
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+ return bo;
+}
+
+static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+{
+ xe_bo_unpin_map_no_vm(arg);
+}
+
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u32 flags)
+{
+ struct xe_bo *bo;
+ int ret;
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+ if (IS_ERR(bo))
+ return bo;
+
+ ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return bo;
+}
+
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+ const void *data, size_t size, u32 flags)
+{
+ struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
+
+ if (IS_ERR(bo))
+ return bo;
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+ return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+uint64_t vram_region_gpu_offset(struct ttm_resource *res)
+{
+ struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+
+ if (res->mem_type == XE_PL_STOLEN)
+ return xe_ttm_stolen_gpu_offset(xe);
+
+ return res_to_mem_region(res)->dpa_base;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ int err;
+
+ xe_assert(xe, !bo->vm);
+ xe_assert(xe, xe_bo_is_user(bo));
+
+ if (!xe_bo_is_pinned(bo)) {
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+
+ if (xe_bo_is_vram(bo)) {
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link,
+ &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+ }
+ }
+
+ ttm_bo_pin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ int err;
+
+ /* We currently don't expect user BOs to be pinned */
+ xe_assert(xe, !xe_bo_is_user(bo));
+
+ /* Pinned objects must be in GGTT or have the pinned flag */
+ xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT));
+
+ /*
+ * No reason we can't support pinning imported dma-bufs, we just don't
+ * expect to pin an imported dma-buf.
+ */
+ xe_assert(xe, !bo->ttm.base.import_attach);
+
+ /* We only expect at most 1 pin */
+ xe_assert(xe, !xe_bo_is_pinned(bo));
+
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+
+ /*
+ * For pinned objects on DGFX, which are also in VRAM, we expect
+ * these to be in contiguous VRAM memory. This is required for
+ * eviction / restore during suspend / resume (force restore to the
+ * same physical address).
+ */
+ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+ bo->flags & XE_BO_INTERNAL_TEST)) {
+ struct ttm_place *place = &(bo->placements[0]);
+
+ if (mem_type_is_vram(place->mem_type)) {
+ xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
+
+ place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
+ vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
+ place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ spin_unlock(&xe->pinned.lock);
+ }
+ }
+
+ ttm_bo_pin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ xe_assert(xe, !bo->vm);
+ xe_assert(xe, xe_bo_is_pinned(bo));
+ xe_assert(xe, xe_bo_is_user(bo));
+
+ if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+ spin_lock(&xe->pinned.lock);
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
+ }
+
+ ttm_bo_unpin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ xe_assert(xe, !bo->ttm.base.import_attach);
+ xe_assert(xe, xe_bo_is_pinned(bo));
+
+ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+ bo->flags & XE_BO_INTERNAL_TEST)) {
+ struct ttm_place *place = &(bo->placements[0]);
+
+ if (mem_type_is_vram(place->mem_type)) {
+ xe_assert(xe, !list_empty(&bo->pinned_link));
+
+ spin_lock(&xe->pinned.lock);
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
+ }
+ }
+
+ ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo.
+ * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
+ * NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ * reservation object.
+ *
+ * Make sure the bo is in an allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction share
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true; otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+
+ if (vm) {
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+
+ ctx.allow_res_evict = allow_res_evict;
+ ctx.resv = xe_vm_resv(vm);
+ }
+
+ return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+ if (bo->destroy == &xe_ttm_bo_destroy)
+ return true;
+
+ return false;
+}
+
+/*
+ * Resolve a BO address. There is no assert to check if the proper lock is
+ * held, so it should only be used in cases where it is not fatal to get the
+ * wrong address, such as printing debug information, but not in cases where
+ * memory is written based on this result.
+ */
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ struct xe_res_cursor cur;
+ u64 page;
+
+ xe_assert(xe, page_size <= PAGE_SIZE);
+ page = offset >> PAGE_SHIFT;
+ offset &= (PAGE_SIZE - 1);
+
+ if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+ xe_assert(xe, bo->ttm.ttm);
+
+ xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
+ page_size, &cur);
+ return xe_res_dma(&cur) + offset;
+ } else {
+ struct xe_res_cursor cur;
+
+ xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+ page_size, &cur);
+ return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
+ }
+}
+
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+ if (!READ_ONCE(bo->ttm.pin_count))
+ xe_bo_assert_held(bo);
+ return __xe_bo_addr(bo, offset, page_size);
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+ void *virtual;
+ bool is_iomem;
+ int ret;
+
+ xe_bo_assert_held(bo);
+
+ if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+ return -EINVAL;
+
+ if (!iosys_map_is_null(&bo->vmap))
+ return 0;
+
+ /*
+ * We use this more or less deprecated interface for now since
+ * ttm_bo_vmap() doesn't offer the optimization of kmapping
+ * single page bos, which is done here.
+ * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+ * to use struct iosys_map.
+ */ + ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + if (ret) + return ret; + + virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); + if (is_iomem) + iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual); + else + iosys_map_set_vaddr(&bo->vmap, virtual); + + return 0; +} + +static void __xe_bo_vunmap(struct xe_bo *bo) +{ + if (!iosys_map_is_null(&bo->vmap)) { + iosys_map_clear(&bo->vmap); + ttm_bo_kunmap(&bo->kmap); + } +} + +void xe_bo_vunmap(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + __xe_bo_vunmap(bo); +} + +int xe_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_gem_create *args = data; + struct xe_vm *vm = NULL; + struct xe_bo *bo; + unsigned int bo_flags; + u32 handle; + int err; + + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + /* at least one valid memory placement must be specified */ + if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) || + !args->placement)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->flags & + ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | + DRM_XE_GEM_CREATE_FLAG_SCANOUT | + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->handle)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !args->size)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK)) + return -EINVAL; + + bo_flags = 0; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) + bo_flags |= XE_BO_DEFER_BACKING; + + if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) + bo_flags |= XE_BO_SCANOUT_BIT; + + bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { + if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) + return -EINVAL; + + bo_flags |= XE_BO_NEEDS_CPU_ACCESS; + } + + if (XE_IOCTL_DBG(xe, !args->cpu_caching || + args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK && + args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT && + args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) + return -EINVAL; + + if (args->vm_id) { + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + err = xe_vm_lock(vm, true); + if (err) + goto out_vm; + } + + bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, + ttm_bo_type_device, bo_flags); + + if (vm) + xe_vm_unlock(vm); + + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out_vm; + } + + err = drm_gem_handle_create(file, &bo->ttm.base, &handle); + if (err) + goto out_bulk; + + args->handle = handle; + goto out_put; + +out_bulk: + if (vm && !xe_vm_in_fault_mode(vm)) { + xe_vm_lock(vm, false); + __xe_bo_unset_bulk_move(bo); + xe_vm_unlock(vm); + } +out_put: + xe_bo_put(bo); +out_vm: + if (vm) + xe_vm_put(vm); + + return err; +} + +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_gem_mmap_offset *args = data; + struct drm_gem_object *gem_obj; + + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return 
-EINVAL; + + if (XE_IOCTL_DBG(xe, args->flags)) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file, args->handle); + if (XE_IOCTL_DBG(xe, !gem_obj)) + return -ENOENT; + + /* The mmap offset was set up at BO allocation time. */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + xe_bo_put(gem_to_xe_bo(gem_obj)); + return 0; +} + +/** + * xe_bo_lock() - Lock the buffer object's dma_resv object + * @bo: The struct xe_bo whose lock is to be taken + * @intr: Whether to perform any wait interruptible + * + * Locks the buffer object's dma_resv object. If the buffer object is + * pointing to a shared dma_resv object, that shared lock is locked. + * + * Return: 0 on success, -EINTR if @intr is true and the wait for a + * contended lock was interrupted. If @intr is set to false, the + * function always returns 0. + */ +int xe_bo_lock(struct xe_bo *bo, bool intr) +{ + if (intr) + return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL); + + dma_resv_lock(bo->ttm.base.resv, NULL); + + return 0; +} + +/** + * xe_bo_unlock() - Unlock the buffer object's dma_resv object + * @bo: The struct xe_bo whose lock is to be released. + * + * Unlock a buffer object lock that was locked by xe_bo_lock(). + */ +void xe_bo_unlock(struct xe_bo *bo) +{ + dma_resv_unlock(bo->ttm.base.resv); +} + +/** + * xe_bo_can_migrate - Whether a buffer object likely can be migrated + * @bo: The buffer object to migrate + * @mem_type: The TTM memory type intended to migrate to + * + * Check whether the buffer object supports migration to the + * given memory type. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate buffer objects and can be called without + * the object lock held. + * + * Return: true if migration is possible, false otherwise. + */ +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type) +{ + unsigned int cur_place; + + if (bo->ttm.type == ttm_bo_type_kernel) + return true; + + if (bo->ttm.type == ttm_bo_type_sg) + return false; + + for (cur_place = 0; cur_place < bo->placement.num_placement; + cur_place++) { + if (bo->placements[cur_place].mem_type == mem_type) + return true; + } + + return false; +} + +static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = mem_type; +} + +/** + * xe_bo_migrate - Migrate an object to the desired region id + * @bo: The buffer object to migrate. + * @mem_type: The TTM region type to migrate to. + * + * Attempt to migrate the buffer object to the desired memory region. The + * buffer object may not be pinned, and must be locked. + * On successful completion, the object memory type will be updated, + * but an async migration task may not have completed yet, and to + * accomplish that, the object's kernel fences must be signaled with + * the object lock held. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -EINTR or -ERESTARTSYS if signal pending. 
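+ *
+ * A minimal sketch of a sleeping caller (hypothetical; the CPU fault
+ * handler does a similar migration to XE_PL_TT):
+ *
+ *	ret = xe_bo_lock(bo, true);
+ *	if (ret)
+ *		return ret;
+ *	ret = xe_bo_migrate(bo, XE_PL_TT);
+ *	xe_bo_unlock(bo);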
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement placement;
+ struct ttm_place requested;
+
+ xe_bo_assert_held(bo);
+
+ if (bo->ttm.resource->mem_type == mem_type)
+ return 0;
+
+ if (xe_bo_is_pinned(bo))
+ return -EBUSY;
+
+ if (!xe_bo_can_migrate(bo, mem_type))
+ return -EINVAL;
+
+ xe_place_from_ttm_type(mem_type, &requested);
+ placement.num_placement = 1;
+ placement.num_busy_placement = 1;
+ placement.placement = &requested;
+ placement.busy_placement = &requested;
+
+ /*
+ * Stolen needs to be handled like the VRAM handling below if we ever
+ * need to support it.
+ */
+ drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
+
+ if (mem_type_is_vram(mem_type)) {
+ u32 c = 0;
+
+ add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
+ }
+
+ return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. This function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false,
+ .force_alloc = force_alloc,
+ };
+ struct ttm_placement placement;
+ int ret;
+
+ xe_evict_flags(&bo->ttm, &placement);
+ ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+ if (ret)
+ return ret;
+
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
+ return false;
+
+ /* On discrete GPUs, if the GPU can access this buffer from
+ * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
+ * can't be used since there's no CCS storage associated with
+ * non-VRAM addresses.
+ */
+ if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+ return false;
+
+ return true;
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(). Keep off.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+ struct llist_node *freed;
+ struct xe_bo *bo, *next;
+
+ if (!deferred)
+ return;
+
+ freed = llist_del_all(deferred);
+ if (!freed)
+ return;
+
+ llist_for_each_entry_safe(bo, next, freed, freed)
+ drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: ...
+ * @dev: ...
+ * @args: ...
+ * + * See dumb_create() hook in include/drm/drm_drv.h + * + * Return: ... + */ +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + uint32_t handle; + int cpp = DIV_ROUND_UP(args->bpp, 8); + int err; + u32 page_size = max_t(u32, PAGE_SIZE, + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K); + + args->pitch = ALIGN(args->width * cpp, 64); + args->size = ALIGN(mul_u32_u32(args->pitch, args->height), + page_size); + + bo = xe_bo_create_user(xe, NULL, NULL, args->size, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | + XE_BO_NEEDS_CPU_ACCESS); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&bo->ttm.base); + if (!err) + args->handle = handle; + return err; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_bo.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h new file mode 100644 index 000000000000..9b1279aca127 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -0,0 +1,355 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_BO_H_ +#define _XE_BO_H_ + +#include <drm/ttm/ttm_tt.h> + +#include "xe_bo_types.h" +#include "xe_macros.h" +#include "xe_vm_types.h" +#include "xe_vm.h" + +/** + * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. + * @vm: The vm + */ +#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) + + + +#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ + +#define XE_BO_CREATE_USER_BIT BIT(0) +/* The bits below need to be contiguous, or things break */ +#define XE_BO_CREATE_SYSTEM_BIT BIT(1) +#define XE_BO_CREATE_VRAM0_BIT BIT(2) +#define XE_BO_CREATE_VRAM1_BIT BIT(3) +#define XE_BO_CREATE_VRAM_MASK (XE_BO_CREATE_VRAM0_BIT | \ + XE_BO_CREATE_VRAM1_BIT) +/* -- */ +#define XE_BO_CREATE_STOLEN_BIT BIT(4) +#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \ + (IS_DGFX(tile_to_xe(tile)) ? 
XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
+ XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT BIT(7)
+#define XE_BO_CREATE_NO_RESV_EVICT BIT(8)
+#define XE_BO_DEFER_BACKING BIT(9)
+#define XE_BO_SCANOUT_BIT BIT(10)
+#define XE_BO_FIXED_PLACEMENT_BIT BIT(11)
+#define XE_BO_PAGETABLE BIT(12)
+#define XE_BO_NEEDS_CPU_ACCESS BIT(13)
+/* this one is triggered internally only */
+#define XE_BO_INTERNAL_TEST BIT(30)
+#define XE_BO_INTERNAL_64K BIT(31)
+
+#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12)
+#define XE_PPGTT_PTE_PAT2 BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1 BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0 BIT_ULL(3)
+
+#define XE_PTE_SHIFT 12
+#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
+#define XE_PTE_MASK (XE_PAGE_SIZE - 1)
+#define XE_PDE_SHIFT (XE_PTE_SHIFT - 3)
+#define XE_PDES (1 << XE_PDE_SHIFT)
+#define XE_PDE_MASK (XE_PDES - 1)
+
+#define XE_64K_PTE_SHIFT 16
+#define XE_64K_PAGE_SIZE (1 << XE_64K_PTE_SHIFT)
+#define XE_64K_PTE_MASK (XE_64K_PAGE_SIZE - 1)
+#define XE_64K_PDE_MASK (XE_PDE_MASK >> 4)
+
+#define XE_PDE_PS_2M BIT_ULL(7)
+#define XE_PDPE_PS_1G BIT_ULL(7)
+#define XE_PDE_IPS_64K BIT_ULL(11)
+
+#define XE_GGTT_PTE_DM BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE BIT_ULL(10)
+#define XE_PPGTT_PTE_DM BIT_ULL(11)
+#define XE_PDE_64K BIT_ULL(6)
+#define XE_PTE_PS64 BIT_ULL(8)
+#define XE_PTE_NULL BIT_ULL(9)
+
+#define XE_PAGE_PRESENT BIT_ULL(0)
+#define XE_PAGE_RW BIT_ULL(1)
+
+#define XE_PL_SYSTEM TTM_PL_SYSTEM
+#define XE_PL_TT TTM_PL_TT
+#define XE_PL_VRAM0 TTM_PL_VRAM
+#define XE_PL_VRAM1 (XE_PL_VRAM0 + 1)
+#define XE_PL_STOLEN (TTM_NUM_MEM_TYPES - 1)
+
+#define XE_BO_PROPS_INVALID (-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags);
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+ struct xe_tile *tile, struct xe_vm *vm,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size,
+ u16 cpu_caching,
+ enum ttm_bo_type type,
+ u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_vm *vm, size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+ const void *data, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u32 flags);
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+ const void *data, size_t size, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+ u32 bo_flags);
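+
+/*
+ * Example (a sketch, not taken from an in-tree caller): create a pinned,
+ * GGTT-mapped, CPU-mappable kernel BO, placed in VRAM on discrete devices
+ * and in system memory otherwise:
+ *
+ *	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ *				  ttm_bo_type_kernel,
+ *				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ *				  XE_BO_CREATE_GGTT_BIT);
+ *	if (IS_ERR(bo))
+ *		return PTR_ERR(bo);
+ */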
+ +static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) +{ + return container_of(bo, struct xe_bo, ttm); +} + +static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_bo, ttm.base); +} + +#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev) + +static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_get(&bo->ttm.base); + + return bo; +} + +static inline void xe_bo_put(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_put(&bo->ttm.base); +} + +static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) +{ + if (bo) + ttm_bo_set_bulk_move(&bo->ttm, NULL); +} + +static inline void xe_bo_assert_held(struct xe_bo *bo) +{ + if (bo) + dma_resv_assert_held((bo)->ttm.base.resv); +} + +int xe_bo_lock(struct xe_bo *bo, bool intr); + +void xe_bo_unlock(struct xe_bo *bo); + +static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) +{ + if (bo) { + XE_WARN_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm)); + if (bo->vm) + xe_vm_assert_held(bo->vm); + else + dma_resv_unlock(bo->ttm.base.resv); + } +} + +int xe_bo_pin_external(struct xe_bo *bo); +int xe_bo_pin(struct xe_bo *bo); +void xe_bo_unpin_external(struct xe_bo *bo); +void xe_bo_unpin(struct xe_bo *bo); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); + +static inline bool xe_bo_is_pinned(struct xe_bo *bo) +{ + return bo->ttm.pin_count; +} + +static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + if (likely(bo)) { + xe_bo_lock(bo, false); + xe_bo_unpin(bo); + xe_bo_unlock(bo); + + xe_bo_put(bo); + } +} + +bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); +dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size); +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size); + +static inline dma_addr_t +xe_bo_main_addr(struct xe_bo *bo, size_t page_size) +{ + return xe_bo_addr(bo, 0, page_size); +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + XE_WARN_ON(bo->ggtt_node.size > bo->size); + XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); + return bo->ggtt_node.start; +} + +int xe_bo_vmap(struct xe_bo *bo); +void xe_bo_vunmap(struct xe_bo *bo); + +bool mem_type_is_vram(u32 mem_type); +bool xe_bo_is_vram(struct xe_bo *bo); +bool xe_bo_is_stolen(struct xe_bo *bo); +bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +uint64_t vram_region_gpu_offset(struct ttm_resource *res); + +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); + +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); +int xe_bo_evict(struct xe_bo *bo, bool force_alloc); + +int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_restore_pinned(struct xe_bo *bo); + +extern struct ttm_device_funcs xe_ttm_funcs; + +int xe_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); + +bool xe_bo_needs_ccs_pages(struct xe_bo *bo); + +static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) +{ + return PAGE_ALIGN(bo->ttm.base.size); +} + +static inline bool xe_bo_has_pages(struct xe_bo *bo) +{ + if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) || + xe_bo_is_vram(bo)) + return true; + + return false; +} + +void __xe_bo_release_dummy(struct kref *kref); + +/** + * xe_bo_put_deferred() - Put a buffer object with delayed final freeing + * @bo: The bo to put. 
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context or from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true. It's safe to always
+ * follow up with a call to xe_bo_put_commit().
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @deferred list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+ if (!deferred) {
+ xe_bo_put(bo);
+ return false;
+ }
+
+ if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+ return false;
+
+ return llist_add(&bo->freed, deferred);
+}
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_sg(struct xe_bo *bo);
+
+/*
+ * xe_sg_segment_size() - Provides upper limit for sg segment size.
+ * @dev: device pointer
+ *
+ * Returns the maximum segment size for the 'struct scatterlist'
+ * elements.
+ */
+static inline unsigned int xe_sg_segment_size(struct device *dev)
+{
+ struct scatterlist __maybe_unused sg;
+ size_t max = BIT_ULL(sizeof(sg.length) * 8) - 1;
+
+ max = min_t(size_t, max, dma_max_mapping_size(dev));
+
+ /*
+ * The iommu_dma_map_sg() function ensures iova allocation doesn't
+ * cross dma segment boundary. It does so by padding some sg elements.
+ * This can cause overflow, ending up with sg->length being set to 0.
+ * Avoid this by ensuring maximum segment size is half of 'max'
+ * rounded down to PAGE_SIZE.
+ */
+ return round_down(max / 2, PAGE_SIZE);
+}
+
+#define i915_gem_object_flush_if_display(obj) ((void)(obj))
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+ xe_bo_assert_held(bo);
+ return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 000000000000..f57d440cc95a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) are passed in upon creation. TTM handles placement
+ * of the BO and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g.
uC firmware images, GuC
+ * ADS, etc...) or a BO created as part of a user operation which requires
+ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside from memory for page
+ * tables are in the GGTT), are pinned (can't move or be evicted at runtime),
+ * have a vmap (XE can access the memory via the xe_map layer) and have
+ * contiguous physical memory.
+ *
+ * More details on why kernel BOs are pinned and contiguous are below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be deferred from creation time until first use, which
+ * is either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs of a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which have a mapping of the BO. This allows VMs to lookup and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime eviction refers to TTM deciding, during normal operation, that it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is the first BO on the LRU list that is not
+ * locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * no space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules and currently reside in VRAM; TTM will trigger a move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO is
+ * used again (exec or compute mode rebind worker).
+ *
+ * Another example would be when TTM can't find a BO to evict which has another
+ * valid placement.
In this case TTM will evict one (or multiple) unlocked BO(s) + * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid + * and before next use need to be moved to a valid placement and rebound. + * + * In both cases, moves of these BOs are scheduled behind the fences in the BO's + * dma-resv slots. + * + * WW locking tries to ensures if 2 VMs use 51% of the memory forward progress + * is made on both VMs. + * + * Runtime eviction uses per a GT migration engine (TODO: link to migration + * engine doc) to do a GPU memcpy from one location to another. + * + * Rebinds after runtime eviction + * ------------------------------ + * + * When BOs are moved, every mapping (VMA) of the BO needs to rebound before + * the BO is used again. Every VMA is added to an evicted list of its VM when + * the BO is moved. This is safe because of the VM locking structure (TODO: link + * to VM locking doc). On the next use of a VM (exec or compute mode rebind + * worker) the evicted VMA list is checked and rebinds are triggered. In the + * case of faulting VM, the rebind is done in the page fault handler. + * + * Suspend / resume eviction of VRAM + * --------------------------------- + * + * During device suspend / resume VRAM may lose power which means the contents + * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of + * suspend must be moved to sysmem in order for their contents to be saved. + * + * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned + * (user) BOs to sysmem. External BOs that are pinned need to be manually + * evicted with a simple loop + xe_bo_evict call. It gets a little trickier + * with kernel BOs. + * + * Some kernel BOs are used by the GT migration engine to do moves, thus we + * can't move all of the BOs via the GT migration engine. For simplity, use a + * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume. + * + * Some kernel BOs need to be restored to the exact same physical location. TTM + * makes this rather easy but the caveat is the memory must be contiguous. Again + * for simplity, we enforce that all kernel (pinned) BOs are contiguous and + * restored to the same physical location. + * + * Pinned external BOs in VRAM are restored on resume via the GPU. + * + * Rebinds after suspend / resume + * ------------------------------ + * + * Most kernel BOs have GGTT mappings which must be restored during the resume + * process. All user BOs are rebound after validation on their next use. + * + * Future work + * =========== + * + * Trim the list of BOs which is saved / restored via TTM memcpy on suspend / + * resume. All we really need to save / restore via TTM memcpy is the memory + * required for the GuC to load and the memory for the GT migrate engine to + * operate. + * + * Do not require kernel BOs to be contiguous in physical memory / restored to + * the same physical address on resume. In all likelihood the only memory that + * needs to be restored to the same physical address is memory used for page + * tables. All of that memory is allocated 1 page at time so the contiguous + * requirement isn't needed. Some work on the vmap code would need to be done if + * kernel BOs are not contiguous too. + * + * Make some kernel BO evictable rather than pinned. An example of this would be + * engine state, in all likelihood if the dma-slots of these BOs where properly + * used rather than pinning we could safely evict + rebind these BOs as needed. 
+ * + * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is + * repopulated on resume), add flag to mark such objects as no save / restore. + */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c new file mode 100644 index 000000000000..7a264a9ca06e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo_evict.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_tile.h" + +/** + * xe_bo_evict_all - evict all BOs from VRAM + * + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. + */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct xe_bo *bo; + struct xe_tile *tile; + struct list_head still_in_list; + u32 mem_type; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* User memory */ + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(bdev, mem_type); + + if (man) { + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + } + } + + /* Pinned user memory in VRAM */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &still_in_list); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, false); + ret = xe_bo_evict_pinned(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* + * Wait for all user BO to be evicted as those evictions depend on the + * memory moved below. + */ + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.evicted); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, false); + ret = xe_bo_evict_pinned(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); + if (ret) + return ret; + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_kernel - restore kernel BOs to VRAM + * + * @xe: xe device + * + * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All + * moves done via TTM calls. + * + * This function should be called early, before trying to init the GT, on device + * resume. 
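+ *
+ * A rough sketch of the expected call ordering across a suspend/resume
+ * cycle, based on the notes in these kernel-docs (the surrounding
+ * suspend/resume entry points are illustrative, not actual function names):
+ *
+ *	suspend:  xe_bo_evict_all(xe)      - VRAM contents to temporary storage
+ *	resume:   xe_bo_restore_kernel(xe) - via CPU, before GT init
+ *	          ... GT / GuC init ...
+ *	          xe_bo_restore_user(xe)   - via GPU, after GT init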
+ */ +int xe_bo_restore_kernel(struct xe_device *xe) +{ + struct xe_bo *bo; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.evicted, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, false); + ret = xe_bo_restore_pinned(bo); + xe_bo_unlock(bo); + if (ret) { + xe_bo_put(bo); + return ret; + } + + if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + struct xe_tile *tile = bo->tile; + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); + } + + /* + * We expect validate to trigger a move VRAM and our move code + * should setup the iosys map. + */ + xe_assert(xe, !iosys_map_is_null(&bo->vmap)); + xe_assert(xe, xe_bo_is_vram(bo)); + + xe_bo_put(bo); + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_user - restore pinned user BOs to VRAM + * + * @xe: xe device + * + * Move pinned user BOs from temporary (typically system) memory to VRAM via + * CPU. All moves done via TTM calls. + * + * This function should be called late, after GT init, on device resume. + */ +int xe_bo_restore_user(struct xe_device *xe) +{ + struct xe_bo *bo; + struct xe_tile *tile; + struct list_head still_in_list; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* Pinned user memory in VRAM should be validated on resume */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + list_move_tail(&bo->pinned_link, &still_in_list); + xe_bo_get(bo); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, false); + ret = xe_bo_restore_pinned(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* Wait for validate to complete */ + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h new file mode 100644 index 000000000000..746894798852 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_EVICT_H_ +#define _XE_BO_EVICT_H_ + +struct xe_device; + +int xe_bo_evict_all(struct xe_device *xe); +int xe_bo_restore_kernel(struct xe_device *xe); +int xe_bo_restore_user(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h new file mode 100644 index 000000000000..64c2249a4e40 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_TYPES_H_ +#define _XE_BO_TYPES_H_ + +#include <linux/iosys-map.h> + +#include <drm/drm_mm.h> +#include <drm/ttm/ttm_bo.h> +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_placement.h> + +struct xe_device; +struct xe_vm; + +#define XE_BO_MAX_PLACEMENTS 3 + +/* TODO: To be selected with VM_MADVISE */ +#define XE_BO_PRIORITY_NORMAL 1 + +/** 
@xe_bo: XE buffer object */ +struct xe_bo { + /** @ttm: TTM base buffer object */ + struct ttm_buffer_object ttm; + /** @size: Size of this buffer object */ + size_t size; + /** @flags: flags for this buffer object */ + u32 flags; + /** @vm: VM this BO is attached to, for extobj this will be NULL */ + struct xe_vm *vm; + /** @tile: Tile this BO is attached to (kernel BO only) */ + struct xe_tile *tile; + /** @placements: valid placements for this BO */ + struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; + /** @placement: current placement for this BO */ + struct ttm_placement placement; + /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ + struct drm_mm_node ggtt_node; + /** @vmap: iosys map of this buffer */ + struct iosys_map vmap; + /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + struct ttm_bo_kmap_obj kmap; + /** @pinned_link: link to present / evicted list of pinned BO */ + struct list_head pinned_link; +#ifdef CONFIG_PROC_FS + /** + * @client: @xe_drm_client which created the bo + */ + struct xe_drm_client *client; + /** + * @client_link: Link into @xe_drm_client.objects_list + */ + struct list_head client_link; +#endif + /** @props: BO user controlled properties */ + struct { + /** @preferred_mem: preferred memory class for this BO */ + s16 preferred_mem_class; + /** @prefered_gt: preferred GT for this BO */ + s16 preferred_gt; + /** @preferred_mem_type: preferred memory type */ + s32 preferred_mem_type; + /** + * @cpu_atomic: the CPU expects to do atomics operations to + * this BO + */ + bool cpu_atomic; + /** + * @device_atomic: the device expects to do atomics operations + * to this BO + */ + bool device_atomic; + } props; + /** @freed: List node for delayed put. */ + struct llist_node freed; + /** @created: Whether the bo has passed initial creation */ + bool created; + + /** @ccs_cleared */ + bool ccs_cleared; + + /** + * @cpu_caching: CPU caching mode. Currently only used for userspace + * objects. 
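+	 * The stored value is expected to mirror the userspace-visible
+	 * DRM_XE_GEM_CPU_CACHING_* enumeration (WB vs WC); that uAPI naming
+	 * is an assumption here, not something defined by this header.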
+ */ + u16 cpu_caching; +}; + +#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) +#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev) + +#endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c new file mode 100644 index 000000000000..c56fd7d59f05 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_debugfs.h" + +#include <linux/string_helpers.h> + +#include <drm/drm_debugfs.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt_debugfs.h" +#include "xe_step.h" + +#ifdef CONFIG_DRM_XE_DEBUG +#include "xe_bo_evict.h" +#include "xe_migrate.h" +#include "xe_vm.h" +#endif + +#ifdef CONFIG_FAULT_INJECTION +#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ +DECLARE_FAULT_ATTR(gt_reset_failure); +#endif + +static struct xe_device *node_to_xe(struct drm_info_node *node) +{ + return to_xe_device(node->minor->dev); +} + +static int info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_gt *gt; + u8 id; + + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); + drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); + drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + xe_step_name(xe->info.step.graphics), + xe_step_name(xe->info.step.media), + xe_step_name(xe->info.step.display), + xe_step_name(xe->info.step.basedie)); + drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); + drm_printf(&p, "platform %d\n", xe->info.platform); + drm_printf(&p, "subplatform %d\n", + xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0); + drm_printf(&p, "devid 0x%x\n", xe->info.devid); + drm_printf(&p, "revid %d\n", xe->info.revid); + drm_printf(&p, "tile_count %d\n", xe->info.tile_count); + drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); + drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); + drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); + drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm)); + for_each_gt(gt, xe, id) { + drm_printf(&p, "gt%d force wake %d\n", id, + xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); + drm_printf(&p, "gt%d engine_mask 0x%llx\n", id, + gt->info.engine_mask); + } + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"info", info, 0}, +}; + +static int forcewake_open(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + xe_device_mem_access_get(xe); + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + return 0; +} + +static int forcewake_release(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + xe_device_mem_access_put(xe); + + return 0; +} + +static const struct file_operations forcewake_all_fops = { + .owner = THIS_MODULE, + .open = forcewake_open, + .release = forcewake_release, +}; + +void xe_debugfs_register(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + struct ttm_resource_manager *man; + struct xe_gt *gt; + u32 mem_type; + u8 id; + + drm_debugfs_create_files(debugfs_list, + ARRAY_SIZE(debugfs_list), + 
root, minor); + + debugfs_create_file("forcewake_all", 0400, root, xe, + &forcewake_all_fops); + + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + man = ttm_manager_type(bdev, mem_type); + + if (man) { + char name[16]; + + sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); + ttm_resource_manager_create_debugfs(man, root, name); + } + } + + man = ttm_manager_type(bdev, XE_PL_TT); + ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); + + man = ttm_manager_type(bdev, XE_PL_STOLEN); + if (man) + ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); + + for_each_gt(gt, xe, id) + xe_gt_debugfs_register(gt); + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); +#endif + +} diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h new file mode 100644 index 000000000000..715b8e2e0bd9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DEBUGFS_H_ +#define _XE_DEBUGFS_H_ + +struct xe_device; + +void xe_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c new file mode 100644 index 000000000000..68abc0b195be --- /dev/null +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_devcoredump.h" +#include "xe_devcoredump_types.h" + +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> + +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc_ct.h" +#include "xe_guc_submit.h" +#include "xe_hw_engine.h" + +/** + * DOC: Xe device coredump + * + * Devices overview: + * Xe uses dev_coredump infrastructure for exposing the crash errors in a + * standardized way. + * devcoredump exposes a temporary device under /sys/class/devcoredump/ + * which is linked with our card device directly. + * The core dump can be accessed either from + * /sys/class/drm/card<n>/device/devcoredump/ or from + * /sys/class/devcoredump/devcd<m> where + * /sys/class/devcoredump/devcd<m>/failing_device is a link to + * /sys/class/drm/card<n>/device/. + * + * Snapshot at hang: + * The 'data' file is printed with a drm_printer pointer at devcoredump read + * time. For this reason, we need to take snapshots from when the hang has + * happened, and not only when the user is reading the file. Otherwise the + * information is outdated since the resets might have happened in between. + * + * 'First' failure snapshot: + * In general, the first hang is the most critical one since the following hangs + * can be a consequence of the initial hang. For this reason we only take the + * snapshot of the 'first' failure and ignore subsequent calls of this function, + * at least while the coredump device is alive. Dev_coredump has a delayed work + * queue that will eventually delete the device and free all the dump + * information. 
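+ *
+ * Illustrative shell usage for fetching and then releasing a dump (card
+ * index and devcd instance numbers vary per system):
+ *
+ *	$ cat /sys/class/drm/card0/device/devcoredump/data > gpu_hang.dump
+ *	$ echo 1 > /sys/class/drm/card0/device/devcoredump/data
+ *
+ * Writing anything to the data file releases the dump early, ahead of the
+ * delayed-work timeout mentioned above.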
+ */ + +#ifdef CONFIG_DEV_COREDUMP + +static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) +{ + return container_of(coredump, struct xe_device, devcoredump); +} + +static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) +{ + return &q->gt->uc.guc; +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + struct drm_printer p; + struct drm_print_iterator iter; + struct timespec64 ts; + int i; + + /* Our device is gone already... */ + if (!data || !coredump_to_xe(coredump)) + return -ENODEV; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + ss = &coredump->snapshot; + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** Xe Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + + ts = ktime_to_timespec64(ss->snapshot_time); + drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + ts = ktime_to_timespec64(ss->boot_time); + drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + + drm_printf(&p, "\n**** GuC CT ****\n"); + xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); + xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p); + + drm_printf(&p, "\n**** HW Engines ****\n"); + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (coredump->snapshot.hwe[i]) + xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], + &p); + + return count - iter.remain; +} + +static void xe_devcoredump_free(void *data) +{ + struct xe_devcoredump *coredump = data; + int i; + + /* Our device is gone. Nothing to do... */ + if (!data || !coredump_to_xe(coredump)) + return; + + xe_guc_ct_snapshot_free(coredump->snapshot.ct); + xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (coredump->snapshot.hwe[i]) + xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); + + coredump->captured = false; + drm_info(&coredump_to_xe(coredump)->drm, + "Xe device coredump has been deleted.\n"); +} + +static void devcoredump_snapshot(struct xe_devcoredump *coredump, + struct xe_exec_queue *q) +{ + struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 adj_logical_mask = q->logical_mask; + u32 width_mask = (0x1 << q->width) - 1; + int i; + bool cookie; + + ss->snapshot_time = ktime_get_real(); + ss->boot_time = ktime_get_boottime(); + + cookie = dma_fence_begin_signalling(); + for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + if (adj_logical_mask & BIT(i)) { + adj_logical_mask |= width_mask << i; + i += q->width; + } else { + ++i; + } + } + + xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + + coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); + + for_each_hw_engine(hwe, q->gt, id) { + if (hwe->class != q->hwe->class || + !(BIT(hwe->logical_instance) & adj_logical_mask)) { + coredump->snapshot.hwe[id] = NULL; + continue; + } + coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); + } + + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + dma_fence_end_signalling(cookie); +} + +/** + * xe_devcoredump - Take the required snapshots and initialize coredump device. + * @q: The faulty xe_exec_queue, where the issue was detected. 
+ *
+ * This function should be called at the crash time within the serialized
+ * gt_reset. It is skipped if we still have the core dump device available
+ * with the information of the 'first' snapshot.
+ */
+void xe_devcoredump(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_devcoredump *coredump = &xe->devcoredump;
+
+	if (coredump->captured) {
+		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+		return;
+	}
+
+	coredump->captured = true;
+	devcoredump_snapshot(coredump, q);
+
+	drm_info(&xe->drm, "Xe device coredump has been created\n");
+	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+		 xe->drm.primary->index);
+
+	dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+		      xe_devcoredump_read, xe_devcoredump_free);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
new file mode 100644
index 000000000000..6ac218a5c194
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_H_
+#define _XE_DEVCOREDUMP_H_
+
+struct xe_device;
+struct xe_exec_queue;
+
+#ifdef CONFIG_DEV_COREDUMP
+void xe_devcoredump(struct xe_exec_queue *q);
+#else
+static inline void xe_devcoredump(struct xe_exec_queue *q)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
new file mode 100644
index 000000000000..7fdad9c3d3dd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_TYPES_H_
+#define _XE_DEVCOREDUMP_TYPES_H_
+
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+
+#include "xe_hw_engine_types.h"
+
+struct xe_device;
+
+/**
+ * struct xe_devcoredump_snapshot - Crash snapshot
+ *
+ * This struct contains all the useful information quickly captured at the time
+ * of the crash. Any subsequent read of the coredump thus points to data that
+ * shows the state of the GPU at the time the issue happened.
+ */
+struct xe_devcoredump_snapshot {
+	/** @snapshot_time: Time of this capture. */
+	ktime_t snapshot_time;
+	/** @boot_time: Relative boot time so the uptime can be calculated. */
+	ktime_t boot_time;
+
+	/* GuC snapshots */
+	/** @ct: GuC CT snapshot */
+	struct xe_guc_ct_snapshot *ct;
+	/** @ge: GuC engine snapshot */
+	struct xe_guc_submit_exec_queue_snapshot *ge;
+	/** @hwe: HW Engine snapshot array */
+	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
+};
+
+/**
+ * struct xe_devcoredump - Xe devcoredump main structure
+ *
+ * This struct represents the live and active dev_coredump node.
+ * It is created/populated at the time of a crash/error. Then it
+ * is read later when the user accesses the device coredump data
+ * file.
+ */
+struct xe_devcoredump {
+	/** @xe: Xe device. */
+	struct xe_device *xe;
+	/** @captured: The snapshot of the first hang has already been taken.
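+	 * Cleared again from xe_devcoredump_free() once the dump device goes
+	 * away, so a later hang can record a new 'first' snapshot.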
*/ + bool captured; + /** @snapshot: Snapshot is captured at time of the first crash */ + struct xe_devcoredump_snapshot snapshot; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c new file mode 100644 index 000000000000..d9ae77fe7382 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_device.h" + +#include <linux/units.h> + +#include <drm/drm_aperture.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_gem_ttm_helper.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <drm/xe_drm.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_display.h" +#include "xe_dma_buf.h" +#include "xe_drm_client.h" +#include "xe_drv.h" +#include "xe_exec_queue.h" +#include "xe_exec.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_irq.h" +#include "xe_mmio.h" +#include "xe_module.h" +#include "xe_pat.h" +#include "xe_pcode.h" +#include "xe_pm.h" +#include "xe_query.h" +#include "xe_tile.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_ttm_sys_mgr.h" +#include "xe_vm.h" +#include "xe_wait_user_fence.h" +#include "xe_hwmon.h" + +#ifdef CONFIG_LOCKDEP +struct lockdep_map xe_device_mem_access_lockdep_map = { + .name = "xe_device_mem_access_lockdep_map" +}; +#endif + +static int xe_file_open(struct drm_device *dev, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_drm_client *client; + struct xe_file *xef; + int ret = -ENOMEM; + + xef = kzalloc(sizeof(*xef), GFP_KERNEL); + if (!xef) + return ret; + + client = xe_drm_client_alloc(); + if (!client) { + kfree(xef); + return ret; + } + + xef->drm = file; + xef->client = client; + xef->xe = xe; + + mutex_init(&xef->vm.lock); + xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); + + mutex_init(&xef->exec_queue.lock); + xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); + + spin_lock(&xe->clients.lock); + xe->clients.count++; + spin_unlock(&xe->clients.lock); + + file->driver_priv = xef; + return 0; +} + +static void device_kill_persistent_exec_queues(struct xe_device *xe, + struct xe_file *xef); + +static void xe_file_close(struct drm_device *dev, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = file->driver_priv; + struct xe_vm *vm; + struct xe_exec_queue *q; + unsigned long idx; + + mutex_lock(&xef->exec_queue.lock); + xa_for_each(&xef->exec_queue.xa, idx, q) { + xe_exec_queue_kill(q); + xe_exec_queue_put(q); + } + mutex_unlock(&xef->exec_queue.lock); + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + device_kill_persistent_exec_queues(xe, xef); + + mutex_lock(&xef->vm.lock); + xa_for_each(&xef->vm.xa, idx, vm) + xe_vm_close_and_put(vm); + mutex_unlock(&xef->vm.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +static const struct drm_ioctl_desc xe_ioctls[] = { + DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, 
xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, + DRM_RENDER_ALLOW), +}; + +static const struct file_operations xe_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release_noglobal, + .unlocked_ioctl = drm_ioctl, + .mmap = drm_gem_mmap, + .poll = drm_poll, + .read = drm_read, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_fdinfo, +#endif +}; + +static void xe_driver_release(struct drm_device *dev) +{ + struct xe_device *xe = to_xe_device(dev); + + pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL); +} + +static struct drm_driver driver = { + /* Don't use MTRRs here; the Xserver or userspace app should + * deal with them for Intel hardware. + */ + .driver_features = + DRIVER_GEM | + DRIVER_RENDER | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, + .open = xe_file_open, + .postclose = xe_file_close, + + .gem_prime_import = xe_gem_prime_import, + + .dumb_create = xe_bo_dumb_create, + .dumb_map_offset = drm_gem_ttm_dumb_map_offset, +#ifdef CONFIG_PROC_FS + .show_fdinfo = xe_drm_client_fdinfo, +#endif + .release = &xe_driver_release, + + .ioctls = xe_ioctls, + .num_ioctls = ARRAY_SIZE(xe_ioctls), + .fops = &xe_driver_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static void xe_device_destroy(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + if (xe->ordered_wq) + destroy_workqueue(xe->ordered_wq); + + if (xe->unordered_wq) + destroy_workqueue(xe->unordered_wq); + + ttm_device_fini(&xe->ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct xe_device *xe; + int err; + + xe_display_driver_set_hooks(&driver); + + err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); + if (err) + return ERR_PTR(err); + + xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm); + if (IS_ERR(xe)) + return xe; + + err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, + xe->drm.anon_inode->i_mapping, + xe->drm.vma_offset_manager, false, false); + if (WARN_ON(err)) + goto err; + + err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); + if (err) + goto err; + + xe->info.devid = pdev->device; + xe->info.revid = pdev->revision; + xe->info.force_execlist = xe_modparam.force_execlist; + + spin_lock_init(&xe->irq.lock); + spin_lock_init(&xe->clients.lock); + + init_waitqueue_head(&xe->ufence_wq); + + drmm_mutex_init(&xe->drm, &xe->usm.lock); + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + /* Trigger a large asid and an early asid wrap. 
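+		 * Allocating once right below XE_MAX_ASID and erasing the
+		 * entry again leaves next_asid near the top of the range, so
+		 * the first real allocations exercise the cyclic wrap path.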
*/ + u32 asid; + + BUILD_BUG_ON(XE_MAX_ASID < 2); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL, + XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + drm_WARN_ON(&xe->drm, err); + if (err >= 0) + xa_erase(&xe->usm.asid_to_vm, asid); + } + + drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); + INIT_LIST_HEAD(&xe->persistent_engines.list); + + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.external_vram); + INIT_LIST_HEAD(&xe->pinned.evicted); + + xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); + xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + if (!xe->ordered_wq || !xe->unordered_wq) { + drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); + err = -ENOMEM; + goto err; + } + + err = xe_display_create(xe); + if (WARN_ON(err)) + goto err; + + return xe; + +err: + return ERR_PTR(err); +} + +/* + * The driver-initiated FLR is the highest level of reset that we can trigger + * from within the driver. It is different from the PCI FLR in that it doesn't + * fully reset the SGUnit and doesn't modify the PCI config space and therefore + * it doesn't require a re-enumeration of the PCI BARs. However, the + * driver-initiated FLR does still cause a reset of both GT and display and a + * memory wipe of local and stolen memory, so recovery would require a full HW + * re-init and saving/restoring (or re-populating) the wiped memory. Since we + * perform the FLR as the very last action before releasing access to the HW + * during the driver release flow, we don't attempt recovery at all, because + * if/when a new instance of i915 is bound to the device it will do a full + * re-init anyway. + */ +static void xe_driver_flr(struct xe_device *xe) +{ + const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ + struct xe_gt *gt = xe_root_mmio_gt(xe); + int ret; + + if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { + drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + return; + } + + drm_dbg(&xe->drm, "Triggering Driver-FLR\n"); + + /* + * Make sure any pending FLR requests have cleared by waiting for the + * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS + * to make sure it's not still set from a prior attempt (it's a write to + * clear bit). + * Note that we should never be in a situation where a previous attempt + * is still pending (unless the HW is totally dead), but better to be + * safe in case something unexpected happens + */ + ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret); + return; + } + xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); + + /* Trigger the actual Driver-FLR */ + xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR); + + /* Wait for hardware teardown to complete */ + ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret); + return; + } + + /* Wait for hardware/firmware re-init to complete */ + ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, + flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! 
%d\n", ret); + return; + } + + /* Clear sticky completion status */ + xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); +} + +static void xe_driver_flr_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + if (xe->needs_flr_on_fini) + xe_driver_flr(xe); +} + +static void xe_device_sanitize(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + xe_gt_sanitize(gt); +} + +static int xe_set_dma_info(struct xe_device *xe) +{ + unsigned int mask_size = xe->info.dma_mask_size; + int err; + + dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev)); + + err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + return 0; + +mask_err: + drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); + return err; +} + +/* + * Initialize MMIO resources that don't require any knowledge about tile count. + */ +int xe_device_probe_early(struct xe_device *xe) +{ + int err; + + err = xe_mmio_init(xe); + if (err) + return err; + + err = xe_mmio_root_tile_init(xe); + if (err) + return err; + + return 0; +} + +static int xe_device_set_has_flat_ccs(struct xe_device *xe) +{ + u32 reg; + int err; + + if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) + return 0; + + struct xe_gt *gt = xe_root_mmio_gt(xe); + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); + xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE); + + if (!xe->info.has_flat_ccs) + drm_dbg(&xe->drm, + "Flat CCS has been disabled in bios, May lead to performance impact"); + + return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +int xe_device_probe(struct xe_device *xe) +{ + struct xe_tile *tile; + struct xe_gt *gt; + int err; + u8 id; + + xe_pat_init_early(xe); + + xe->info.mem_region_mask = 1; + err = xe_display_init_nommio(xe); + if (err) + return err; + + err = xe_set_dma_info(xe); + if (err) + return err; + + xe_mmio_probe_tiles(xe); + + xe_ttm_sys_mgr_init(xe); + + for_each_gt(gt, xe, id) + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + + for_each_tile(tile, xe, id) { + err = xe_ggtt_init_early(tile->mem.ggtt); + if (err) + return err; + } + + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_pcode_probe(gt); + if (err) + return err; + } + + err = xe_display_init_noirq(xe); + if (err) + return err; + + err = xe_irq_install(xe); + if (err) + goto err; + + for_each_gt(gt, xe, id) { + err = xe_gt_init_early(gt); + if (err) + goto err_irq_shutdown; + } + + err = xe_device_set_has_flat_ccs(xe); + if (err) + return err; + + err = xe_mmio_probe_vram(xe); + if (err) + goto err_irq_shutdown; + + for_each_tile(tile, xe, id) { + err = xe_tile_init_noalloc(tile); + if (err) + goto err_irq_shutdown; + } + + /* Allocate and map stolen after potential VRAM resize */ + xe_ttm_stolen_mgr_init(xe); + + /* + * Now that GT is initialized (TTM in particular), + * we can try to init display, and inherit the initial fb. + * This is the reason the first allocation needs to be done + * inside display. 
+ */ + err = xe_display_init_noaccel(xe); + if (err) + goto err_irq_shutdown; + + for_each_gt(gt, xe, id) { + err = xe_gt_init(gt); + if (err) + goto err_irq_shutdown; + } + + xe_heci_gsc_init(xe); + + err = xe_display_init(xe); + if (err) + goto err_irq_shutdown; + + err = drm_dev_register(&xe->drm, 0); + if (err) + goto err_fini_display; + + xe_display_register(xe); + + xe_debugfs_register(xe); + + xe_hwmon_register(xe); + + err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); + if (err) + return err; + + return 0; + +err_fini_display: + xe_display_driver_remove(xe); + +err_irq_shutdown: + xe_irq_shutdown(xe); +err: + xe_display_fini(xe); + return err; +} + +static void xe_device_remove_display(struct xe_device *xe) +{ + xe_display_unregister(xe); + + drm_dev_unplug(&xe->drm); + xe_display_driver_remove(xe); +} + +void xe_device_remove(struct xe_device *xe) +{ + xe_device_remove_display(xe); + + xe_display_fini(xe); + + xe_heci_gsc_fini(xe); + + xe_irq_shutdown(xe); +} + +void xe_device_shutdown(struct xe_device *xe) +{ +} + +void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) +{ + mutex_lock(&xe->persistent_engines.lock); + list_add_tail(&q->persistent.link, &xe->persistent_engines.list); + mutex_unlock(&xe->persistent_engines.lock); +} + +void xe_device_remove_persistent_exec_queues(struct xe_device *xe, + struct xe_exec_queue *q) +{ + mutex_lock(&xe->persistent_engines.lock); + if (!list_empty(&q->persistent.link)) + list_del(&q->persistent.link); + mutex_unlock(&xe->persistent_engines.lock); +} + +static void device_kill_persistent_exec_queues(struct xe_device *xe, + struct xe_file *xef) +{ + struct xe_exec_queue *q, *next; + + mutex_lock(&xe->persistent_engines.lock); + list_for_each_entry_safe(q, next, &xe->persistent_engines.list, + persistent.link) + if (q->persistent.xef == xef) { + xe_exec_queue_kill(q); + list_del_init(&q->persistent.link); + } + mutex_unlock(&xe->persistent_engines.lock); +} + +void xe_device_wmb(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + wmb(); + if (IS_DGFX(xe)) + xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0); +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) +{ + return xe_device_has_flat_ccs(xe) ? + DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; +} + +bool xe_device_mem_access_ongoing(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) != NULL) + return true; + + return atomic_read(&xe->mem_access.ref); +} + +void xe_device_assert_mem_access(struct xe_device *xe) +{ + XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); +} + +bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) +{ + bool active; + + if (xe_pm_read_callback_task(xe) == current) + return true; + + active = xe_pm_runtime_get_if_active(xe); + if (active) { + int ref = atomic_inc_return(&xe->mem_access.ref); + + xe_assert(xe, ref != S32_MAX); + } + + return active; +} + +void xe_device_mem_access_get(struct xe_device *xe) +{ + int ref; + + /* + * This looks racy, but should be fine since the pm_callback_task only + * transitions from NULL -> current (and back to NULL again), during the + * runtime_resume() or runtime_suspend() callbacks, for which there can + * only be a single one running for our device. We only need to prevent + * recursively calling the runtime_get or runtime_put from those + * callbacks, as well as preventing triggering any access_ongoing + * asserts. 
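+	 *
+	 * Callers are expected to simply bracket any hardware access with a
+	 * get/put pair, e.g. (sketch):
+	 *
+	 *	xe_device_mem_access_get(xe);
+	 *	... MMIO / VRAM access that needs the device resumed ...
+	 *	xe_device_mem_access_put(xe);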
+ */ + if (xe_pm_read_callback_task(xe) == current) + return; + + /* + * Since the resume here is synchronous it can be quite easy to deadlock + * if we are not careful. Also in practice it might be quite timing + * sensitive to ever see the 0 -> 1 transition with the callers locks + * held, so deadlocks might exist but are hard for lockdep to ever see. + * With this in mind, help lockdep learn about the potentially scary + * stuff that can happen inside the runtime_resume callback by acquiring + * a dummy lock (it doesn't protect anything and gets compiled out on + * non-debug builds). Lockdep then only needs to see the + * mem_access_lockdep_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> mem_access_lockdep_map. + * For example if the (callers_locks) are ever grabbed in the + * runtime_resume callback, lockdep should give us a nice splat. + */ + lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_device_mem_access_lockdep_map); + + xe_pm_runtime_get(xe); + ref = atomic_inc_return(&xe->mem_access.ref); + + xe_assert(xe, ref != S32_MAX); + +} + +void xe_device_mem_access_put(struct xe_device *xe) +{ + int ref; + + if (xe_pm_read_callback_task(xe) == current) + return; + + ref = atomic_dec_return(&xe->mem_access.ref); + xe_pm_runtime_put(xe); + + xe_assert(xe, ref >= 0); +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h new file mode 100644 index 000000000000..3da83b233206 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_DEVICE_H_ +#define _XE_DEVICE_H_ + +struct xe_exec_queue; +struct xe_file; + +#include <drm/drm_util.h> + +#include "regs/xe_gpu_commands.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_macros.h" + +#ifdef CONFIG_LOCKDEP +extern struct lockdep_map xe_device_mem_access_lockdep_map; +#endif + +static inline struct xe_device *to_xe_device(const struct drm_device *dev) +{ + return container_of(dev, struct xe_device, drm); +} + +static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm) +{ + return container_of(ttm, struct xe_device, ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent); +int xe_device_probe_early(struct xe_device *xe); +int xe_device_probe(struct xe_device *xe); +void xe_device_remove(struct xe_device *xe); +void xe_device_shutdown(struct xe_device *xe); + +void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q); +void xe_device_remove_persistent_exec_queues(struct xe_device *xe, + struct xe_exec_queue *q); + +void xe_device_wmb(struct xe_device *xe); + +static inline struct xe_file *to_xe_file(const struct drm_file *file) +{ + return file->driver_priv; +} + +static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) +{ + return &xe->tiles[0]; +} + +#define XE_MAX_GT_PER_TILE 2 + +static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) +{ + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + gt_id = 0; + + return gt_id ? 
tile->media_gt : tile->primary_gt; +} + +static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_gt *gt; + + /* + * FIXME: This only works for now because multi-tile and standalone + * media are mutually exclusive on the platforms we have today. + * + * id => GT mapping may change once we settle on how we want to handle + * our UAPI. + */ + if (MEDIA_VER(xe) >= 13) { + gt = xe_tile_get_gt(root_tile, gt_id); + } else { + if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + gt_id = 0; + + gt = xe->tiles[gt_id].primary_gt; + } + + if (!gt) + return NULL; + + drm_WARN_ON(&xe->drm, gt->info.id != gt_id); + drm_WARN_ON(&xe->drm, gt->info.type == XE_GT_TYPE_UNINITIALIZED); + + return gt; +} + +/* + * Provide a GT structure suitable for performing non-GT MMIO operations against + * the primary tile. Primarily intended for early tile initialization, display + * handling, top-most interrupt enable/disable, etc. Since anything using the + * MMIO handle returned by this function doesn't need GSI offset translation, + * we'll return the primary GT from the root tile. + * + * FIXME: Fix the driver design so that 'gt' isn't the target of all MMIO + * operations. + * + * Returns the primary gt of the root tile. + */ +static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) +{ + return xe_device_get_root_tile(xe)->primary_gt; +} + +static inline bool xe_device_uc_enabled(struct xe_device *xe) +{ + return !xe->info.force_execlist; +} + +#define for_each_tile(tile__, xe__, id__) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \ + for_each_if((tile__) = &(xe__)->tiles[(id__)]) + +#define for_each_remote_tile(tile__, xe__, id__) \ + for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ + for_each_if((tile__) = &(xe__)->tiles[(id__)]) + +/* + * FIXME: This only works for now since multi-tile and standalone media + * happen to be mutually exclusive. Future platforms may change this... 
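+ *
+ * Typical usage of the iterator below, mirroring xe_device_sanitize() in
+ * xe_device.c:
+ *
+ *	struct xe_gt *gt;
+ *	u8 id;
+ *
+ *	for_each_gt(gt, xe, id)
+ *		xe_gt_sanitize(gt);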
+ */ +#define for_each_gt(gt__, xe__, id__) \ + for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) + +static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) +{ + return >->mmio.fw; +} + +void xe_device_mem_access_get(struct xe_device *xe); +bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); +void xe_device_mem_access_put(struct xe_device *xe); + +void xe_device_assert_mem_access(struct xe_device *xe); +bool xe_device_mem_access_ongoing(struct xe_device *xe); + +static inline bool xe_device_in_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_fault_mode != 0; +} + +static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_non_fault_mode != 0; +} + +static inline bool xe_device_has_flat_ccs(struct xe_device *xe) +{ + return xe->info.has_flat_ccs; +} + +static inline bool xe_device_has_sriov(struct xe_device *xe) +{ + return xe->info.has_sriov; +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); + +#endif diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c new file mode 100644 index 000000000000..99113a5a2b84 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/pci.h> +#include <linux/sysfs.h> + +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_device_sysfs.h" +#include "xe_pm.h" + +/** + * DOC: Xe device sysfs + * Xe driver requires exposing certain tunable knobs controlled by user space for + * each graphics device. Considering this, we need to add sysfs attributes at device + * level granularity. + * These sysfs attributes will be available under pci device kobj directory. + * + * vram_d3cold_threshold - Report/change vram used threshold(in MB) below + * which vram save/restore is permissible during runtime D3cold entry/exit. 
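+ *
+ * Illustrative shell usage (the PCI address varies per system; 300 is the
+ * documented default threshold):
+ *
+ *	$ cat /sys/bus/pci/devices/0000:03:00.0/vram_d3cold_threshold
+ *	300
+ *	$ echo 512 > /sys/bus/pci/devices/0000:03:00.0/vram_d3cold_threshold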
+ */ + +static ssize_t +vram_d3cold_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int ret; + + if (!xe) + return -EINVAL; + + ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); + + return ret; +} + +static ssize_t +vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 vram_d3cold_threshold; + int ret; + + if (!xe) + return -EINVAL; + + ret = kstrtou32(buff, 0, &vram_d3cold_threshold); + if (ret) + return ret; + + drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); + + ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); + + return ret ?: count; +} + +static DEVICE_ATTR_RW(vram_d3cold_threshold); + +static void xe_device_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); +} + +void xe_device_sysfs_init(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + int ret; + + ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (ret) { + drm_warn(&xe->drm, "Failed to create sysfs file\n"); + return; + } + + ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); + if (ret) + drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n"); +} diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h new file mode 100644 index 000000000000..38b240684bee --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DEVICE_SYSFS_H_ +#define _XE_DEVICE_SYSFS_H_ + +struct xe_device; + +void xe_device_sysfs_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h new file mode 100644 index 000000000000..c45ef17b3473 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -0,0 +1,545 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2023 Intel Corporation + */ + +#ifndef _XE_DEVICE_TYPES_H_ +#define _XE_DEVICE_TYPES_H_ + +#include <linux/pci.h> + +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h> + +#include "xe_devcoredump_types.h" +#include "xe_heci_gsc.h" +#include "xe_gt_types.h" +#include "xe_lmtt_types.h" +#include "xe_platform_types.h" +#include "xe_pt_types.h" +#include "xe_sriov_types.h" +#include "xe_step_types.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) +#include "soc/intel_pch.h" +#include "intel_display_core.h" +#include "intel_display_device.h" +#endif + +struct xe_ggtt; +struct xe_pat_ops; + +#define XE_BO_INVALID_OFFSET LONG_MAX + +#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) +#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100) +#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) +#define MEDIA_VERx100(xe) ((xe)->info.media_verx100) +#define IS_DGFX(xe) ((xe)->info.is_dgfx) +#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi) + +#define XE_VRAM_FLAGS_NEED64K BIT(0) + +#define XE_GT0 0 +#define XE_GT1 1 +#define XE_MAX_TILES_PER_DEVICE (XE_GT1 + 1) + +#define XE_MAX_ASID (BIT(20)) + +#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.step.graphics >= (min_step) && 
\ + (_xe)->info.step.graphics < (max_step)) +#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.subplatform == (sub) && \ + (_xe)->info.step.graphics >= (min_step) && \ + (_xe)->info.step.graphics < (max_step)) + +#define tile_to_xe(tile__) \ + _Generic(tile__, \ + const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ + struct xe_tile * : (tile__)->xe) + +/** + * struct xe_mem_region - memory region structure + * This is used to describe a memory region in xe + * device, such as HBM memory or CXL extension memory. + */ +struct xe_mem_region { + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @usable_size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @dpa_base: This memory regions's DPA (device physical address) base */ + resource_size_t dpa_base; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g stolen mem) + */ + resource_size_t usable_size; + /** + * @actual_physical_size: Actual VRAM size + * + * Actual VRAM size including reserved portions + * (e.g stolen mem) + */ + resource_size_t actual_physical_size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; +}; + +/** + * struct xe_tile - hardware tile structure + * + * From a driver perspective, a "tile" is effectively a complete GPU, containing + * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM. + * + * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI + * device and designate one "root" tile as being responsible for external PCI + * communication. PCI BAR0 exposes the GGTT and MMIO register space for each + * tile in a stacked layout, and PCI BAR2 exposes the local memory associated + * with each tile similarly. Device-wide interrupts can be enabled/disabled + * at the root tile, and the MSTR_TILE_INTR register will report which tiles + * have interrupts that need servicing. + */ +struct xe_tile { + /** @xe: Backpointer to tile's PCI device */ + struct xe_device *xe; + + /** @id: ID of the tile */ + u8 id; + + /** + * @primary_gt: Primary GT + */ + struct xe_gt *primary_gt; + + /** + * @media_gt: Media GT + * + * Only present on devices with media version >= 13. + */ + struct xe_gt *media_gt; + + /** + * @mmio: MMIO info for a tile. + * + * Each tile has its own 16MB space in BAR0, laid out as: + * * 0-4MB: registers + * * 4MB-8MB: reserved + * * 8MB-16MB: global GTT + */ + struct { + /** @size: size of tile's MMIO space */ + size_t size; + + /** @regs: pointer to tile's MMIO space (starting with registers) */ + void *regs; + } mmio; + + /** + * @mmio_ext: MMIO-extension info for a tile. + * + * Each tile has its own additional 256MB (28-bit) MMIO-extension space. + */ + struct { + /** @size: size of tile's additional MMIO-extension space */ + size_t size; + + /** @regs: pointer to tile's additional MMIO-extension space */ + void *regs; + } mmio_ext; + + /** @mem: memory management info for tile */ + struct { + /** + * @vram: VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. 
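+		 *
+		 * A small-BAR configuration can be recognized (sketch) by the
+		 * CPU-visible window being smaller than the usable VRAM size:
+		 *
+		 *	bool small_bar = tile->mem.vram.io_size <
+		 *			 tile->mem.vram.usable_size;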
+ */ + struct xe_mem_region vram; + + /** @vram_mgr: VRAM TTM manager */ + struct xe_ttm_vram_mgr *vram_mgr; + + /** @ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + + /** + * @kernel_bb_pool: Pool from which batchbuffers are allocated. + * + * Media GT shares a pool with its primary GT. + */ + struct xe_sa_manager *kernel_bb_pool; + } mem; + + /** @sriov: tile level virtualization data */ + union { + struct { + /** @sriov.pf.lmtt: Local Memory Translation Table. */ + struct xe_lmtt lmtt; + } pf; + } sriov; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; + + /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ + struct kobject *sysfs; +}; + +/** + * struct xe_device - Top level struct of XE device + */ +struct xe_device { + /** @drm: drm device */ + struct drm_device drm; + + /** @devcoredump: device coredump */ + struct xe_devcoredump devcoredump; + + /** @info: device info */ + struct intel_device_info { + /** @graphics_name: graphics IP name */ + const char *graphics_name; + /** @media_name: media IP name */ + const char *media_name; + /** @tile_mmio_ext_size: size of MMIO extension space, per-tile */ + u32 tile_mmio_ext_size; + /** @graphics_verx100: graphics IP version */ + u32 graphics_verx100; + /** @media_verx100: media IP version */ + u32 media_verx100; + /** @mem_region_mask: mask of valid memory regions */ + u32 mem_region_mask; + /** @platform: XE platform enum */ + enum xe_platform platform; + /** @subplatform: XE subplatform enum */ + enum xe_subplatform subplatform; + /** @devid: device ID */ + u16 devid; + /** @revid: device revision */ + u8 revid; + /** @step: stepping information for each IP */ + struct xe_step_info step; + /** @dma_mask_size: DMA address bits */ + u8 dma_mask_size; + /** @vram_flags: Vram flags */ + u8 vram_flags; + /** @tile_count: Number of tiles */ + u8 tile_count; + /** @gt_count: Total number of GTs for entire device */ + u8 gt_count; + /** @vm_max_level: Max VM level */ + u8 vm_max_level; + /** @va_bits: Maximum bits of a virtual address */ + u8 va_bits; + + /** @is_dgfx: is discrete device */ + u8 is_dgfx:1; + /** @has_asid: Has address space ID */ + u8 has_asid:1; + /** @force_execlist: Forced execlist submission */ + u8 force_execlist:1; + /** @has_flat_ccs: Whether flat CCS metadata is used */ + u8 has_flat_ccs:1; + /** @has_llc: Device has a shared CPU+GPU last level cache */ + u8 has_llc:1; + /** @has_mmio_ext: Device has extra MMIO address range */ + u8 has_mmio_ext:1; + /** @has_range_tlb_invalidation: Has range based TLB invalidations */ + u8 has_range_tlb_invalidation:1; + /** @has_sriov: Supports SR-IOV */ + u8 has_sriov:1; + /** @has_usm: Device has unified shared memory support */ + u8 has_usm:1; + /** @enable_display: display enabled */ + u8 enable_display:1; + /** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ + u8 skip_mtcfg:1; + /** @skip_pcode: skip access to PCODE uC */ + u8 skip_pcode:1; + /** @has_heci_gscfi: device has heci gscfi */ + u8 has_heci_gscfi:1; + /** @skip_guc_pc: Skip GuC based PM feature init */ + u8 skip_guc_pc:1; + +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + struct { + u32 rawclk_freq; + } i915_runtime; +#endif + } info; + + /** @irq: device interrupt state */ + struct { + /** @lock: lock for processing irq's on this device */ + spinlock_t lock; + + /** @enabled: interrupts enabled on this device */ + bool enabled; + } irq; + + /** @ttm: ttm device */ + struct ttm_device ttm; + + /** @mmio: mmio info for device */ + struct { + /** 
@size: size of MMIO space for device */
+		size_t size;
+		/** @regs: pointer to MMIO space for device */
+		void *regs;
+	} mmio;
+
+	/** @mem: memory info for device */
+	struct {
+		/** @vram: VRAM info for device */
+		struct xe_mem_region vram;
+		/** @sys_mgr: system TTM manager */
+		struct ttm_resource_manager sys_mgr;
+	} mem;
+
+	/** @sriov: device level virtualization data */
+	struct {
+		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
+		enum xe_sriov_mode __mode;
+	} sriov;
+
+	/** @clients: drm clients info */
+	struct {
+		/** @lock: Protects drm clients info */
+		spinlock_t lock;
+
+		/** @count: number of drm clients */
+		u64 count;
+	} clients;
+
+	/** @usm: unified memory state */
+	struct {
+		/** @asid_to_vm: convert an ASID to VM */
+		struct xarray asid_to_vm;
+		/** @next_asid: next ASID, used for cyclic ASID allocation */
+		u32 next_asid;
+		/** @num_vm_in_fault_mode: number of VMs in fault mode */
+		u32 num_vm_in_fault_mode;
+		/** @num_vm_in_non_fault_mode: number of VMs in non-fault mode */
+		u32 num_vm_in_non_fault_mode;
+		/** @lock: protects USM state */
+		struct mutex lock;
+	} usm;
+
+	/** @persistent_engines: engines that are closed but still running */
+	struct {
+		/** @lock: protects persistent engines */
+		struct mutex lock;
+		/** @list: list of persistent engines */
+		struct list_head list;
+	} persistent_engines;
+
+	/** @pinned: pinned BO state */
+	struct {
+		/** @lock: protects the pinned BO lists */
+		spinlock_t lock;
+		/** @kernel_bo_present: pinned kernel BOs that are present */
+		struct list_head kernel_bo_present;
+		/** @evicted: pinned BOs that have been evicted */
+		struct list_head evicted;
+		/** @external_vram: pinned external BOs in VRAM */
+		struct list_head external_vram;
+	} pinned;
+
+	/** @ufence_wq: user fence wait queue */
+	wait_queue_head_t ufence_wq;
+
+	/** @ordered_wq: used to serialize compute mode resume */
+	struct workqueue_struct *ordered_wq;
+
+	/** @unordered_wq: used to serialize unordered work, mostly display */
+	struct workqueue_struct *unordered_wq;
+
+	/** @tiles: device tiles */
+	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
+
+	/**
+	 * @mem_access: keep track of memory accesses in the device, possibly
+	 * triggering additional actions when they occur.
+	 */
+	struct {
+		/** @ref: ref count of memory accesses */
+		atomic_t ref;
+	} mem_access;
+
+	/**
+	 * @pat: Encapsulate PAT related stuff
+	 */
+	struct {
+		/** @ops: Internal operations to abstract platforms */
+		const struct xe_pat_ops *ops;
+		/** @table: PAT table to program in the HW */
+		const struct xe_pat_table_entry *table;
+		/** @n_entries: Number of PAT entries */
+		int n_entries;
+		/** @idx: PAT index per cache level */
+		u32 idx[__XE_CACHE_LEVEL_COUNT];
+	} pat;
+
+	/** @d3cold: Encapsulate d3cold related stuff */
+	struct {
+		/** @capable: Indicates if root port is d3cold capable */
+		bool capable;
+
+		/** @allowed: Indicates if d3cold is a valid device state */
+		bool allowed;
+
+		/** @power_lost: Indicates if card has really lost power. */
+		bool power_lost;
+
+		/**
+		 * @vram_threshold:
+		 *
+		 * This represents the permissible threshold (in megabytes)
+		 * for vram save/restore. d3cold will be disallowed when
+		 * vram usage is at or above this threshold, to avoid the
+		 * vram save/restore latency.
+		 * Default threshold value is 300 MB.
+		 */
+		u32 vram_threshold;
+		/** @lock: protect vram_threshold */
+		struct mutex lock;
+	} d3cold;
+
+	/**
+	 * @pm_callback_task: Track the active task that is running in either
+	 * the runtime_suspend or runtime_resume callbacks.
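As a sketch of the policy described for @vram_threshold above: d3cold gets refused once VRAM usage reaches the threshold, so the save/restore latency is avoided. A hypothetical helper, assuming usage is passed in bytes; the driver's real check lives in its runtime-PM code:

static bool example_d3cold_allowed(struct xe_device *xe, u64 vram_used)
{
	bool allowed;

	mutex_lock(&xe->d3cold.lock);
	/* vram_threshold is in megabytes; disallow at or above it. */
	allowed = xe->d3cold.capable &&
		  vram_used < (u64)xe->d3cold.vram_threshold * SZ_1M;
	mutex_unlock(&xe->d3cold.lock);

	return allowed;
}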
+	 */
+	struct task_struct *pm_callback_task;
+
+	/** @hwmon: hwmon subsystem integration */
+	struct xe_hwmon *hwmon;
+
+	/** @heci_gsc: graphics security controller */
+	struct xe_heci_gsc heci_gsc;
+
+	/** @needs_flr_on_fini: requests function-reset on fini */
+	bool needs_flr_on_fini;
+
+	/* private: */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+	/*
+	 * Any fields below this point are the ones used by display.
+	 * They are temporarily added here so xe_device can be disguised as
+	 * drm_i915_private during build. After cleanup these should go away,
+	 * migrating to the right sub-structs
+	 */
+	struct intel_display display;
+	enum intel_pch pch_type;
+	u16 pch_id;
+
+	struct dram_info {
+		bool wm_lv_0_adjust_needed;
+		u8 num_channels;
+		bool symmetric_memory;
+		enum intel_dram_type {
+			INTEL_DRAM_UNKNOWN,
+			INTEL_DRAM_DDR3,
+			INTEL_DRAM_DDR4,
+			INTEL_DRAM_LPDDR3,
+			INTEL_DRAM_LPDDR4,
+			INTEL_DRAM_DDR5,
+			INTEL_DRAM_LPDDR5,
+		} type;
+		u8 num_qgv_points;
+		u8 num_psf_gv_points;
+	} dram_info;
+
+	/*
+	 * edram size in MB.
+	 * Cannot be determined by PCIID. You must always read a register.
+	 */
+	u32 edram_size_mb;
+
+	/* To shut up runtime pm macros.. */
+	struct xe_runtime_pm {} runtime_pm;
+
+	/* For pcode */
+	struct mutex sb_lock;
+
+	/* Should be in struct intel_display */
+	u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state;
+	u8 snps_phy_failed_calibration;
+	struct drm_atomic_state *modeset_restore_state;
+	struct list_head global_obj_list;
+
+	union {
+		/* only to allow build, not used functionally */
+		u32 irq_mask;
+		u32 de_irq_mask[I915_MAX_PIPES];
+	};
+	u32 pipestat_irq_mask[I915_MAX_PIPES];
+
+	bool display_irqs_enabled;
+	u32 enabled_irq_mask;
+
+	struct intel_uncore {
+		spinlock_t lock;
+	} uncore;
+
+	/* only to allow build, not used functionally */
+	struct {
+		unsigned int hpll_freq;
+		unsigned int czclk_freq;
+		unsigned int fsb_freq, mem_freq, is_ddr3;
+		u8 vblank_enabled;
+	};
+	struct {
+		const char *dmc_firmware_path;
+	} params;
+
+	void *pxp;
+#endif
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+	/** @xe: xe device */
+	struct xe_device *xe;
+
+	/** @drm: base DRM file */
+	struct drm_file *drm;
+
+	/** @vm: VM state for file */
+	struct {
+		/** @xa: xarray to store VMs */
+		struct xarray xa;
+		/** @lock: protects file VM state */
+		struct mutex lock;
+	} vm;
+
+	/** @exec_queue: Submission exec queue state for file */
+	struct {
+		/** @xa: xarray to store exec queues */
+		struct xarray xa;
+		/** @lock: protects file exec queue state */
+		struct mutex lock;
+	} exec_queue;
+
+	/** @client: drm client */
+	struct xe_drm_client *client;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
new file mode 100644
index 000000000000..74391d9b11ae
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+	return HAS_DISPLAY(xe);
+}
+
+/**
+ * xe_display_driver_probe_defer - Detect if we need to wait for other drivers
+ *				   early on
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+	if (!xe_modparam.enable_display)
+		return false;
+
+	return intel_display_driver_probe_defer(pdev);
+}
+
+static void xe_display_last_close(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->info.enable_display)
+		intel_fbdev_restore_mode(to_xe_device(dev));
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * of whether the device has it enabled.
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+	if (!xe_modparam.enable_display)
+		return;
+
+	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+	driver->lastclose = xe_display_last_close;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+	xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
+int xe_display_create(struct xe_device *xe)
+{
+	int err;
+
+	spin_lock_init(&xe->display.fb_tracking.lock);
+
+	xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+
+	drmm_mutex_init(&xe->drm, &xe->sb_lock);
+	drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
+	drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
+	xe->enabled_irq_mask = ~0;
+
+	err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	/* Fake uncore lock */
+	spin_lock_init(&xe->uncore.lock);
+
+	/* This must be called before any calls to HAS_PCH_* */
+	intel_detect_pch(xe);
+
+	err = intel_power_domains_init(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_noirq(xe);
+	intel_power_domains_driver_remove(xe);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	intel_display_driver_early_probe(xe);
+
+	/* Early display init.. */
+	intel_opregion_setup(xe);
+
+	/*
+	 * Fill the dram structure to get the system dram info. This will be
+	 * used for memory latency calculation.
+	 */
+	intel_dram_detect(xe);
+
+	intel_bw_init_hw(xe);
+
+	intel_display_device_info_runtime_init(xe);
+
+	err = intel_display_driver_probe_noirq(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noirq, NULL);
+}
+
+static void xe_display_fini_noaccel(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	err = intel_display_driver_probe_nogem(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noaccel, NULL);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return 0;
+
+	return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	/* poll work can call into fbdev, hence clean that up afterwards */
+	intel_hpd_poll_fini(xe);
+	intel_fbdev_fini(xe);
+
+	intel_hdcp_component_fini(xe);
+	intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_register(xe);
+	intel_register_dsm_handler();
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_unregister_dsm_handler();
+	intel_power_domains_disable(xe);
+	intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove(xe);
+
+	intel_display_device_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (master_ctl & DISPLAY_IRQ)
+		gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gu_misc_iir & GU_MISC_GSE)
+		intel_opregion_asle_intr(xe);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gt->info.id == XE_GT0)
+		gen11_de_irq_postinstall(xe);
+}
+
+static void intel_suspend_encoders(struct xe_device *xe)
+{
+	struct drm_device *dev = &xe->drm;
+	struct intel_encoder *encoder;
+
+	if (!has_display(xe))
+		return;
+
+	drm_modeset_lock_all(dev);
+	for_each_intel_encoder(dev, encoder)
+		if (encoder->suspend)
+			encoder->suspend(encoder);
+	drm_modeset_unlock_all(dev);
+}
+
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+	if (acpi_target_system_state() < ACPI_STATE_S3)
+		return true;
+#endif
+	return false;
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+
+	if (!xe->info.enable_display)
+		return;
+
+	/*
+	 * We do a lot of poking in a lot of registers, make sure they work
+	 * properly.
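The staged init functions here are meant to be called in a fixed order during probe. A minimal sketch of that ordering (error unwinding is handled by the drmm actions each stage registers; the wrapper name is illustrative):

static int example_display_bringup(struct xe_device *xe)
{
	int err;

	err = xe_display_init_nommio(xe);	/* power domains, PCH detection */
	if (err)
		return err;

	err = xe_display_init_noirq(xe);	/* opregion, DRAM info, bandwidth */
	if (err)
		return err;

	err = xe_display_init_noaccel(xe);	/* modeset init, before GEM is up */
	if (err)
		return err;

	return xe_display_init(xe);		/* full display driver probe */
}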
+ */ + intel_power_domains_disable(xe); + if (has_display(xe)) + drm_kms_helper_poll_disable(&xe->drm); + + intel_display_driver_suspend(xe); + + intel_dp_mst_suspend(xe); + + intel_hpd_cancel_work(xe); + + intel_suspend_encoders(xe); + + intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold); + + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + + intel_dmc_suspend(xe); +} + +void xe_display_pm_suspend_late(struct xe_device *xe) +{ + bool s2idle = suspend_to_idle(); + if (!xe->info.enable_display) + return; + + intel_power_domains_suspend(xe, s2idle); + + intel_display_power_suspend_late(xe); +} + +void xe_display_pm_resume_early(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_display_power_resume_early(xe); + + intel_power_domains_resume(xe); +} + +void xe_display_pm_resume(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_dmc_resume(xe); + + if (has_display(xe)) + drm_mode_config_reset(&xe->drm); + + intel_display_driver_init_hw(xe); + intel_hpd_init(xe); + + /* MST sideband requires HPD interrupts enabled */ + intel_dp_mst_resume(xe); + intel_display_driver_resume(xe); + + intel_hpd_poll_disable(xe); + if (has_display(xe)) + drm_kms_helper_poll_enable(&xe->drm); + + intel_opregion_resume(xe); + + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); + + intel_power_domains_enable(xe); +} + +void xe_display_probe(struct xe_device *xe) +{ + if (!xe->info.enable_display) + goto no_display; + + intel_display_device_probe(xe); + + if (has_display(xe)) + return; + +no_display: + xe->info.enable_display = false; + unset_display_features(xe); +} diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/xe_display.h new file mode 100644 index 000000000000..710e56180b52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_display.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DISPLAY_H_ +#define _XE_DISPLAY_H_ + +#include "xe_device.h" + +struct drm_driver; + +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +bool xe_display_driver_probe_defer(struct pci_dev *pdev); +void xe_display_driver_set_hooks(struct drm_driver *driver); +void xe_display_driver_remove(struct xe_device *xe); + +int xe_display_create(struct xe_device *xe); + +void xe_display_probe(struct xe_device *xe); + +int xe_display_init_nommio(struct xe_device *xe); +int xe_display_init_noirq(struct xe_device *xe); +int xe_display_init_noaccel(struct xe_device *xe); +int xe_display_init(struct xe_device *xe); +void xe_display_fini(struct xe_device *xe); + +void xe_display_register(struct xe_device *xe); +void xe_display_unregister(struct xe_device *xe); + +void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); +void xe_display_irq_reset(struct xe_device *xe); +void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); + +void xe_display_pm_suspend(struct xe_device *xe); +void xe_display_pm_suspend_late(struct xe_device *xe); +void xe_display_pm_resume_early(struct xe_device *xe); +void xe_display_pm_resume(struct xe_device *xe); + +#else + +static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0; } +static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { } +static inline void xe_display_driver_remove(struct xe_device *xe) {} + +static inline int xe_display_create(struct xe_device *xe) { return 0; } + +static inline void xe_display_probe(struct xe_device *xe) 
{ } + +static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; } +static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; } +static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; } +static inline int xe_display_init(struct xe_device *xe) { return 0; } +static inline void xe_display_fini(struct xe_device *xe) {} + +static inline void xe_display_register(struct xe_device *xe) {} +static inline void xe_display_unregister(struct xe_device *xe) {} + +static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {} +static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {} +static inline void xe_display_irq_reset(struct xe_device *xe) {} +static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} + +static inline void xe_display_pm_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} +static inline void xe_display_pm_resume_early(struct xe_device *xe) {} +static inline void xe_display_pm_resume(struct xe_device *xe) {} + +#endif /* CONFIG_DRM_XE_DISPLAY */ +#endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c new file mode 100644 index 000000000000..64ed303728fd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_dma_buf.h" + +#include <kunit/test.h> +#include <linux/dma-buf.h> +#include <linux/pci-p2pdma.h> + +#include <drm/drm_device.h> +#include <drm/drm_prime.h> +#include <drm/ttm/ttm_tt.h> + +#include "tests/xe_test.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_vm.h" + +MODULE_IMPORT_NS(DMA_BUF); + +static int xe_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + if (attach->peer2peer && + pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0) + attach->peer2peer = false; + + if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) + return -EOPNOTSUPP; + + xe_device_mem_access_get(to_xe_device(obj->dev)); + return 0; +} + +static void xe_dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + xe_device_mem_access_put(to_xe_device(obj->dev)); +} + +static int xe_dma_buf_pin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + struct xe_device *xe = xe_bo_device(bo); + int ret; + + /* + * For now only support pinning in TT memory, for two reasons: + * 1) Avoid pinning in a placement not accessible to some importers. + * 2) Pinning in VRAM requires PIN accounting which is a to-do. 
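For context, the pin path above is normally reached when a non-dynamic importer attaches: for a dynamic exporter like this one, dma-buf core pins the buffer on behalf of such importers. A rough importer-side sketch under those assumptions (illustrative only, error handling trimmed):

static struct sg_table *example_static_import(struct dma_buf *buf,
					      struct device *dev)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	/* Static attachment: core pins a dynamic exporter's buffer for us. */
	attach = dma_buf_attach(buf, dev);
	if (IS_ERR(attach))
		return ERR_CAST(attach);

	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt))
		dma_buf_detach(buf, attach);

	return sgt;
}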
+ */ + if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) { + drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); + return -EINVAL; + } + + ret = xe_bo_migrate(bo, XE_PL_TT); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + drm_dbg(&xe->drm, + "Failed migrating dma-buf to TT memory: %pe\n", + ERR_PTR(ret)); + return ret; + } + + ret = xe_bo_pin_external(bo); + xe_assert(xe, !ret); + + return 0; +} + +static void xe_dma_buf_unpin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_unpin_external(bo); +} + +static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + struct sg_table *sgt; + int r = 0; + + if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT)) + return ERR_PTR(-EOPNOTSUPP); + + if (!xe_bo_is_pinned(bo)) { + if (!attach->peer2peer) + r = xe_bo_migrate(bo, XE_PL_TT); + else + r = xe_bo_validate(bo, NULL, false); + if (r) + return ERR_PTR(r); + } + + switch (bo->ttm.resource->mem_type) { + case XE_PL_TT: + sgt = drm_prime_pages_to_sg(obj->dev, + bo->ttm.ttm->pages, + bo->ttm.ttm->num_pages); + if (IS_ERR(sgt)) + return sgt; + + if (dma_map_sgtable(attach->dev, sgt, dir, + DMA_ATTR_SKIP_CPU_SYNC)) + goto error_free; + break; + + case XE_PL_VRAM0: + case XE_PL_VRAM1: + r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo), + bo->ttm.resource, 0, + bo->ttm.base.size, attach->dev, + dir, &sgt); + if (r) + return ERR_PTR(r); + break; + default: + return ERR_PTR(-EINVAL); + } + + return sgt; + +error_free: + sg_free_table(sgt); + kfree(sgt); + return ERR_PTR(-EBUSY); +} + +static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); + + if (!xe_bo_is_vram(bo)) { + dma_unmap_sgtable(attach->dev, sgt, dir, 0); + sg_free_table(sgt); + kfree(sgt); + } else { + xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt); + } +} + +static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads) + return 0; + + /* Can we do interruptible lock here? 
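From userspace, the begin_cpu_access hook fires via the standard dma-buf sync ioctl; a CPU reader brackets its access roughly like this (standard uAPI from <linux/dma-buf.h>, sketch only):

#include <linux/dma-buf.h>
#include <sys/ioctl.h>

static void cpu_read_bracket(int dmabuf_fd)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ,
	};

	/* Lands in xe_dma_buf_begin_cpu_access(), migrating the BO to TT. */
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

	/* ... read through the CPU mapping ... */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}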
*/ + xe_bo_lock(bo, false); + (void)xe_bo_migrate(bo, XE_PL_TT); + xe_bo_unlock(bo); + + return 0; +} + +const struct dma_buf_ops xe_dmabuf_ops = { + .attach = xe_dma_buf_attach, + .detach = xe_dma_buf_detach, + .pin = xe_dma_buf_pin, + .unpin = xe_dma_buf_unpin, + .map_dma_buf = xe_dma_buf_map, + .unmap_dma_buf = xe_dma_buf_unmap, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = xe_dma_buf_begin_cpu_access, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) +{ + struct xe_bo *bo = gem_to_xe_bo(obj); + struct dma_buf *buf; + + if (bo->vm) + return ERR_PTR(-EPERM); + + buf = drm_gem_prime_export(obj, flags); + if (!IS_ERR(buf)) + buf->ops = &xe_dmabuf_ops; + + return buf; +} + +static struct drm_gem_object * +xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, + struct dma_buf *dma_buf) +{ + struct dma_resv *resv = dma_buf->resv; + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + int ret; + + dma_resv_lock(resv, NULL); + bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + 0, /* Will require 1way or 2way for vm_bind */ + ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto error; + } + dma_resv_unlock(resv); + + return &bo->ttm.base; + +error: + dma_resv_unlock(resv); + return ERR_PTR(ret); +} + +static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->importer_priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + XE_WARN_ON(xe_bo_evict(bo, false)); +} + +static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { + .allow_peer2peer = true, + .move_notify = xe_dma_buf_move_notify +}; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + +struct dma_buf_test_params { + struct xe_test_priv base; + const struct dma_buf_attach_ops *attach_ops; + bool force_different_devices; + u32 mem_mask; +}; + +#define to_dma_buf_test_params(_priv) \ + container_of(_priv, struct dma_buf_test_params, base) +#endif + +struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + XE_TEST_DECLARE(struct dma_buf_test_params *test = + to_dma_buf_test_params + (xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));) + const struct dma_buf_attach_ops *attach_ops; + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct xe_bo *bo; + + if (dma_buf->ops == &xe_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev && + !XE_TEST_ONLY(test && test->force_different_devices)) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + /* + * Don't publish the bo until we have a valid attachment, and a + * valid attachment needs the bo address. So pre-create a bo before + * creating the attachment and publish. + */ + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return ERR_CAST(bo); + + attach_ops = &xe_dma_buf_attach_ops; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + if (test) + attach_ops = test->attach_ops; +#endif + + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + if (IS_ERR(attach)) { + obj = ERR_CAST(attach); + goto out_err; + } + + /* Errors here will take care of freeing the bo. 
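A userspace PRIME round trip that exercises these export/import paths; importing back into the exporting device hits the fast path above that just takes a GEM reference. Standard DRM uAPI, sketch only with error checking elided:

struct drm_prime_handle export = { .handle = bo_handle, .flags = DRM_CLOEXEC };
struct drm_prime_handle import = { 0 };

ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &export);	/* xe_gem_prime_export() */
import.fd = export.fd;
ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import);	/* xe_gem_prime_import() */
/* import.handle now references the same GEM object. */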
 */
+	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+
+out_err:
+	xe_bo_free(bo);
+
+	return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 000000000000..861dd28a862c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
new file mode 100644
index 000000000000..82d1305e831f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <drm/xe_drm.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "xe_bo.h"
+#include "xe_bo_types.h"
+#include "xe_device_types.h"
+#include "xe_drm_client.h"
+#include "xe_trace.h"
+
+/**
+ * xe_drm_client_alloc() - Allocate drm client
+ * @void: No arg
+ *
+ * Allocate a drm client struct to track the client's memory usage over
+ * the client's lifetime. Call this API whenever a new client opens the
+ * xe device.
+ *
+ * Return: pointer to client struct or NULL if allocation fails
+ */
+struct xe_drm_client *xe_drm_client_alloc(void)
+{
+	struct xe_drm_client *client;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	kref_init(&client->kref);
+
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->bos_lock);
+	INIT_LIST_HEAD(&client->bos_list);
+#endif
+	return client;
+}
+
+/**
+ * __xe_drm_client_free() - Free client struct
+ * @kref: The reference
+ *
+ * Frees the client struct. Called via xe_drm_client_put() once the last
+ * reference is dropped, i.e. when the drm client closes the xe device.
+ *
+ * Return: void
+ */
+void __xe_drm_client_free(struct kref *kref)
+{
+	struct xe_drm_client *client =
+		container_of(kref, typeof(*client), kref);
+
+	kfree(client);
+}
+
+#ifdef CONFIG_PROC_FS
+/**
+ * xe_drm_client_add_bo() - Add BO for tracking client mem usage
+ * @client: The drm client ptr
+ * @bo: The xe BO ptr
+ *
+ * Call this for every BO a drm client creates so that its memory usage
+ * can be tracked.
+ *
+ * Return: void
+ */
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->client);
+	XE_WARN_ON(!list_empty(&bo->client_link));
+
+	spin_lock(&client->bos_lock);
+	bo->client = xe_drm_client_get(client);
+	list_add_tail_rcu(&bo->client_link, &client->bos_list);
+	spin_unlock(&client->bos_lock);
+}
+
+/**
+ * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage
+ * @bo: The xe BO ptr
+ *
+ * Call this when a drm client's BO is destroyed so that it stops being
+ * counted towards the client's memory usage.
+ *
+ * Return: void
+ */
+void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+	struct xe_drm_client *client = bo->client;
+
+	spin_lock(&client->bos_lock);
+	list_del_rcu(&bo->client_link);
+	spin_unlock(&client->bos_lock);
+
+	xe_drm_client_put(client);
+}
+
+static void bo_meminfo(struct xe_bo *bo,
+		       struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
+{
+	u64 sz = bo->size;
+	u32 mem_type;
+
+	if (bo->placement.placement)
+		mem_type = bo->placement.placement->mem_type;
+	else
+		mem_type = XE_PL_TT;
+
+	if (bo->ttm.base.handle_count > 1)
+		stats[mem_type].shared += sz;
+	else
+		stats[mem_type].private += sz;
+
+	if (xe_bo_has_pages(bo)) {
+		stats[mem_type].resident += sz;
+
+		if (!dma_resv_test_signaled(bo->ttm.base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[mem_type].active += sz;
+		else if (mem_type == XE_PL_SYSTEM)
+			stats[mem_type].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = {
+		[XE_PL_SYSTEM] = "system",
+		[XE_PL_TT] = "gtt",
+		[XE_PL_VRAM0] = "vram0",
+		[XE_PL_VRAM1] = "vram1",
+		[4 ... 6] = NULL,
+		[XE_PL_STOLEN] = "stolen"
+	};
+	struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
+	struct xe_file *xef = file->driver_priv;
+	struct ttm_device *bdev = &xef->xe->ttm;
+	struct ttm_resource_manager *man;
+	struct xe_drm_client *client;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+	unsigned int id;
+	u32 mem_type;
+
+	client = xef->client;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id) {
+		struct xe_bo *bo = gem_to_xe_bo(obj);
+
+		bo_meminfo(bo, stats);
+	}
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	spin_lock(&client->bos_lock);
+	list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
+		if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+			continue;
+		bo_meminfo(bo, stats);
+		xe_bo_put(bo);
+	}
+	spin_unlock(&client->bos_lock);
+
+	for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
+		if (!mem_type_to_name[mem_type])
+			continue;
+
+		man = ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			drm_print_memory_stats(p,
+					       &stats[mem_type],
+					       DRM_GEM_OBJECT_RESIDENT |
+					       (mem_type != XE_PL_SYSTEM ? 0 :
+					       DRM_GEM_OBJECT_PURGEABLE),
+					       mem_type_to_name[mem_type]);
+		}
+	}
+}
+
+/**
+ * xe_drm_client_fdinfo() - Callback for fdinfo interface
+ * @p: The drm_printer ptr
+ * @file: The drm_file ptr
+ *
+ * This is the callback for the drm fdinfo interface. Register it as
+ * show_fdinfo in the drm driver ops.
+ *
+ * Return: void
+ */
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	show_meminfo(p, file);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
new file mode 100644
index 000000000000..a9649aa36011
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DRM_CLIENT_H_
+#define _XE_DRM_CLIENT_H_
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/pid.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+struct drm_file;
+struct drm_printer;
+struct xe_bo;
+
+struct xe_drm_client {
+	struct kref kref;
+	unsigned int id;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @bos_lock: lock protecting @bos_list
+	 */
+	spinlock_t bos_lock;
+	/**
+	 * @bos_list: list of bos created by this client
+	 *
+	 * Protected by @bos_lock.
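For reference, the kernel-doc above says to register this as show_fdinfo; wired up, that looks roughly like the following. example_fops and example_driver are illustrative names, and the fops must route through drm_show_fdinfo() for the driver hook to be called:

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.open		= drm_open,
	.release	= drm_release,
	.unlocked_ioctl	= drm_ioctl,
	.show_fdinfo	= drm_show_fdinfo,	/* calls driver->show_fdinfo */
};

static const struct drm_driver example_driver = {
	/* ... */
	.show_fdinfo	= xe_drm_client_fdinfo,
	.fops		= &example_fops,
};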
+	 */
+	struct list_head bos_list;
+#endif
+};
+
+static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client)
+{
+	kref_get(&client->kref);
+	return client;
+}
+
+void __xe_drm_client_free(struct kref *kref);
+
+static inline void xe_drm_client_put(struct xe_drm_client *client)
+{
+	kref_put(&client->kref, __xe_drm_client_free);
+}
+
+struct xe_drm_client *xe_drm_client_alloc(void);
+#ifdef CONFIG_PROC_FS
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo);
+void xe_drm_client_remove_bo(struct xe_bo *bo);
+#else
+static inline void xe_drm_client_add_bo(struct xe_drm_client *client,
+					struct xe_bo *bo)
+{
+}
+
+static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 000000000000..d45b71426cc8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+#define DRIVER_NAME		"xe"
+#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DATE		"20201103"
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		1
+#define DRIVER_PATCHLEVEL	0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..d30c0d0689bc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_exec.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/delay.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_macros.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because of a few things:
+ *
+ * - Passing in a list of BOs which are read / written to, creating implicit
+ *   syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, by using the dma-buf implicit sync uAPI, by having
+ * binds as separate operations, and by using the DRM scheduler to flow
+ * control the ring. Let's deep dive into each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass in / out fences between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this work, each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time; rather, we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec.
In that + * sense, a VM bind is basically the same operation as an exec from the user + * perspective. e.g. If an exec depends on a VM bind use the in / out fence + * interface (struct drm_xe_sync) to synchronize like syncing between two + * dependent execs. + * + * Although a user cannot trigger a bind, we still have to rebind userptrs in + * the VM that have been invalidated since the last exec, likewise we also have + * to rebind BOs that have been evicted by the kernel. We schedule these rebinds + * behind any pending kernel operations on any external BOs in VM or any BOs + * private to the VM. This is accomplished by the rebinds waiting on BOs + * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and + * in DMA_RESV_USAGE_WRITE for external BOs). + * + * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute + * mode VMs we use preempt fences and a rebind worker (TODO: add link). + * + * There is no need to flow control the ring in the exec as we write the ring at + * submission time and set the DRM scheduler max job limit SIZE_OF_RING / + * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the + * ring is available. + * + * All of this results in a rather simple exec implementation. + * + * Flow + * ~~~~ + * + * .. code-block:: + * + * Parse input arguments + * Wait for any async VM bind passed as in-fences to start + * <----------------------------------------------------------------------| + * Lock global VM lock in read mode | + * Pin userptrs (also finds userptr invalidated since last exec) | + * Lock exec (VM dma-resv lock, external BOs dma-resv locks) | + * Validate BOs that have been evicted | + * Create job | + * Rebind invalidated userptrs + evicted BOs (non-compute-mode) | + * Add rebind fence dependency to job | + * Add job VM dma-resv bookkeeping slot (non-compute mode) | + * Add job to external BOs dma-resv write slots (non-compute mode) | + * Check if any userptrs invalidated since pin ------ Drop locks ---------| + * Install in / out fences for job + * Submit job + * Unlock all + */ + +static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) +{ + return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); +} + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec *args = data; + struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs); + u64 __user *addresses_user = u64_to_user_ptr(args->address); + struct xe_exec_queue *q; + struct xe_sync_entry *syncs = NULL; + u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; + struct drm_exec *exec = &vm_exec.exec; + u32 i, num_syncs = 0; + struct xe_sched_job *job; + struct dma_fence *rebind_fence; + struct xe_vm *vm; + bool write_locked; + ktime_t end = 0; + int err = 0; + + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->num_batch_buffer && + q->width != args->num_batch_buffer)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { + 
err = -ECANCELED; + goto err_exec_queue; + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto err_exec_queue; + } + } + + vm = q->vm; + + for (i = 0; i < args->num_syncs; i++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], + &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + (xe_vm_in_lr_mode(vm) ? + SYNC_PARSE_FLAG_LR_MODE : 0)); + if (err) + goto err_syncs; + } + + if (xe_exec_queue_is_parallel(q)) { + err = __copy_from_user(addresses, addresses_user, sizeof(u64) * + q->width); + if (err) { + err = -EFAULT; + goto err_syncs; + } + } + +retry: + if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { + err = down_write_killable(&vm->lock); + write_locked = true; + } else { + /* We don't allow execs while the VM is in error state */ + err = down_read_interruptible(&vm->lock); + write_locked = false; + } + if (err) + goto err_syncs; + + if (write_locked) { + err = xe_vm_userptr_pin(vm); + downgrade_write(&vm->lock); + write_locked = false; + if (err) + goto err_unlock_list; + } + + vm_exec.vm = &vm->gpuvm; + vm_exec.num_fences = 1 + vm->xe->info.tile_count; + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + if (xe_vm_in_lr_mode(vm)) { + drm_exec_init(exec, vm_exec.flags, 0); + } else { + err = drm_gpuvm_exec_lock(&vm_exec); + if (err) { + if (xe_vm_validate_should_retry(exec, err, &end)) + err = -EAGAIN; + goto err_unlock_list; + } + } + + if (xe_vm_is_closed_or_banned(q->vm)) { + drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n"); + err = -ECANCELED; + goto err_exec; + } + + if (!args->num_batch_buffer) { + if (!xe_vm_in_lr_mode(vm)) { + struct dma_fence *fence; + + fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_exec; + } + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_exec_queue_last_fence_set(q, vm, fence); + dma_fence_put(fence); + } + + goto err_exec; + } + + if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { + err = -EWOULDBLOCK; + goto err_exec; + } + + job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ? + addresses : &args->address); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err_exec; + } + + /* + * Rebind any invalidated userptr or evicted BOs in the VM, non-compute + * VM mode only. + */ + rebind_fence = xe_vm_rebind(vm, false); + if (IS_ERR(rebind_fence)) { + err = PTR_ERR(rebind_fence); + goto err_put_job; + } + + /* + * We store the rebind_fence in the VM so subsequent execs don't get + * scheduled before the rebinds of userptrs / evicted BOs is complete. 
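Stepping out of the kernel for a moment: the in / out fence contract that the DOC above describes looks roughly like this from userspace. Struct and flag names follow the xe uAPI headers at the time of writing and may differ between revisions; sketch only:

struct drm_xe_sync syncs[2] = {
	{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .handle = in_syncobj },	/* wait */
	{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .handle = out_syncobj,
	  .flags = DRM_XE_SYNC_FLAG_SIGNAL },				/* signal */
};
struct drm_xe_exec exec = {
	.exec_queue_id	= queue_id,
	.num_syncs	= 2,
	.syncs		= (uintptr_t)syncs,
	.address	= batch_gpu_addr,	/* no BO list, just a VM address */
	.num_batch_buffer = 1,
};

ioctl(drm_fd, DRM_IOCTL_XE_EXEC, &exec);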
+ */ + if (rebind_fence) { + dma_fence_put(vm->rebind_fence); + vm->rebind_fence = rebind_fence; + } + if (vm->rebind_fence) { + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &vm->rebind_fence->flags)) { + dma_fence_put(vm->rebind_fence); + vm->rebind_fence = NULL; + } else { + dma_fence_get(vm->rebind_fence); + err = drm_sched_job_add_dependency(&job->drm, + vm->rebind_fence); + if (err) + goto err_put_job; + } + } + + /* Wait behind munmap style rebinds */ + if (!xe_vm_in_lr_mode(vm)) { + err = drm_sched_job_add_resv_dependencies(&job->drm, + xe_vm_resv(vm), + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_put_job; + } + + for (i = 0; i < num_syncs && !err; i++) + err = xe_sync_entry_add_deps(&syncs[i], job); + if (err) + goto err_put_job; + + if (!xe_vm_in_lr_mode(vm)) { + err = xe_sched_job_last_fence_add_dep(job, vm); + if (err) + goto err_put_job; + + err = down_read_interruptible(&vm->userptr.notifier_lock); + if (err) + goto err_put_job; + + err = __xe_vm_userptr_needs_repin(vm); + if (err) + goto err_repin; + } + + /* + * Point of no return, if we error after this point just set an error on + * the job and let the DRM scheduler / backend clean up the job. + */ + xe_sched_job_arm(job); + if (!xe_vm_in_lr_mode(vm)) + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], job, + &job->drm.s_fence->finished); + + if (xe_exec_queue_is_lr(q)) + q->ring_ops->emit_job(job); + if (!xe_vm_in_lr_mode(vm)) + xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished); + xe_sched_job_push(job); + xe_vm_reactivate_rebind(vm); + + if (!err && !xe_vm_in_lr_mode(vm)) { + spin_lock(&xe->ttm.lru_lock); + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); + spin_unlock(&xe->ttm.lru_lock); + } + +err_repin: + if (!xe_vm_in_lr_mode(vm)) + up_read(&vm->userptr.notifier_lock); +err_put_job: + if (err) + xe_sched_job_put(job); +err_exec: + drm_exec_fini(exec); +err_unlock_list: + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + if (err == -EAGAIN) + goto retry; +err_syncs: + for (i = 0; i < num_syncs; i++) + xe_sync_entry_cleanup(&syncs[i]); + kfree(syncs); +err_exec_queue: + xe_exec_queue_put(q); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h new file mode 100644 index 000000000000..e4932494cea3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXEC_H_ +#define _XE_EXEC_H_ + +struct drm_device; +struct drm_file; + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c new file mode 100644 index 000000000000..44fe8097b7cd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -0,0 +1,956 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_exec_queue.h" + +#include <linux/nospec.h> + +#include <drm/drm_device.h> +#include <drm/drm_file.h> +#include <drm/xe_drm.h> + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_ring_ops_types.h" +#include "xe_trace.h" +#include "xe_vm.h" + +enum xe_exec_queue_sched_prop { + XE_EXEC_QUEUE_JOB_TIMEOUT = 0, + XE_EXEC_QUEUE_TIMESLICE = 1, 
+ XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, + XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, +}; + +static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags) +{ + struct xe_exec_queue *q; + struct xe_gt *gt = hwe->gt; + int err; + int i; + + /* only kernel queues can be permanent */ + XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); + + q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + if (!q) + return ERR_PTR(-ENOMEM); + + kref_init(&q->refcount); + q->flags = flags; + q->hwe = hwe; + q->gt = gt; + if (vm) + q->vm = xe_vm_get(vm); + q->class = hwe->class; + q->width = width; + q->logical_mask = logical_mask; + q->fence_irq = >->fence_irq[hwe->class]; + q->ring_ops = gt->ring_ops[hwe->class]; + q->ops = gt->exec_queue_ops; + INIT_LIST_HEAD(&q->persistent.link); + INIT_LIST_HEAD(&q->compute.link); + INIT_LIST_HEAD(&q->multi_gt_link); + + q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; + q->sched_props.preempt_timeout_us = + hwe->eclass->sched_props.preempt_timeout_us; + + if (xe_exec_queue_is_parallel(q)) { + q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); + q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; + } + if (q->flags & EXEC_QUEUE_FLAG_VM) { + q->bind.fence_ctx = dma_fence_context_alloc(1); + q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; + } + + for (i = 0; i < width; ++i) { + err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K); + if (err) + goto err_lrc; + } + + err = q->ops->init(q); + if (err) + goto err_lrc; + + /* + * Normally the user vm holds an rpm ref to keep the device + * awake, and the context holds a ref for the vm, however for + * some engines we use the kernels migrate vm underneath which offers no + * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we + * can perform GuC CT actions when needed. Caller is expected to have + * already grabbed the rpm ref outside any sensitive locks. 
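Concretely, the calling pattern this comment expects mirrors what the exec queue create ioctl later in this file does for bind queues (sketch):

/* Hold a mem_access (rpm) ref across creation; the queue keeps its own. */
xe_device_mem_access_get(xe);
q = xe_exec_queue_create(xe, migrate_vm, logical_mask, width, hwe,
			 EXEC_QUEUE_FLAG_VM);
xe_device_mem_access_put(xe);	/* ref now held by the queue itself */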
+ */ + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm)) + drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); + + return q; + +err_lrc: + for (i = i - 1; i >= 0; --i) + xe_lrc_finish(q->lrc + i); + kfree(q); + return ERR_PTR(err); +} + +struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hwe, u32 flags) +{ + struct xe_exec_queue *q; + int err; + + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return ERR_PTR(err); + } + q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags); + if (vm) + xe_vm_unlock(vm); + + return q; +} + +struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags) +{ + struct xe_hw_engine *hwe, *hwe0 = NULL; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == class) { + logical_mask |= BIT(hwe->logical_instance); + if (!hwe0) + hwe0 = hwe; + } + } + + if (!logical_mask) + return ERR_PTR(-ENODEV); + + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags); +} + +void xe_exec_queue_destroy(struct kref *ref) +{ + struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); + struct xe_exec_queue *eq, *next; + + xe_exec_queue_last_fence_put_unlocked(q); + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { + list_for_each_entry_safe(eq, next, &q->multi_gt_list, + multi_gt_link) + xe_exec_queue_put(eq); + } + + q->ops->fini(q); +} + +void xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + for (i = 0; i < q->width; ++i) + xe_lrc_finish(q->lrc + i); + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) + xe_device_mem_access_put(gt_to_xe(q->gt)); + if (q->vm) + xe_vm_put(q->vm); + + kfree(q); +} + +void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) +{ + switch (q->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(q->name, "rcs%d", instance); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(q->name, "vcs%d", instance); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(q->name, "vecs%d", instance); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(q->name, "bcs%d", instance); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(q->name, "ccs%d", instance); + break; + case XE_ENGINE_CLASS_OTHER: + sprintf(q->name, "gsccs%d", instance); + break; + default: + XE_WARN_ON(q->class); + } +} + +struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) +{ + struct xe_exec_queue *q; + + mutex_lock(&xef->exec_queue.lock); + q = xa_load(&xef->exec_queue.xa, id); + if (q) + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + + return q; +} + +enum xe_exec_queue_priority +xe_exec_queue_device_get_max_priority(struct xe_device *xe) +{ + return capable(CAP_SYS_NICE) ? 
XE_EXEC_QUEUE_PRIORITY_HIGH : + XE_EXEC_QUEUE_PRIORITY_NORMAL; +} + +static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) + return -EPERM; + + return q->ops->set_priority(q, value); +} + +static bool xe_exec_queue_enforce_schedule_limit(void) +{ +#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) + return true; +#else + return !capable(CAP_SYS_NICE); +#endif +} + +static void +xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, + enum xe_exec_queue_sched_prop prop, + u32 *min, u32 *max) +{ + switch (prop) { + case XE_EXEC_QUEUE_JOB_TIMEOUT: + *min = eclass->sched_props.job_timeout_min; + *max = eclass->sched_props.job_timeout_max; + break; + case XE_EXEC_QUEUE_TIMESLICE: + *min = eclass->sched_props.timeslice_min; + *max = eclass->sched_props.timeslice_max; + break; + case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: + *min = eclass->sched_props.preempt_timeout_min; + *max = eclass->sched_props.preempt_timeout_max; + break; + default: + break; + } +#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) + if (capable(CAP_SYS_NICE)) { + switch (prop) { + case XE_EXEC_QUEUE_JOB_TIMEOUT: + *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; + *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; + break; + case XE_EXEC_QUEUE_TIMESLICE: + *min = XE_HW_ENGINE_TIMESLICE_MIN; + *max = XE_HW_ENGINE_TIMESLICE_MAX; + break; + case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: + *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; + *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + break; + default: + break; + } + } +#endif +} + +static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + u32 min = 0, max = 0; + + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_TIMESLICE, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; + + return q->ops->set_timeslice(q, value); +} + +static int exec_queue_set_preemption_timeout(struct xe_device *xe, + struct xe_exec_queue *q, u64 value, + bool create) +{ + u32 min = 0, max = 0; + + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; + + return q->ops->set_preempt_timeout(q, value); +} + +static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm))) + return -EINVAL; + + if (value) + q->flags |= EXEC_QUEUE_FLAG_PERSISTENT; + else + q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; + + return 0; +} + +static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + u32 min = 0, max = 0; + + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; + + return q->ops->set_job_timeout(q, value); +} + +static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) + 
return -EINVAL; + + q->usm.acc_trigger = value; + + return 0; +} + +static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) + return -EINVAL; + + q->usm.acc_notify = value; + + return 0; +} + +static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) + return -EINVAL; + + if (value > DRM_XE_ACC_GRANULARITY_64M) + return -EINVAL; + + q->usm.acc_granularity = value; + + return 0; +} + +typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, + struct xe_exec_queue *q, + u64 value, bool create); + +static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, +}; + +static int exec_queue_user_ext_set_property(struct xe_device *xe, + struct xe_exec_queue *q, + u64 extension, + bool create) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.property >= + ARRAY_SIZE(exec_queue_set_property_funcs)) || + XE_IOCTL_DBG(xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); + return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); +} + +typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, + struct xe_exec_queue *q, + u64 extension, + bool create); + +static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { + [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, bool create) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= + ARRAY_SIZE(exec_queue_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, + ARRAY_SIZE(exec_queue_user_extension_funcs)); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (ext.next_extension) + return exec_queue_user_extensions(xe, q, ext.next_extension, + ++ext_number, create); + + return 0; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + 
[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +static struct xe_hw_engine * +find_hw_engine(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + u32 idx; + + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} + +static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + if (XE_IOCTL_DBG(xe, width != 1)) + return 0; + if (XE_IOCTL_DBG(xe, num_placements != 1)) + return 0; + if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return 0; + + eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == + user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) + logical_mask |= BIT(hwe->logical_instance); + } + + return logical_mask; +} + +static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + int len = width * num_placements; + int i, j, n; + u16 class; + u16 gt_id; + u32 return_mask = 0, prev_mask; + + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && + len > 1)) + return 0; + + for (i = 0; i < width; ++i) { + u32 current_mask = 0; + + for (j = 0; j < num_placements; ++j) { + struct xe_hw_engine *hwe; + + n = j * width + i; + + hwe = find_hw_engine(xe, eci[n]); + if (XE_IOCTL_DBG(xe, !hwe)) + return 0; + + if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) + return 0; + + if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || + XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) + return 0; + + class = eci[n].engine_class; + gt_id = eci[n].gt_id; + + if (width == 1 || !i) + return_mask |= BIT(eci[n].engine_instance); + current_mask |= BIT(eci[n].engine_instance); + } + + /* Parallel submissions must be logically contiguous */ + if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) + return 0; + + prev_mask = current_mask; + } + + return return_mask; +} + +int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_create *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_hw_engine *hwe; + struct xe_vm *vm, *migrate_vm; + struct xe_gt *gt; + struct xe_exec_queue *q = NULL; + u32 logical_mask; + u32 id; + u32 len; + int err; + + if (XE_IOCTL_DBG(xe, args->flags) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + len = args->width * args->num_placements; + if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) + return -EINVAL; + + err = __copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + len); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if 
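calc_validate_logical_mask() indexes the user array as n = j * width + i and requires consecutive slots to use logically adjacent engines; a worked example under an assumed video-engine numbering:

/* Illustrative only: width=2, num_placements=2 over four decode engines.
 * n = j * width + i, so rows are placements (j), columns parallel slots (i). */
struct drm_xe_engine_class_instance eci[4] = {
	[0] = { .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 0 }, /* j=0, i=0 */
	[1] = { .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 1 }, /* j=0, i=1 */
	[2] = { .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 2 }, /* j=1, i=0 */
	[3] = { .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 3 }, /* j=1, i=1 */
};
/* Slot 0 gathers current_mask = BIT(0) | BIT(2) = 0b0101; slot 1 gathers
 * 0b1010, which equals prev_mask << 1, so the contiguity check passes and
 * the function returns the slot-0 mask 0b0101. */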
(XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + return -EINVAL; + + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { + for_each_gt(gt, xe, id) { + struct xe_exec_queue *new; + + if (xe_gt_is_media_type(gt)) + continue; + + eci[0].gt_id = gt->info.id; + logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_DBG(xe, !logical_mask)) + return -EINVAL; + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_DBG(xe, !hwe)) + return -EINVAL; + + /* The migration vm doesn't hold rpm ref */ + xe_device_mem_access_get(xe); + + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); + new = xe_exec_queue_create(xe, migrate_vm, logical_mask, + args->width, hwe, + EXEC_QUEUE_FLAG_PERSISTENT | + EXEC_QUEUE_FLAG_VM | + (id ? + EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : + 0)); + + xe_device_mem_access_put(xe); /* now held by engine */ + + xe_vm_put(migrate_vm); + if (IS_ERR(new)) { + err = PTR_ERR(new); + if (q) + goto put_exec_queue; + return err; + } + if (id == 0) + q = new; + else + list_add_tail(&new->multi_gt_list, + &q->multi_gt_link); + } + } else { + gt = xe_device_get_gt(xe, eci[0].gt_id); + logical_mask = calc_validate_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_DBG(xe, !logical_mask)) + return -EINVAL; + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_DBG(xe, !hwe)) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + + err = down_read_interruptible(&vm->lock); + if (err) { + xe_vm_put(vm); + return err; + } + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -ENOENT; + } + + q = xe_exec_queue_create(xe, vm, logical_mask, + args->width, hwe, + xe_vm_in_lr_mode(vm) ? 0 : + EXEC_QUEUE_FLAG_PERSISTENT); + up_read(&vm->lock); + xe_vm_put(vm); + if (IS_ERR(q)) + return PTR_ERR(q); + + if (xe_vm_in_preempt_fence_mode(vm)) { + q->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&q->compute.lock); + + err = xe_vm_add_compute_exec_queue(vm, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } + } + + if (args->extensions) { + err = exec_queue_user_extensions(xe, q, args->extensions, 0, true); + if (XE_IOCTL_DBG(xe, err)) + goto kill_exec_queue; + } + + q->persistent.xef = xef; + + mutex_lock(&xef->exec_queue.lock); + err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->exec_queue.lock); + if (err) + goto kill_exec_queue; + + args->exec_queue_id = id; + + return 0; + +kill_exec_queue: + xe_exec_queue_kill(q); +put_exec_queue: + xe_exec_queue_put(q); + return err; +} + +int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_get_property *args = data; + struct xe_exec_queue *q; + int ret; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + switch (args->property) { + case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: + args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); + ret = 0; + break; + default: + ret = -EINVAL; + } + + xe_exec_queue_put(q); + + return ret; +} + +/** + * xe_exec_queue_is_lr() - Whether an exec_queue is long-running + * @q: The exec_queue + * + * Return: True if the exec_queue is long-running, false otherwise. 
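A hedged user-space sketch of driving the non-bind path of the ioctl above (field names taken from the ioctl's own accesses; fd and vm_id are assumed to come from opening the device and a prior VM-create call):

struct drm_xe_engine_class_instance eci = {
	.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
	.engine_instance = 0,
	.gt_id = 0,
};
struct drm_xe_exec_queue_create create = {
	.width = 1,
	.num_placements = 1,
	.vm_id = vm_id, /* assumed: id from an earlier VM create */
	.instances = (__u64)(uintptr_t)&eci,
};
int err = ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
/* on success, create.exec_queue_id names the queue for exec/destroy */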
+ */ +bool xe_exec_queue_is_lr(struct xe_exec_queue *q) +{ + return q->vm && xe_vm_in_lr_mode(q->vm) && + !(q->flags & EXEC_QUEUE_FLAG_VM); +} + +static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) +{ + return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1; +} + +/** + * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full + * @q: The exec_queue + * + * Return: True if the exec_queue's ring is full, false otherwise. + */ +bool xe_exec_queue_ring_full(struct xe_exec_queue *q) +{ + struct xe_lrc *lrc = q->lrc; + s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; + + return xe_exec_queue_num_job_inflight(q) >= max_job; +} + +/** + * xe_exec_queue_is_idle() - Whether an exec_queue is idle. + * @q: The exec_queue + * + * FIXME: Need to determine what to use as the short-lived + * timeline lock for the exec_queues, so that the return value + * of this function becomes more than just an advisory + * snapshot in time. The timeline lock must protect the + * seqno from racing submissions on the same exec_queue. + * Typically vm->resv, but user-created timeline locks use the migrate vm + * and never grab the migrate vm->resv, so we have a race there. + * + * Return: True if the exec_queue is idle, false otherwise. + */ +bool xe_exec_queue_is_idle(struct xe_exec_queue *q) +{ + if (xe_exec_queue_is_parallel(q)) { + int i; + + for (i = 0; i < q->width; ++i) { + if (xe_lrc_seqno(&q->lrc[i]) != + q->lrc[i].fence_ctx.next_seqno - 1) + return false; + } + + return true; + } + + return xe_lrc_seqno(&q->lrc[0]) == + q->lrc[0].fence_ctx.next_seqno - 1; +} + +void xe_exec_queue_kill(struct xe_exec_queue *q) +{ + struct xe_exec_queue *eq = q, *next; + + list_for_each_entry_safe(eq, next, &eq->multi_gt_list, + multi_gt_link) { + q->ops->kill(eq); + xe_vm_remove_compute_exec_queue(q->vm, eq); + } + + q->ops->kill(q); + xe_vm_remove_compute_exec_queue(q->vm, q); +} + +int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_destroy *args = data; + struct xe_exec_queue *q; + + if (XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + mutex_lock(&xef->exec_queue.lock); + q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); + mutex_unlock(&xef->exec_queue.lock); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT)) + xe_exec_queue_kill(q); + else + xe_device_add_persistent_exec_queues(xe, q); + + trace_xe_exec_queue_close(q); + xe_exec_queue_put(q); + + return 0; +} + +static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + if (q->flags & EXEC_QUEUE_FLAG_VM) + lockdep_assert_held(&vm->lock); + else + xe_vm_assert_held(vm); +} + +/** + * xe_exec_queue_last_fence_put() - Drop ref to last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + */ +void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + if (q->last_fence) { + dma_fence_put(q->last_fence); + q->last_fence = NULL; + } +} + +/** + * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked + * @q: The exec queue + * + * Only safe to be called from xe_exec_queue_destroy(). 
+ */ +void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) +{ + if (q->last_fence) { + dma_fence_put(q->last_fence); + q->last_fence = NULL; + } +} + +/** + * xe_exec_queue_last_fence_get() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get the last fence without taking a reference + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put(q, vm); + + return q->last_fence ? q->last_fence : dma_fence_get_stub(); +} + +/** + * xe_exec_queue_last_fence_set() - Set last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * @fence: The fence + * + * Set the last fence for the engine. Increases the reference count for the + * fence; when closing the engine, xe_exec_queue_last_fence_put() should be + * called. + */ +void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, + struct dma_fence *fence) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + xe_exec_queue_last_fence_put(q, vm); + q->last_fence = dma_fence_get(fence); +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h new file mode 100644 index 000000000000..d959cc4a1a82 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_EXEC_QUEUE_H_ +#define _XE_EXEC_QUEUE_H_ + +#include "xe_exec_queue_types.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; +struct xe_file; + +struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hw_engine, u32 flags); +struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags); + +void xe_exec_queue_fini(struct xe_exec_queue *q); +void xe_exec_queue_destroy(struct kref *ref); +void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance); + +struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); + +static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q) +{ + kref_get(&q->refcount); + return q; +} + +static inline void xe_exec_queue_put(struct xe_exec_queue *q) +{ + kref_put(&q->refcount, xe_exec_queue_destroy); +} + +static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q) +{ + return q->width > 1; +} + +bool xe_exec_queue_is_lr(struct xe_exec_queue *q); + +bool xe_exec_queue_ring_full(struct xe_exec_queue *q); + +bool xe_exec_queue_is_idle(struct xe_exec_queue *q); + +void xe_exec_queue_kill(struct xe_exec_queue *q); + +int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); + +void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); +void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); +struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, + struct xe_vm 
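Taken together, the last-fence helpers implement a one-deep dependency chain per queue; a sketch of the intended calling pattern (new_fence is a hypothetical fence produced by the next job):

/* Under vm->lock (bind queue) or the VM's resv (non-bind queue): */
struct dma_fence *prev = xe_exec_queue_last_fence_get(q, vm); /* no ref taken */

/* ... order the new job after prev, then publish its fence ... */
xe_exec_queue_last_fence_set(q, vm, new_fence); /* takes a reference */

/* ... and on teardown: */
xe_exec_queue_last_fence_put(q, vm);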
*vm); +void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, + struct dma_fence *fence); + +#endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h new file mode 100644 index 000000000000..3d7e704ec3d9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXEC_QUEUE_TYPES_H_ +#define _XE_EXEC_QUEUE_TYPES_H_ + +#include <linux/kref.h> + +#include <drm/gpu_scheduler.h> + +#include "xe_gpu_scheduler_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_lrc_types.h" + +struct xe_execlist_exec_queue; +struct xe_gt; +struct xe_guc_exec_queue; +struct xe_hw_engine; +struct xe_vm; + +enum xe_exec_queue_priority { + XE_EXEC_QUEUE_PRIORITY_UNSET = -2, /* For execlist usage only */ + XE_EXEC_QUEUE_PRIORITY_LOW = 0, + XE_EXEC_QUEUE_PRIORITY_NORMAL, + XE_EXEC_QUEUE_PRIORITY_HIGH, + XE_EXEC_QUEUE_PRIORITY_KERNEL, + + XE_EXEC_QUEUE_PRIORITY_COUNT +}; + +/** + * struct xe_exec_queue - Execution queue + * + * Contains all state necessary for submissions. Can either be a user object or + * a kernel object. + */ +struct xe_exec_queue { + /** @gt: graphics tile this exec queue can submit to */ + struct xe_gt *gt; + /** + * @hwe: Hardware engine of the same class. May (physical engine) or may + * not (virtual engine) be where jobs actually end up running. Should + * never really be used for submissions. + */ + struct xe_hw_engine *hwe; + /** @refcount: ref count of this exec queue */ + struct kref refcount; + /** @vm: VM (address space) for this exec queue */ + struct xe_vm *vm; + /** @class: class of this exec queue */ + enum xe_engine_class class; + /** @priority: priority of this exec queue */ + enum xe_exec_queue_priority priority; + /** + * @logical_mask: logical mask of engines where jobs submitted to this + * exec queue can run + */ + u32 logical_mask; + /** @name: name of this exec queue */ + char name[MAX_FENCE_NAME_LEN]; + /** @width: width (number of BBs submitted per exec) of this exec queue */ + u16 width; + /** @fence_irq: fence IRQ used to signal job completion */ + struct xe_hw_fence_irq *fence_irq; + + /** + * @last_fence: last fence on exec queue, protected by vm->lock in write + * mode if bind exec queue, protected by dma resv lock if non-bind exec + * queue + */ + struct dma_fence *last_fence; + +/* queue no longer allowed to submit */ +#define EXEC_QUEUE_FLAG_BANNED BIT(0) +/* queue used for kernel submission only */ +#define EXEC_QUEUE_FLAG_KERNEL BIT(1) +/* kernel exec queue, only destroyed at driver unload */ +#define EXEC_QUEUE_FLAG_PERMANENT BIT(2) +/* queue keeps running pending jobs after destroy ioctl */ +#define EXEC_QUEUE_FLAG_PERSISTENT BIT(3) +/* for VM jobs. 
Caller needs to hold rpm ref when creating queue with this flag */ +#define EXEC_QUEUE_FLAG_VM BIT(4) +/* child of VM queue for multi-tile VM jobs */ +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) + + /** + * @flags: flags for this exec queue, should be set up statically aside + * from the ban bit + */ + unsigned long flags; + + union { + /** @multi_gt_list: list head for VM bind engines if multi-GT */ + struct list_head multi_gt_list; + /** @multi_gt_link: link for VM bind engines if multi-GT */ + struct list_head multi_gt_link; + }; + + union { + /** @execlist: execlist backend specific state for exec queue */ + struct xe_execlist_exec_queue *execlist; + /** @guc: GuC backend specific state for exec queue */ + struct xe_guc_exec_queue *guc; + }; + + /** + * @persistent: persistent exec queue state + */ + struct { + /** @xef: file which this exec queue belongs to */ + struct xe_file *xef; + /** @link: link in list of persistent exec queues */ + struct list_head link; + } persistent; + + union { + /** + * @parallel: parallel submission state + */ + struct { + /** @composite_fence_ctx: composite fence context */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + } parallel; + /** + * @bind: bind submission state + */ + struct { + /** @fence_ctx: bind fence context */ + u64 fence_ctx; + /** @fence_seqno: seqno for bind fence */ + u32 fence_seqno; + } bind; + }; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in microseconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in microseconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @compute: compute exec queue state */ + struct { + /** @pfence: preemption fence */ + struct dma_fence *pfence; + /** @context: preemption fence context */ + u64 context; + /** @seqno: preemption fence seqno */ + u32 seqno; + /** @link: link into VM's list of exec queues */ + struct list_head link; + /** @lock: preemption fences lock */ + spinlock_t lock; + } compute; + + /** @usm: unified shared memory state */ + struct { + /** @acc_trigger: access counter trigger */ + u32 acc_trigger; + /** @acc_notify: access counter notify */ + u32 acc_notify; + /** @acc_granularity: access counter granularity */ + u32 acc_granularity; + } usm; + + /** @ops: submission backend exec queue operations */ + const struct xe_exec_queue_ops *ops; + + /** @ring_ops: ring operations for this exec queue */ + const struct xe_ring_ops *ring_ops; + /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ + struct drm_sched_entity *entity; + /** @lrc: logical ring context for this exec queue */ + struct xe_lrc lrc[]; +}; + +/** + * struct xe_exec_queue_ops - Submission backend exec queue operations + */ +struct xe_exec_queue_ops { + /** @init: Initialize exec queue for submission backend */ + int (*init)(struct xe_exec_queue *q); + /** @kill: Kill inflight submissions for backend */ + void (*kill)(struct xe_exec_queue *q); + /** @fini: Fini exec queue for submission backend */ + void (*fini)(struct xe_exec_queue *q); + /** @set_priority: Set priority for exec queue */ + int (*set_priority)(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority); + /** @set_timeslice: Set timeslice for exec queue */ + int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us); + /** @set_preempt_timeout: Set preemption timeout for exec queue */ + int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us); + /** 
@set_job_timeout: Set job timeout for exec queue */ + int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms); + /** + * @suspend: Suspend exec queue from executing, allowed to be called + * multiple times in a row before resume, with the caveat that + * suspend_wait must return before suspend is called again. + */ + int (*suspend)(struct xe_exec_queue *q); + /** + * @suspend_wait: Wait for an exec queue to suspend executing, should be + * called after suspend. + */ + void (*suspend_wait)(struct xe_exec_queue *q); + /** + * @resume: Resume exec queue execution, the exec queue must be in a + * suspended state and the dma fence returned from the most recent + * suspend call must be signalled when this function is called. + */ + void (*resume)(struct xe_exec_queue *q); + /** @reset_status: check exec queue reset status */ + bool (*reset_status)(struct xe_exec_queue *q); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c new file mode 100644 index 000000000000..96b5224eb478 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_execlist.h" + +#include <drm/drm_managed.h> + +#include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" +#include "regs/xe_gpu_commands.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_ring_ops_types.h" +#include "xe_sched_job.h" + +#define XE_EXECLIST_HANG_LIMIT 1 + +#define SW_CTX_ID_SHIFT 37 +#define SW_CTX_ID_WIDTH 11 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 + +#define SW_CTX_ID \ + GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \ + SW_CTX_ID_SHIFT) + +#define XEHP_SW_CTX_ID \ + GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ + XEHP_SW_CTX_ID_SHIFT) + + +static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, + u32 ctx_id) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u64 lrc_desc; + + lrc_desc = xe_lrc_descriptor(lrc); + + if (GRAPHICS_VERx100(xe) >= 1250) { + xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); + } else { + xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id); + } + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + xe_mmio_write32(hwe->gt, RCU_MODE, + _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); + + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + lrc->ring.old_tail = lrc->ring.tail; + + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, but the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. 
+ */ + wmb(); + + xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base), + xe_bo_ggtt_addr(hwe->hwsp)); + xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base)); + xe_mmio_write32(gt, RING_MODE(hwe->mmio_base), + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), + lower_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base), + upper_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base), + EL_CTRL_LOAD); +} + +static void __xe_execlist_port_start(struct xe_execlist_port *port, + struct xe_execlist_exec_queue *exl) +{ + struct xe_device *xe = gt_to_xe(port->hwe->gt); + int max_ctx = FIELD_MAX(SW_CTX_ID); + + if (GRAPHICS_VERx100(xe) >= 1250) + max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); + + xe_execlist_port_assert_held(port); + + if (port->running_exl != exl || !exl->has_run) { + port->last_ctx_id++; + + /* 0 is reserved for the kernel context */ + if (port->last_ctx_id > max_ctx) + port->last_ctx_id = 1; + } + + __start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id); + port->running_exl = exl; + exl->has_run = true; +} + +static void __xe_execlist_port_idle(struct xe_execlist_port *port) +{ + u32 noop[2] = { MI_NOOP, MI_NOOP }; + + xe_execlist_port_assert_held(port); + + if (!port->running_exl) + return; + + xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop)); + __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0); + port->running_exl = NULL; +} + +static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl) +{ + struct xe_lrc *lrc = exl->q->lrc; + + return lrc->ring.tail == lrc->ring.old_tail; +} + +static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) +{ + struct xe_execlist_exec_queue *exl = NULL; + int i; + + xe_execlist_port_assert_held(port); + + for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { + while (!list_empty(&port->active[i])) { + exl = list_first_entry(&port->active[i], + struct xe_execlist_exec_queue, + active_link); + list_del(&exl->active_link); + + if (xe_execlist_is_idle(exl)) { + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; + continue; + } + + list_add_tail(&exl->active_link, &port->active[i]); + __xe_execlist_port_start(port, exl); + return; + } + } + + __xe_execlist_port_idle(port); +} + +static u64 read_execlist_status(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 hi, lo; + + lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); + hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); + + return lo | (u64)hi << 32; +} + +static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port) +{ + u64 status; + + xe_execlist_port_assert_held(port); + + status = read_execlist_status(port->hwe); + if (status & BIT(7)) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, + u16 intr_vec) +{ + struct xe_execlist_port *port = hwe->exl_port; + + spin_lock(&port->lock); + xe_execlist_port_irq_handler_locked(port); + spin_unlock(&port->lock); +} + +static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, + enum xe_exec_queue_priority priority) +{ + xe_execlist_port_assert_held(port); + + if (port->running_exl && port->running_exl->active_priority >= priority) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) +{ + struct xe_execlist_port *port = exl->port; + enum xe_exec_queue_priority priority = 
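The port scan above doubles as a priority arbiter and a round-robin within each priority level; a sketch of the resulting order, assuming three queued contexts:

/* Illustrative state only: with LOW..KERNEL mapped to active[0..3], the
 * scan starts from the highest-priority non-empty list, and the
 * list_del()/list_add_tail() pair rotates runnable queues within a level:
 *
 *   active[KERNEL]: q3            -> started first
 *   active[NORMAL]: q1 -> q2      -> once q3 idles, q1 runs and is moved
 *                                    to the tail, then q2, and so on
 */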
exl->active_priority; + + XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); + XE_WARN_ON(priority < 0); + XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active)); + + spin_lock_irq(&port->lock); + + if (exl->active_priority != priority && + exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) { + /* Priority changed, move it to the right list */ + list_del(&exl->active_link); + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; + } + + if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) { + exl->active_priority = priority; + list_add_tail(&exl->active_link, &port->active[priority]); + } + + xe_execlist_port_wake_locked(exl->port, priority); + + spin_unlock_irq(&port->lock); +} + +static void xe_execlist_port_irq_fail_timer(struct timer_list *timer) +{ + struct xe_execlist_port *port = + container_of(timer, struct xe_execlist_port, irq_fail); + + spin_lock_irq(&port->lock); + xe_execlist_port_irq_handler_locked(port); + spin_unlock_irq(&port->lock); + + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); +} + +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe) +{ + struct drm_device *drm = &xe->drm; + struct xe_execlist_port *port; + int i; + + port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + port->hwe = hwe; + + spin_lock_init(&port->lock); + for (i = 0; i < ARRAY_SIZE(port->active); i++) + INIT_LIST_HEAD(&port->active[i]); + + port->last_ctx_id = 1; + port->running_exl = NULL; + + hwe->irq_handler = xe_execlist_port_irq_handler; + + /* TODO: Fix the interrupt code so it doesn't race like mad */ + timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0); + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); + + return port; +} + +void xe_execlist_port_destroy(struct xe_execlist_port *port) +{ + del_timer(&port->irq_fail); + + /* Prevent an interrupt while we're destroying */ + spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); + port->hwe->irq_handler = NULL; + spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock); +} + +static struct dma_fence * +execlist_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_exec_queue *q = job->q; + struct xe_execlist_exec_queue *exl = job->q->execlist; + + q->ring_ops->emit_job(job); + xe_execlist_make_active(exl); + + return dma_fence_get(job->fence); +} + +static void execlist_job_free(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + xe_sched_job_put(job); +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = execlist_run_job, + .free_job = execlist_job_free, +}; + +static int execlist_exec_queue_init(struct xe_exec_queue *q) +{ + struct drm_gpu_scheduler *sched; + struct xe_execlist_exec_queue *exl; + struct xe_device *xe = gt_to_xe(q->gt); + int err; + + xe_assert(xe, !xe_device_uc_enabled(xe)); + + drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); + + exl = kzalloc(sizeof(*exl), GFP_KERNEL); + if (!exl) + return -ENOMEM; + + exl->q = q; + + err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1, + q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT, + NULL, NULL, q->hwe->name, + gt_to_xe(q->gt)->drm.dev); + if (err) + goto err_free; + + sched = &exl->sched; + err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + exl->port = 
q->hwe->exl_port; + exl->has_run = false; + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; + q->execlist = exl; + q->entity = &exl->entity; + + xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1); + + return 0; + +err_sched: + drm_sched_fini(&exl->sched); +err_free: + kfree(exl); + return err; +} + +static void execlist_exec_queue_fini_async(struct work_struct *w) +{ + struct xe_execlist_exec_queue *ee = + container_of(w, struct xe_execlist_exec_queue, fini_async); + struct xe_exec_queue *q = ee->q; + struct xe_execlist_exec_queue *exl = q->execlist; + struct xe_device *xe = gt_to_xe(q->gt); + unsigned long flags; + + xe_assert(xe, !xe_device_uc_enabled(xe)); + + spin_lock_irqsave(&exl->port->lock, flags); + if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET)) + list_del(&exl->active_link); + spin_unlock_irqrestore(&exl->port->lock, flags); + + if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) + xe_device_remove_persistent_exec_queues(xe, q); + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + kfree(exl); + + xe_exec_queue_fini(q); +} + +static void execlist_exec_queue_kill(struct xe_exec_queue *q) +{ + /* NIY */ +} + +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); + queue_work(system_unbound_wq, &q->execlist->fini_async); +} + +static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority) +{ + /* NIY */ + return 0; +} + +static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) +{ + /* NIY */ + return 0; +} + +static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, + u32 preempt_timeout_us) +{ + /* NIY */ + return 0; +} + +static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q, + u32 job_timeout_ms) +{ + /* NIY */ + return 0; +} + +static int execlist_exec_queue_suspend(struct xe_exec_queue *q) +{ + /* NIY */ + return 0; +} + +static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q) + +{ + /* NIY */ +} + +static void execlist_exec_queue_resume(struct xe_exec_queue *q) +{ + /* NIY */ +} + +static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) +{ + /* NIY */ + return false; +} + +static const struct xe_exec_queue_ops execlist_exec_queue_ops = { + .init = execlist_exec_queue_init, + .kill = execlist_exec_queue_kill, + .fini = execlist_exec_queue_fini, + .set_priority = execlist_exec_queue_set_priority, + .set_timeslice = execlist_exec_queue_set_timeslice, + .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, + .set_job_timeout = execlist_exec_queue_set_job_timeout, + .suspend = execlist_exec_queue_suspend, + .suspend_wait = execlist_exec_queue_suspend_wait, + .resume = execlist_exec_queue_resume, + .reset_status = execlist_exec_queue_reset_status, +}; + +int xe_execlist_init(struct xe_gt *gt) +{ + /* GuC submission enabled, nothing to do */ + if (xe_device_uc_enabled(gt_to_xe(gt))) + return 0; + + gt->exec_queue_ops = &execlist_exec_queue_ops; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h new file mode 100644 index 000000000000..26f600ac8552 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_EXECLIST_H_ +#define _XE_EXECLIST_H_ + +#include "xe_execlist_types.h" + +struct xe_device; +struct xe_gt; + +#define xe_execlist_port_assert_held(port) 
lockdep_assert_held(&(port)->lock) + +int xe_execlist_init(struct xe_gt *gt); +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe); +void xe_execlist_port_destroy(struct xe_execlist_port *port); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h new file mode 100644 index 000000000000..f94bbf4c53e4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXECLIST_TYPES_H_ +#define _XE_EXECLIST_TYPES_H_ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include "xe_exec_queue_types.h" + +struct xe_hw_engine; +struct xe_execlist_exec_queue; + +struct xe_execlist_port { + struct xe_hw_engine *hwe; + + spinlock_t lock; + + struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT]; + + u32 last_ctx_id; + + struct xe_execlist_exec_queue *running_exl; + + struct timer_list irq_fail; +}; + +struct xe_execlist_exec_queue { + struct xe_exec_queue *q; + + struct drm_gpu_scheduler sched; + + struct drm_sched_entity entity; + + struct xe_execlist_port *port; + + bool has_run; + + struct work_struct fini_async; + + enum xe_exec_queue_priority active_priority; + struct list_head active_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c new file mode 100644 index 000000000000..9bbe8a5040da --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_force_wake.h" + +#include <drm/drm_util.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_reg_defs.h" +#include "xe_gt.h" +#include "xe_mmio.h" + +#define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 + +static struct xe_gt * +fw_to_gt(struct xe_force_wake *fw) +{ + return fw->gt; +} + +static struct xe_device * +fw_to_xe(struct xe_force_wake *fw) +{ + return gt_to_xe(fw_to_gt(fw)); +} + +static void domain_init(struct xe_force_wake_domain *domain, + enum xe_force_wake_domain_id id, + struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask) +{ + domain->id = id; + domain->reg_ctl = reg; + domain->reg_ack = ack; + domain->val = val; + domain->mask = mask; +} + +void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) +{ + struct xe_device *xe = gt_to_xe(gt); + + fw->gt = gt; + spin_lock_init(&fw->lock); + + /* Assuming gen11+ so assert this assumption is correct */ + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); + + if (xe->info.graphics_verx100 >= 1270) { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT, + FORCEWAKE_ACK_GT_MTL, + BIT(0), BIT(16)); + } else { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT, + FORCEWAKE_ACK_GT, + BIT(0), BIT(16)); + } +} + +void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) +{ + int i, j; + + /* Assuming gen11+ so assert this assumption is correct */ + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); + + if (!xe_gt_is_media_type(gt)) + domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], + XE_FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER, + FORCEWAKE_ACK_RENDER, + BIT(0), BIT(16)); + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, + 
FORCEWAKE_MEDIA_VDBOX(j), + FORCEWAKE_ACK_MEDIA_VDBOX(j), + BIT(0), BIT(16)); + } + + for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, + FORCEWAKE_MEDIA_VEBOX(j), + FORCEWAKE_ACK_MEDIA_VEBOX(j), + BIT(0), BIT(16)); + } + + if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], + XE_FW_DOMAIN_ID_GSC, + FORCEWAKE_GSC, + FORCEWAKE_ACK_GSC, + BIT(0), BIT(16)); +} + +static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); +} + +static int domain_wake_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + NULL, true); +} + +static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask); +} + +static int domain_sleep_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + NULL, true); +} + +#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ + for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ + for_each_if((domain__ = ((fw__)->domains + \ + (ffs(tmp__) - 1))) && \ + domain__->reg_ctl.addr) + +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, woken = 0; + unsigned long flags; + int ret, ret2 = 0; + + spin_lock_irqsave(&fw->lock, flags); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!domain->ref++) { + woken |= BIT(domain->id); + domain_wake(gt, domain); + } + } + for_each_fw_domain_masked(domain, woken, fw, tmp) { + ret = domain_wake_wait(gt, domain); + ret2 |= ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", + domain->id, ret); + } + fw->awake_domains |= woken; + spin_unlock_irqrestore(&fw->lock, flags); + + return ret2; +} + +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, sleep = 0; + unsigned long flags; + int ret, ret2 = 0; + + spin_lock_irqsave(&fw->lock, flags); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!--domain->ref) { + sleep |= BIT(domain->id); + domain_sleep(gt, domain); + } + } + for_each_fw_domain_masked(domain, sleep, fw, tmp) { + ret = domain_sleep_wait(gt, domain); + ret2 |= ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", + domain->id, ret); + } + fw->awake_domains &= ~sleep; + spin_unlock_irqrestore(&fw->lock, flags); + + return ret2; +} diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h new file mode 100644 index 000000000000..83cb157da7cc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_H_ +#define _XE_FORCE_WAKE_H_ + +#include "xe_assert.h" +#include 
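The usual calling pattern for the get/put pair above, as used for example by the GGTT invalidation code later in this patch (error handling simplified in the sketch):

int err;

err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
	return err; /* one or more domains failed to ack the wake */

/* ... GT registers are now safe to access via xe_mmio_*() ... */

xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);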
"xe_force_wake_types.h" + +struct xe_gt; + +void xe_force_wake_init_gt(struct xe_gt *gt, + struct xe_force_wake *fw); +void xe_force_wake_init_engines(struct xe_gt *gt, + struct xe_force_wake *fw); +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); + +static inline int +xe_force_wake_ref(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + xe_gt_assert(fw->gt, domain); + return fw->domains[ffs(domain) - 1].ref; +} + +static inline void +xe_force_wake_assert_held(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + xe_gt_assert(fw->gt, fw->awake_domains & domain); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h new file mode 100644 index 000000000000..ed0edc2cdf9f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_TYPES_H_ +#define _XE_FORCE_WAKE_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "regs/xe_reg_defs.h" + +enum xe_force_wake_domain_id { + XE_FW_DOMAIN_ID_GT = 0, + XE_FW_DOMAIN_ID_RENDER, + XE_FW_DOMAIN_ID_MEDIA, + XE_FW_DOMAIN_ID_MEDIA_VDBOX0, + XE_FW_DOMAIN_ID_MEDIA_VDBOX1, + XE_FW_DOMAIN_ID_MEDIA_VDBOX2, + XE_FW_DOMAIN_ID_MEDIA_VDBOX3, + XE_FW_DOMAIN_ID_MEDIA_VDBOX4, + XE_FW_DOMAIN_ID_MEDIA_VDBOX5, + XE_FW_DOMAIN_ID_MEDIA_VDBOX6, + XE_FW_DOMAIN_ID_MEDIA_VDBOX7, + XE_FW_DOMAIN_ID_MEDIA_VEBOX0, + XE_FW_DOMAIN_ID_MEDIA_VEBOX1, + XE_FW_DOMAIN_ID_MEDIA_VEBOX2, + XE_FW_DOMAIN_ID_MEDIA_VEBOX3, + XE_FW_DOMAIN_ID_GSC, + XE_FW_DOMAIN_ID_COUNT +}; + +enum xe_force_wake_domains { + XE_FW_GT = BIT(XE_FW_DOMAIN_ID_GT), + XE_FW_RENDER = BIT(XE_FW_DOMAIN_ID_RENDER), + XE_FW_MEDIA = BIT(XE_FW_DOMAIN_ID_MEDIA), + XE_FW_MEDIA_VDBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0), + XE_FW_MEDIA_VDBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1), + XE_FW_MEDIA_VDBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2), + XE_FW_MEDIA_VDBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3), + XE_FW_MEDIA_VDBOX4 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4), + XE_FW_MEDIA_VDBOX5 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5), + XE_FW_MEDIA_VDBOX6 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6), + XE_FW_MEDIA_VDBOX7 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7), + XE_FW_MEDIA_VEBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0), + XE_FW_MEDIA_VEBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1), + XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), + XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), + XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 +}; + +/** + * struct xe_force_wake_domain - XE force wake domains + */ +struct xe_force_wake_domain { + /** @id: domain force wake id */ + enum xe_force_wake_domain_id id; + /** @reg_ctl: domain wake control register address */ + struct xe_reg reg_ctl; + /** @reg_ack: domain ack register address */ + struct xe_reg reg_ack; + /** @val: domain wake write value */ + u32 val; + /** @mask: domain mask */ + u32 mask; + /** @ref: domain reference */ + u32 ref; +}; + +/** + * struct xe_force_wake - XE force wake + */ +struct xe_force_wake { + /** @gt: back pointers to GT */ + struct xe_gt *gt; + /** @lock: protects everything force wake struct */ + spinlock_t lock; + /** @awake_domains: mask of all domains awake */ + enum xe_force_wake_domains awake_domains; + /** @domains: force wake domains */ + struct xe_force_wake_domain 
domains[XE_FW_DOMAIN_ID_COUNT]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c new file mode 100644 index 000000000000..106ee2b027f0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#define _GNU_SOURCE +#include <ctype.h> +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +#define HEADER \ + "// SPDX-License-Identifier: MIT\n" \ + "\n" \ + "/*\n" \ + " * DO NOT MODIFY.\n" \ + " *\n" \ + " * This file was generated from rules: %s\n" \ + " */\n" \ + "#ifndef _GENERATED_XE_WA_OOB_\n" \ + "#define _GENERATED_XE_WA_OOB_\n" \ + "\n" \ + "enum {\n" + +#define FOOTER \ + "};\n" \ + "\n" \ + "#endif\n" + +static void print_usage(FILE *f) +{ + fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n", + program_invocation_short_name); +} + +static void print_parse_error(const char *err_msg, const char *line, + unsigned int lineno) +{ + fprintf(stderr, "ERROR: %s\nERROR: %u: %.60s\n", + err_msg, lineno, line); +} + +static char *strip(char *line, size_t linelen) +{ + while (isspace(*(line + linelen))) + linelen--; + + line[linelen - 1] = '\0'; + + return line + strspn(line, " \f\n\r\t\v"); +} + +#define MAX_LINE_LEN 4096 +static int parse(FILE *input, FILE *csource, FILE *cheader) +{ + char line[MAX_LINE_LEN + 1]; + char *name, *prev_name = NULL, *rules; + unsigned int lineno = 0, idx = 0; + + while (fgets(line, sizeof(line), input)) { + size_t linelen; + bool is_continuation; + + if (line[0] == '\0' || line[0] == '#' || line[0] == '\n') { + lineno++; + continue; + } + + linelen = strlen(line); + if (linelen == MAX_LINE_LEN) { + print_parse_error("line too long", line, lineno); + return -EINVAL; + } + + is_continuation = isspace(line[0]); + name = strip(line, linelen); + + if (!is_continuation) { + name = strtok(name, " \t"); + rules = strtok(NULL, ""); + } else { + if (!prev_name) { + print_parse_error("invalid rule continuation", + line, lineno); + return -EINVAL; + } + + rules = name; + name = NULL; + } + + if (rules[0] == '\0') { + print_parse_error("invalid empty rule", line, lineno); + return -EINVAL; + } + + if (name) { + fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n", + name, rules); + } else { + fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n", + rules); + } + + idx++; + lineno++; + if (!is_continuation) + prev_name = name; + } + + fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + + return 0; +} + +int main(int argc, const char *argv[]) +{ + enum { + ARGS_INPUT, + ARGS_CSOURCE, + ARGS_CHEADER, + _ARGS_COUNT + }; + struct { + const char *fn; + const char *mode; + FILE *f; + } args[] = { + [ARGS_INPUT] = { .fn = argv[1], .mode = "r" }, + [ARGS_CSOURCE] = { .fn = argv[2], .mode = "w" }, + [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, + }; + int ret = 1; + + if (argc < 4) { + fprintf(stderr, "ERROR: wrong arguments\n"); + print_usage(stderr); + return 1; + } + + for (int i = 0; i < _ARGS_COUNT; i++) { + args[i].f = fopen(args[i].fn, args[i].mode); + if (!args[i].f) { + fprintf(stderr, "ERROR: Can't open %s: %m\n", + args[i].fn); + goto err; + } + } + + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, + args[ARGS_CHEADER].f); + if (!ret) + fprintf(args[ARGS_CHEADER].f, FOOTER); + +err: + for (int i = 0; i < 
_ARGS_COUNT; i++) { + if (args[i].f) + fclose(args[i].f); + } + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c new file mode 100644 index 000000000000..3efd2d066bf7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_ggtt.h" + +#include <linux/sizes.h> + +#include <drm/drm_managed.h> +#include <drm/i915_drm.h> + +#include "regs/xe_gt_regs.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_tlb_invalidation.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_wopcm.h" + +#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) +#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 + +static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + u16 pat_index) +{ + u64 pte; + + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pte |= XE_PAGE_PRESENT; + + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) + pte |= XE_GGTT_PTE_DM; + + return pte; +} + +static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + u16 pat_index) +{ + struct xe_device *xe = xe_bo_device(bo); + u64 pte; + + pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); + + xe_assert(xe, pat_index <= 3); + + if (pat_index & BIT(0)) + pte |= XELPG_GGTT_PTE_PAT0; + + if (pat_index & BIT(1)) + pte |= XELPG_GGTT_PTE_PAT1; + + return pte; +} + +static unsigned int probe_gsm_size(struct pci_dev *pdev) +{ + u16 gmch_ctl, ggms; + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); + ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; + return ggms ? SZ_1M << ggms : 0; +} + +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) +{ + xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); + xe_tile_assert(ggtt->tile, addr < ggtt->size); + + writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); +} + +static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; + u64 end = start + size - 1; + u64 scratch_pte; + + xe_tile_assert(ggtt->tile, start < end); + + if (ggtt->scratch) + scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, + pat_index); + else + scratch_pte = 0; + + while (start < end) { + xe_ggtt_set_pte(ggtt, start, scratch_pte); + start += XE_PAGE_SIZE; + } +} + +static void ggtt_fini_early(struct drm_device *drm, void *arg) +{ + struct xe_ggtt *ggtt = arg; + + mutex_destroy(&ggtt->lock); + drm_mm_takedown(&ggtt->mm); +} + +static void ggtt_fini(struct drm_device *drm, void *arg) +{ + struct xe_ggtt *ggtt = arg; + + ggtt->scratch = NULL; +} + +static void primelockdep(struct xe_ggtt *ggtt) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ggtt->lock); + fs_reclaim_release(GFP_KERNEL); +} + +static const struct xe_ggtt_pt_ops xelp_pt_ops = { + .pte_encode_bo = xelp_ggtt_pte_encode_bo, +}; + +static const struct xe_ggtt_pt_ops xelpg_pt_ops = { + .pte_encode_bo = xelpg_ggtt_pte_encode_bo, +}; + +/* + * Early GGTT initialization, which allows creating new mappings usable by the + * GuC. + * Mappings are not usable by the HW engines, as the GGTT doesn't have its + * scratch / initial clear done yet. That will happen in the regular, non-early + * GGTT init. 
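As a concrete illustration of the input format parse() accepts, a hypothetical rules-file entry (the workaround number and rule expressions are made up) and the lines the generator would emit for it, per the fprintf formats above:

input line (tab-separated name, then rules):
	14011060649	MEDIA_VERSION_RANGE(1300, 1355), ENGINE_CLASS(VIDEO_DECODE)

generated header entry:
	XE_WA_OOB_14011060649 = 0,

generated source entry:
	{ XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, 1355), ENGINE_CLASS(VIDEO_DECODE)) },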
+ */ +int xe_ggtt_init_early(struct xe_ggtt *ggtt) +{ + struct xe_device *xe = tile_to_xe(ggtt->tile); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned int gsm_size; + + gsm_size = probe_gsm_size(pdev); + if (gsm_size == 0) { + drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + return -ENOMEM; + } + + ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M; + ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE; + + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + ggtt->flags |= XE_GGTT_FLAGS_64K; + + /* + * 8B per entry, each points to a 4KB page. + * + * The GuC address space is limited on both ends of the GGTT, because + * the GuC shim HW redirects accesses to those addresses to other HW + * areas instead of going through the GGTT. On the bottom end, the GuC + * can't access offsets below the WOPCM size, while on the top side the + * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of + * checking each object to see if they are accessed by GuC or not, we + * just exclude those areas from the allocator. Additionally, to + * simplify the driver load, we use the maximum WOPCM size in this logic + * instead of the programmed one, so we don't need to wait until the + * actual size to be programmed is determined (which requires FW fetch) + * before initializing the GGTT. These simplifications might waste space + * in the GGTT (about 20-25 MB depending on the platform) but we can + * live with this. + * + * Another benefit of this is the GuC bootrom can't access anything + * below the WOPCM max size so anything the bootrom needs to access (e.g. + * an RSA key) needs to be placed in the GGTT above the WOPCM max size. + * Starting the GGTT allocations above the WOPCM max gives us the correct + * placement for free. + */ + if (ggtt->size > GUC_GGTT_TOP) + ggtt->size = GUC_GGTT_TOP; + + if (GRAPHICS_VERx100(xe) >= 1270) + ggtt->pt_ops = &xelpg_pt_ops; + else + ggtt->pt_ops = &xelp_pt_ops; + + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), + ggtt->size - xe_wopcm_size(xe)); + mutex_init(&ggtt->lock); + primelockdep(ggtt); + + return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); +} + +static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) +{ + struct drm_mm_node *hole; + u64 start, end; + + /* Display may have allocated inside ggtt, so be careful with clearing here */ + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + mutex_lock(&ggtt->lock); + drm_mm_for_each_hole(hole, &ggtt->mm, start, end) + xe_ggtt_clear(ggtt, start, end - start); + + xe_ggtt_invalidate(ggtt); + mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); +} + +int xe_ggtt_init(struct xe_ggtt *ggtt) +{ + struct xe_device *xe = tile_to_xe(ggtt->tile); + unsigned int flags; + int err; + + /* + * So we don't need to worry about 64K GGTT layout when dealing with + * scratch entries, rather keep the scratch page in system memory on + * platforms where 64K pages are needed for VRAM. 
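To make the sizing above concrete, a worked example (the GGMS value is assumed):

/* Illustrative only: if SNB_GMCH_CTRL reports GGMS = 3, probe_gsm_size()
 * returns SZ_1M << 3 = 8M of GSM. At 8 bytes per PTE that is 1M entries,
 * each mapping a 4K page:
 *
 *   ggtt->size = (8M / 8) * 4K = 4G of GGTT address space,
 *
 * subsequently clamped to GUC_GGTT_TOP and carved below xe_wopcm_size()
 * as the comment above describes. */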
+ */ + flags = XE_BO_CREATE_PINNED_BIT; + if (ggtt->flags & XE_GGTT_FLAGS_64K) + flags |= XE_BO_CREATE_SYSTEM_BIT; + else + flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile); + + ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags); + if (IS_ERR(ggtt->scratch)) { + err = PTR_ERR(ggtt->scratch); + goto err; + } + + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + + xe_ggtt_initial_clear(ggtt); + + return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); +err: + ggtt->scratch = NULL; + return err; +} + +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) + +static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) +{ + if (!gt) + return; + + /* + * Invalidation can happen when there's no in-flight work keeping the + * GT awake. We need to explicitly grab forcewake to ensure the GT + * and GuC are accessible. + */ + xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + + /* TODO: vfunc for GuC vs. non-GuC */ + + if (gt->uc.guc.submission_state.enabled) { + int seqno; + + seqno = xe_gt_tlb_invalidation_guc(gt); + xe_gt_assert(gt, seqno > 0); + if (seqno > 0) + xe_gt_tlb_invalidation_wait(gt, seqno); + } else if (xe_device_uc_enabled(gt_to_xe(gt))) { + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else + xe_mmio_write32(gt, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +void xe_ggtt_invalidate(struct xe_ggtt *ggtt) +{ + /* Each GT in a tile has its own TLB to cache GGTT lookups */ + ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); + ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); +} + +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) +{ + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; + u64 addr, scratch_pte; + + scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); + + printk("%sGlobal GTT:", prefix); + for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { + unsigned int i = addr / XE_PAGE_SIZE; + + xe_tile_assert(ggtt->tile, addr <= U32_MAX); + if (ggtt->gsm[i] == scratch_pte) + continue; + + printk("%s ggtt[0x%08x] = 0x%016llx", + prefix, (u32)addr, ggtt->gsm[i]); + } +} + +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags) +{ + return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + mm_flags); +} + +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align) +{ + int ret; + + mutex_lock(&ggtt->lock); + ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, + align, DRM_MM_INSERT_HIGH); + mutex_unlock(&ggtt->lock); + + return ret; +} + +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; + u64 start = bo->ggtt_node.start; + u64 offset, pte; + + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { + pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); + xe_ggtt_set_pte(ggtt, start + offset, pte); + } + + xe_ggtt_invalidate(ggtt); +} + +static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo 
*bo, + u64 start, u64 end) +{ + int err; + u64 alignment = XE_PAGE_SIZE; + + if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) + alignment = SZ_64K; + + if (XE_WARN_ON(bo->ggtt_node.size)) { + /* Someone's already inserted this BO in the GGTT */ + xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + return 0; + } + + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + mutex_lock(&ggtt->lock); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + alignment, 0, start, end, 0); + if (!err) + xe_ggtt_map_bo(ggtt, bo); + mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + + return err; +} + +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, + u64 start, u64 end) +{ + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); +} + +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); +} + +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) +{ + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + mutex_lock(&ggtt->lock); + + xe_ggtt_clear(ggtt, node->start, node->size); + drm_mm_remove_node(node); + node->size = 0; + + xe_ggtt_invalidate(ggtt); + + mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); +} + +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + if (XE_WARN_ON(!bo->ggtt_node.size)) + return; + + /* This BO is not currently in the GGTT */ + xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + + xe_ggtt_remove_node(ggtt, &bo->ggtt_node); +} + +int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) +{ + int err; + + err = mutex_lock_interruptible(&ggtt->lock); + if (err) + return err; + + drm_mm_print(&ggtt->mm, p); + mutex_unlock(&ggtt->lock); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h new file mode 100644 index 000000000000..a09c166dff70 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_GGTT_H_ +#define _XE_GGTT_H_ + +#include "xe_ggtt_types.h" + +struct drm_printer; + +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); +void xe_ggtt_invalidate(struct xe_ggtt *ggtt); +int xe_ggtt_init_early(struct xe_ggtt *ggtt); +int xe_ggtt_init(struct xe_ggtt *ggtt); +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); + +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align); +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, + struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, + u64 start, u64 end); +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); + +int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h new file mode 100644 index 000000000000..d8c584d9a8c3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GGTT_TYPES_H_ +#define _XE_GGTT_TYPES_H_ + +#include 
<drm/drm_mm.h> + +#include "xe_pt_types.h" + +struct xe_bo; +struct xe_gt; + +struct xe_ggtt_pt_ops { + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); +}; + +struct xe_ggtt { + struct xe_tile *tile; + + u64 size; + +#define XE_GGTT_FLAGS_64K BIT(0) + unsigned int flags; + + struct xe_bo *scratch; + + struct mutex lock; + + u64 __iomem *gsm; + + const struct xe_ggtt_pt_ops *pt_ops; + + struct drm_mm mm; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c new file mode 100644 index 000000000000..e4ad1d6ce1d5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gpu_scheduler.h" + +static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->base.pause_submit)) + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); + if (msg) + xe_sched_process_msg_queue(sched); + spin_unlock(&sched->base.job_list_lock); +} + +static struct xe_sched_msg * +xe_sched_get_msg(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, + struct xe_sched_msg, link); + if (msg) + list_del(&msg->link); + spin_unlock(&sched->base.job_list_lock); + + return msg; +} + +static void xe_sched_process_msg_work(struct work_struct *w) +{ + struct xe_gpu_scheduler *sched = + container_of(w, struct xe_gpu_scheduler, work_process_msg); + struct xe_sched_msg *msg; + + if (READ_ONCE(sched->base.pause_submit)) + return; + + msg = xe_sched_get_msg(sched); + if (msg) { + sched->ops->process_msg(msg); + + xe_sched_process_msg_queue_if_ready(sched); + } +} + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev) +{ + sched->ops = xe_ops; + INIT_LIST_HEAD(&sched->msgs); + INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work); + + return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission, + hang_limit, timeout, timeout_wq, score, name, + dev); +} + +void xe_sched_fini(struct xe_gpu_scheduler *sched) +{ + xe_sched_submission_stop(sched); + drm_sched_fini(&sched->base); +} + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_start(&sched->base); + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_stop(&sched->base); + cancel_work_sync(&sched->work_process_msg); +} + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + spin_lock(&sched->base.job_list_lock); + list_add_tail(&msg->link, &sched->msgs); + spin_unlock(&sched->base.job_list_lock); + + xe_sched_process_msg_queue(sched); +} diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h new file mode 100644 index 000000000000..10c6bb9c9386 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -0,0 +1,73 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_H_ +#define _XE_GPU_SCHEDULER_H_ + +#include "xe_gpu_scheduler_types.h" +#include "xe_sched_job_types.h" + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev); +void xe_sched_fini(struct xe_gpu_scheduler *sched); + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) +{ + drm_sched_stop(&sched->base, NULL); +} + +static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) +{ + drm_sched_tdr_queue_imm(&sched->base); +} + +static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) +{ + drm_sched_resubmit_jobs(&sched->base); +} + +static inline bool +xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) +{ + return drm_sched_invalidate_job(&job->drm, threshold); +} + +static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, + struct xe_sched_job *job) +{ + list_add(&job->drm.list, &sched->base.pending_list); +} + +static inline +struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) +{ + return list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); +} + +static inline int +xe_sched_entity_init(struct xe_sched_entity *entity, + struct xe_gpu_scheduler *sched) +{ + return drm_sched_entity_init(entity, 0, + (struct drm_gpu_scheduler **)&sched, + 1, NULL); +} + +#define xe_sched_entity_fini drm_sched_entity_fini + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h new file mode 100644 index 000000000000..6731b13da8bb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_TYPES_H_ +#define _XE_GPU_SCHEDULER_TYPES_H_ + +#include <drm/gpu_scheduler.h> + +/** + * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue) + * message + * + * Generic enough for backend defined messages, backend can expand if needed. + */ +struct xe_sched_msg { + /** @link: list link into the gpu scheduler list of messages */ + struct list_head link; + /** + * @private_data: opaque pointer to message private data (backend defined) + */ + void *private_data; + /** @opcode: opcode of message (backend defined) */ + unsigned int opcode; +}; + +/** + * struct xe_sched_backend_ops - Define the backend operations called by the + * scheduler + */ +struct xe_sched_backend_ops { + /** + * @process_msg: Process a message. Allowed to block, it is this + * function's responsibility to free message if dynamically allocated. 
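+ * + * As a minimal illustrative sketch (hypothetical names, not part of this + * patch), a backend handler would typically switch on @opcode and free + * the message once consumed: + * + * static void my_process_msg(struct xe_sched_msg *msg) + * { + * switch (msg->opcode) { + * case MY_OPCODE_CLEANUP: + * my_cleanup(msg->private_data); + * kfree(msg); + * break; + * } + * }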
+ */ + void (*process_msg)(struct xe_sched_msg *msg); +}; + +/** + * struct xe_gpu_scheduler - Xe GPU scheduler + */ +struct xe_gpu_scheduler { + /** @base: DRM GPU scheduler */ + struct drm_gpu_scheduler base; + /** @ops: Xe scheduler ops */ + const struct xe_sched_backend_ops *ops; + /** @msgs: list of messages to be processed in @work_process_msg */ + struct list_head msgs; + /** @work_process_msg: processes messages */ + struct work_struct work_process_msg; +}; + +#define xe_sched_entity drm_sched_entity +#define xe_sched_policy drm_sched_policy + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c new file mode 100644 index 000000000000..a8a895cf4b44 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc.h" + +#include <drm/drm_managed.h> + +#include "abi/gsc_mkhi_commands_abi.h" +#include "generated/xe_wa_oob.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_huc.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_sched_job.h" +#include "xe_uc_fw.h" +#include "xe_wa.h" +#include "instructions/xe_gsc_commands.h" +#include "regs/xe_gsc_regs.h" + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +static int memcpy_fw(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + u32 fw_size = gsc->fw.size; + void *storage; + + /* + * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use + * a memcpy for now. + */ + storage = kmalloc(fw_size, GFP_KERNEL); + if (!storage) + return -ENOMEM; + + xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); + xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + + kfree(storage); + + return 0; +} + +static int emit_gsc_upload(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + u64 offset = xe_bo_ggtt_addr(gsc->private); + struct xe_bb *bb; + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + bb->cs[bb->len++] = GSC_FW_LOAD; + bb->cs[bb->len++] = lower_32_bits(offset); + bb->cs[bb->len++] = upper_32_bits(offset); + bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + + job = xe_bb_create_job(gsc->q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +#define version_query_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_) +#define version_query_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_) + +static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset) +{ + xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in)); + + version_query_wr(xe, map, wr_offset, header.group_id, 
MKHI_GROUP_ID_GFX_SRV); + version_query_wr(xe, map, wr_offset, header.command, + MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION); + + return wr_offset + sizeof(struct gsc_get_compatibility_version_in); +} + +#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ +static int query_compatibility_version(struct xe_gsc *gsc) +{ + struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 wr_offset; + u32 rd_offset; + u64 ggtt_offset; + int err; + + bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); + return PTR_ERR(bo); + } + + ggtt_offset = xe_bo_ggtt_addr(bo); + + wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, + sizeof(struct gsc_get_compatibility_version_in)); + wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); + + err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, + ggtt_offset + GSC_VER_PKT_SZ, + GSC_VER_PKT_SZ); + if (err) { + xe_gt_err(gt, + "failed to submit GSC request for compatibility version: %d\n", + err); + goto out_bo; + } + + err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, + sizeof(struct gsc_get_compatibility_version_out), + &rd_offset); + if (err) { + xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); + goto out_bo; + } + + compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); + compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); + + xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); + +out_bo: + xe_bo_unpin_map_no_vm(bo); + return err; +} + +static int gsc_fw_is_loaded(struct xe_gt *gt) +{ + return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & + HECI1_FWSTS1_INIT_COMPLETE; +} + +static int gsc_fw_wait(struct xe_gt *gt) +{ + /* + * GSC load can take up to 250ms from the moment the instruction is + * executed by the GSCCS. To account for possible submission delays or + * other issues, we use a 500ms timeout in the wait here. + */ + return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_INIT_COMPLETE, + HECI1_FWSTS1_INIT_COMPLETE, + 500 * USEC_PER_MSEC, NULL, false); +} + +static int gsc_upload(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* we should only be here if the init step was successful */ + xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); + + if (gsc_fw_is_loaded(gt)) { + xe_gt_err(gt, "GSC already loaded at upload time\n"); + return -EEXIST; + } + + err = memcpy_fw(gsc); + if (err) { + xe_gt_err(gt, "Failed to memcpy GSC FW\n"); + return err; + } + + /* + * GSC is only killed by an FLR, so we need to trigger one on unload to + * make sure we stop it. This is because we assign a chunk of memory to + * the GSC as part of the FW load, so we need to make sure it stops + * using it when we release it to the system on driver unload. Note that + * this is not a problem of the unload per se, because the GSC will not + * touch that memory unless there are requests for it coming from the + * driver; therefore, no accesses will happen while Xe is not loaded, + * but if we re-load the driver then the GSC might wake up and try to + * access that old memory location again.
+ * Given that an FLR is a very disruptive action (see the FLR function + * for details), we want to do it as the last action before releasing + * the access to the MMIO bar, which means we need to do it as part of + * mmio cleanup. + */ + xe->needs_flr_on_fini = true; + + err = emit_gsc_upload(gsc); + if (err) { + xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); + return err; + } + + err = gsc_fw_wait(gt); + if (err) { + xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); + return err; + } + + err = query_compatibility_version(gsc); + if (err) + return err; + + err = xe_uc_fw_check_version_requirements(&gsc->fw); + if (err) + return err; + + xe_gt_dbg(gt, "GSC FW async load completed\n"); + + return 0; +} + +static void gsc_work(struct work_struct *work) +{ + struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + int ret; + + xe_device_mem_access_get(xe); + xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + + ret = gsc_upload(gsc); + if (ret && ret != -EEXIST) { + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + goto out; + } + + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + + /* HuC auth failure is not fatal */ + if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC)) + xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC); + +out: + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_device_mem_access_put(xe); +} + +int xe_gsc_init(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + int ret; + + gsc->fw.type = XE_UC_FW_TYPE_GSC; + INIT_WORK(&gsc->work, gsc_work); + + /* The GSC uC is only available on the media GT */ + if (tile->media_gt && (gt != tile->media_gt)) { + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + + /* + * Some platforms can have GuC but not GSC. That would cause + * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort + * all firmware loading. So check for GSC being enabled before + * propagating the failure back up. That way the higher level will keep + * going and load GuC as appropriate.
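+ * + * Put differently (illustrative of the intent only): on a GuC-only + * platform xe_uc_fw_init() fails here, xe_uc_fw_is_enabled() then + * reports false and we return 0, so the GSC is treated as absent + * rather than as a fatal probe error.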
+ */ + ret = xe_uc_fw_init(&gsc->fw); + if (!xe_uc_fw_is_enabled(&gsc->fw)) + return 0; + else if (ret) + goto out; + + return 0; + +out: + xe_gt_err(gt, "GSC init failed with %d", ret); + return ret; +} + +static void free_resources(struct drm_device *drm, void *arg) +{ + struct xe_gsc *gsc = arg; + + if (gsc->wq) { + destroy_workqueue(gsc->wq); + gsc->wq = NULL; + } + + if (gsc->q) { + xe_exec_queue_put(gsc->q); + gsc->q = NULL; + } + + if (gsc->private) { + xe_bo_unpin_map_no_vm(gsc->private); + gsc->private = NULL; + } +} + +int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); + struct xe_exec_queue *q; + struct workqueue_struct *wq; + struct xe_bo *bo; + int err; + + if (!xe_uc_fw_is_available(&gsc->fw)) + return 0; + + if (!hwe) + return -ENODEV; + + bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, + ttm_bo_type_kernel, + XE_BO_CREATE_STOLEN_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + q = xe_exec_queue_create(xe, NULL, + BIT(hwe->logical_instance), 1, hwe, + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT); + if (IS_ERR(q)) { + xe_gt_err(gt, "Failed to create queue for GSC submission\n"); + err = PTR_ERR(q); + goto out_bo; + } + + wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); + if (!wq) { + err = -ENOMEM; + goto out_q; + } + + gsc->private = bo; + gsc->q = q; + gsc->wq = wq; + + err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + if (err) + return err; + + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out_q: + xe_exec_queue_put(q); +out_bo: + xe_bo_unpin_map_no_vm(bo); + return err; +} + +void xe_gsc_load_start(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) + return; + + /* GSC FW survives GT reset and D3Hot */ + if (gsc_fw_is_loaded(gt)) { + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + return; + } + + queue_work(gsc->wq, &gsc->work); +} + +void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) +{ + if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) + flush_work(&gsc->work); +} + +/* + * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a + * GSC engine reset by writing a notification bit in the GS1 register and then + * triggering an interrupt to GSC; from the interrupt it will take up to 200ms + * for the FW to get prepared for the reset, so we need to wait for that amount + * of time. + * After the reset is complete we need to then clear the GS1 register. + */ +void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) +{ + u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; + u32 gs1_clr = prep ?
0 : HECI_H_GS1_ER_PREP; + + /* WA only applies if the GSC is loaded */ + if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) + return; + + xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); + + if (prep) { + /* make sure the reset bit is clear when writing the CSR reg */ + xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_RST, HECI_H_CSR_IG); + msleep(200); + } +} diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h new file mode 100644 index 000000000000..bc1ef7f31ea2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_H_ +#define _XE_GSC_H_ + +#include "xe_gsc_types.h" + +struct xe_gt; + +int xe_gsc_init(struct xe_gsc *gsc); +int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); +void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_load_start(struct xe_gsc *gsc); + +void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c new file mode 100644 index 000000000000..8c5381e5913f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc_submit.h" + +#include "abi/gsc_command_header_abi.h" +#include "xe_bb.h" +#include "xe_exec_queue.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" +#include "xe_map.h" +#include "xe_sched_job.h" +#include "instructions/xe_gsc_commands.h" +#include "regs/xe_gsc_regs.h" + +#define GSC_HDR_SIZE (sizeof(struct intel_gsc_mtl_header)) /* shorthand define */ + +#define mtl_gsc_header_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_, val_) + +#define mtl_gsc_header_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_) + +/* + * GSC FW allows us to define the host_session_handle as we see fit, as long + * as we use a unique identifier for each user, with handle 0 being reserved for + * kernel usage. + * To be able to differentiate which client subsystem owns the given session, we + * include the client id in the top 8 bits of the handle.
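+ * + * For example (illustrative values only): client id 4 combined with host + * session id 5 would be encoded as + * FIELD_PREP(HOST_SESSION_CLIENT_MASK, 4) | 5 == 0x0400000000000005.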
+ */ +#define HOST_SESSION_CLIENT_MASK GENMASK_ULL(63, 56) + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +/** + * xe_gsc_emit_header - write the MTL GSC header in memory + * @xe: the Xe device + * @map: the iosys map to write to + * @offset: offset from the start of the map at which to write the header + * @heci_client_id: client id identifying the type of command (see abi for values) + * @host_session_id: host session ID of the caller + * @payload_size: size of the payload that follows the header + * + * Returns: offset memory location following the header + */ +u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, + u8 heci_client_id, u64 host_session_id, u32 payload_size) +{ + xe_assert(xe, !(host_session_id & HOST_SESSION_CLIENT_MASK)); + + if (host_session_id) + host_session_id |= FIELD_PREP(HOST_SESSION_CLIENT_MASK, heci_client_id); + + xe_map_memset(xe, map, offset, 0, GSC_HDR_SIZE); + + mtl_gsc_header_wr(xe, map, offset, validity_marker, GSC_HECI_VALIDITY_MARKER); + mtl_gsc_header_wr(xe, map, offset, heci_client_id, heci_client_id); + mtl_gsc_header_wr(xe, map, offset, host_session_handle, host_session_id); + mtl_gsc_header_wr(xe, map, offset, header_version, MTL_GSC_HEADER_VERSION); + mtl_gsc_header_wr(xe, map, offset, message_size, payload_size + GSC_HDR_SIZE); + + return offset + GSC_HDR_SIZE; +}; + +/** + * xe_gsc_check_and_update_pending - check the pending bit and update the input + * header with the retry handle from the output header + * @xe: the Xe device + * @in: the iosys map containing the input buffer + * @offset_in: offset within the iosys at which the input buffer is located + * @out: the iosys map containing the output buffer + * @offset_out: offset within the iosys at which the output buffer is located + * + * Returns: true if the pending bit was set, false otherwise + */ +bool xe_gsc_check_and_update_pending(struct xe_device *xe, + struct iosys_map *in, u32 offset_in, + struct iosys_map *out, u32 offset_out) +{ + if (mtl_gsc_header_rd(xe, out, offset_out, flags) & GSC_OUTFLAG_MSG_PENDING) { + u64 handle = mtl_gsc_header_rd(xe, out, offset_out, gsc_message_handle); + + mtl_gsc_header_wr(xe, in, offset_in, gsc_message_handle, handle); + + return true; + } + + return false; +} + +/** + * xe_gsc_read_out_header - reads and validates the output header and returns + * the offset of the reply following the header + * @xe: the Xe device + * @map: the iosys map containing the output buffer + * @offset: offset within the iosys at which the output buffer is located + * @min_payload_size: minimum size of the message excluding the gsc header + * @payload_offset: optional pointer to be set to the payload offset + * + * Returns: -errno value on failure, 0 otherwise + */ +int xe_gsc_read_out_header(struct xe_device *xe, + struct iosys_map *map, u32 offset, + u32 min_payload_size, + u32 *payload_offset) +{ + u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker); + u32 size = mtl_gsc_header_rd(xe, map, offset, message_size); + u32 payload_size = size - GSC_HDR_SIZE; + + if (marker != GSC_HECI_VALIDITY_MARKER) + return -EPROTO; + + if (size < GSC_HDR_SIZE || payload_size < min_payload_size) + return -ENODATA; + + if (payload_offset) + *payload_offset = offset + GSC_HDR_SIZE; + + return 0; +} + +/** + * xe_gsc_pkt_submit_kernel - submit a kernel heci pkt to the GSC + * @gsc: the GSC uC + * @addr_in: GGTT address of the message to send to the GSC + * @size_in: size of the message 
to send to the GSC + * @addr_out: GGTT address for the GSC to write the reply to + * @size_out: size of the memory reserved for the reply + */ +int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, + u64 addr_out, u32 size_out) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_bb *bb; + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + + if (size_in < GSC_HDR_SIZE) + return -ENODATA; + + if (size_out < GSC_HDR_SIZE) + return -ENOMEM; + + bb = xe_bb_new(gt, 8, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + bb->cs[bb->len++] = GSC_HECI_CMD_PKT; + bb->cs[bb->len++] = lower_32_bits(addr_in); + bb->cs[bb->len++] = upper_32_bits(addr_in); + bb->cs[bb->len++] = size_in; + bb->cs[bb->len++] = lower_32_bits(addr_out); + bb->cs[bb->len++] = upper_32_bits(addr_out); + bb->cs[bb->len++] = size_out; + bb->cs[bb->len++] = 0; + + job = xe_bb_create_job(gsc->q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h new file mode 100644 index 000000000000..0801da5d446a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_SUBMIT_H_ +#define _XE_GSC_SUBMIT_H_ + +#include <linux/types.h> + +struct iosys_map; +struct xe_device; +struct xe_gsc; + +u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, + u8 heci_client_id, u64 host_session_id, u32 payload_size); + +bool xe_gsc_check_and_update_pending(struct xe_device *xe, + struct iosys_map *in, u32 offset_in, + struct iosys_map *out, u32 offset_out); + +int xe_gsc_read_out_header(struct xe_device *xe, + struct iosys_map *map, u32 offset, + u32 min_payload_size, + u32 *payload_offset); + +int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, + u64 addr_out, u32 size_out); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h new file mode 100644 index 000000000000..57fefd66a7ea --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_TYPES_H_ +#define _XE_GSC_TYPES_H_ + +#include <linux/workqueue.h> + +#include "xe_uc_fw_types.h" + +struct xe_bo; +struct xe_exec_queue; + +/** + * struct xe_gsc - GSC + */ +struct xe_gsc { + /** @fw: Generic uC firmware management */ + struct xe_uc_fw fw; + + /** @security_version: SVN found in the fetched blob */ + u32 security_version; + + /** @private: Private data for use by the GSC FW */ + struct xe_bo *private; + + /** @q: Default queue used for submissions to GSC FW */ + struct xe_exec_queue *q; + + /** @wq: workqueue to handle jobs for delayed load and proxy handling */ + struct workqueue_struct *wq; + + /** @work: delayed load and proxy handling work */ + struct work_struct work; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c new file mode 100644 index 000000000000..3af2adec1295 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -0,0 +1,778 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation 
+ */ + +#include "xe_gt.h" + +#include <linux/minmax.h> + +#include <drm/drm_managed.h> +#include <drm/xe_drm.h> + +#include "instructions/xe_gfxpipe_commands.h" +#include "instructions/xe_mi_commands.h" +#include "regs/xe_gt_regs.h" +#include "xe_assert.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_ggtt.h" +#include "xe_gsc.h" +#include "xe_gt_ccs_mode.h" +#include "xe_gt_clock.h" +#include "xe_gt_freq.h" +#include "xe_gt_idle.h" +#include "xe_gt_mcr.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_printk.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_tlb_invalidation.h" +#include "xe_gt_topology.h" +#include "xe_guc_exec_queue_types.h" +#include "xe_guc_pc.h" +#include "xe_hw_fence.h" +#include "xe_hw_engine_class_sysfs.h" +#include "xe_irq.h" +#include "xe_lmtt.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_migrate.h" +#include "xe_mmio.h" +#include "xe_pat.h" +#include "xe_mocs.h" +#include "xe_reg_sr.h" +#include "xe_ring_ops.h" +#include "xe_sa.h" +#include "xe_sched_job.h" +#include "xe_sriov.h" +#include "xe_tuning.h" +#include "xe_uc.h" +#include "xe_vm.h" +#include "xe_wa.h" +#include "xe_wopcm.h" + +struct xe_gt *xe_gt_alloc(struct xe_tile *tile) +{ + struct xe_gt *gt; + + gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) + return ERR_PTR(-ENOMEM); + + gt->tile = tile; + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + + return gt; +} + +void xe_gt_sanitize(struct xe_gt *gt) +{ + /* + * FIXME: if xe_uc_sanitize is called here, on TGL the driver will not + * reload + */ + gt->uc.guc.submission_state.enabled = false; +} + +static void gt_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + int i; + + destroy_workqueue(gt->ordered_wq); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(&gt->fence_irq[i]); +} + +static void gt_reset_worker(struct work_struct *w); + +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + long timeout; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + job = xe_bb_create_job(q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +/* + * Convert back from encoded value to type-safe, only to be used when reg.mcr + * is true + */ +static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) +{ + return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; +} + +static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_reg_sr_entry *entry; + unsigned long idx; + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + long timeout; + int count = 0; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* Big enough to emit all of the context's 3DSTATE */ + bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); + else + /* Just pick a large BB size */ + bb = xe_bb_new(gt, SZ_4K, false); + + if (IS_ERR(bb)) + return PTR_ERR(bb); + + xa_for_each(&sr->xa, idx, entry) + ++count; + + if (count) { + xe_gt_dbg(gt, "LRC WA %s
save-restore batch\n", sr->name); + + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + + xa_for_each(&sr->xa, idx, entry) { + struct xe_reg reg = entry->reg; + struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); + u32 val; + + /* + * Skip reading the register if it's not really needed + */ + if (reg.masked) + val = entry->clr_bits << 16; + else if (entry->clr_bits + 1) + val = (reg.mcr ? + xe_gt_mcr_unicast_read_any(gt, reg_mcr) : + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + else + val = 0; + + val |= entry->set_bits; + + bb->cs[bb->len++] = reg.addr; + bb->cs[bb->len++] = val; + xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); + } + } + + xe_lrc_emit_hwe_state_instructions(q, bb); + + job = xe_bb_create_job(q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +int xe_gt_record_default_lrcs(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err = 0; + + for_each_hw_engine(hwe, gt, id) { + struct xe_exec_queue *q, *nop_q; + void *default_lrc; + + if (gt->default_lrc[hwe->class]) + continue; + + xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe); + xe_wa_process_lrc(hwe); + xe_hw_engine_setup_default_lrc_state(hwe); + xe_tuning_process_lrc(hwe); + + default_lrc = drmm_kzalloc(&xe->drm, + xe_lrc_size(xe, hwe->class), + GFP_KERNEL); + if (!default_lrc) + return -ENOMEM; + + q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, + hwe, EXEC_QUEUE_FLAG_KERNEL); + if (IS_ERR(q)) { + err = PTR_ERR(q); + xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", + hwe->name, q); + return err; + } + + /* Prime golden LRC with known good state */ + err = emit_wa_job(gt, q); + if (err) { + xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), q->guc->id); + goto put_exec_queue; + } + + nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), + 1, hwe, EXEC_QUEUE_FLAG_KERNEL); + if (IS_ERR(nop_q)) { + err = PTR_ERR(nop_q); + xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", + hwe->name, nop_q); + goto put_exec_queue; + } + + /* Switch to different LRC */ + err = emit_nop_job(gt, nop_q); + if (err) { + xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), nop_q->guc->id); + goto put_nop_q; + } + + /* Reload golden LRC to record the effect of any indirect W/A */ + err = emit_nop_job(gt, q); + if (err) { + xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), q->guc->id); + goto put_nop_q; + } + + xe_map_memcpy_from(xe, default_lrc, + &q->lrc[0].bo->vmap, + xe_lrc_pphwsp_offset(&q->lrc[0]), + xe_lrc_size(xe, hwe->class)); + + gt->default_lrc[hwe->class] = default_lrc; +put_nop_q: + xe_exec_queue_put(nop_q); +put_exec_queue: + xe_exec_queue_put(q); + if (err) + break; + } + + return err; +} + +int xe_gt_init_early(struct xe_gt *gt) +{ + int err; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt)); + + err = xe_wa_init(gt); + if (err) + return
err; + + xe_wa_process_gt(gt); + xe_wa_process_oob(gt); + xe_tuning_process_gt(gt); + + return 0; +} + +static void dump_pat_on_error(struct xe_gt *gt) +{ + struct drm_printer p; + char prefix[32]; + + snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id); + p = drm_debug_printer(prefix); + + xe_pat_dump(gt, &p); +} + +static int gt_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_hw_fence_irq; + + xe_pat_init(gt); + + if (!xe_gt_is_media_type(gt)) { + err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); + if (err) + goto err_force_wake; + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt); + } + + err = xe_uc_init(&gt->uc); + if (err) + goto err_force_wake; + + /* Raise GT freq to speed up HuC/GuC load */ + xe_guc_pc_init_early(&gt->uc.guc.pc); + + err = xe_uc_init_hwconfig(&gt->uc); + if (err) + goto err_force_wake; + + xe_gt_idle_sysfs_init(&gt->gtidle); + + /* XXX: Fake that we pull the engine mask from hwconfig blob */ + gt->info.engine_mask = gt->info.__engine_mask; + + /* Enable per hw engine IRQs */ + xe_irq_enable_hwe(gt); + + /* Rerun MCR init as we now have hw engine list */ + xe_gt_mcr_init(gt); + + err = xe_hw_engines_init_early(gt); + if (err) + goto err_force_wake; + + err = xe_hw_engine_class_sysfs_init(gt); + if (err) + drm_warn(&gt_to_xe(gt)->drm, + "failed to register engines sysfs directory, err: %d\n", + err); + + /* Initialize CCS mode sysfs after early initialization of HW engines */ + xe_gt_ccs_mode_sysfs_init(gt); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt)); + + return 0; + +err_force_wake: + dump_pat_on_error(gt); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(&gt->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +static int all_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_hw_fence_irq; + + xe_gt_mcr_set_implicit_defaults(gt); + xe_reg_sr_apply_mmio(&gt->reg_sr, gt); + + err = xe_gt_clock_init(gt); + if (err) + goto err_force_wake; + + xe_mocs_init(gt); + err = xe_execlist_init(gt); + if (err) + goto err_force_wake; + + err = xe_hw_engines_init(gt); + if (err) + goto err_force_wake; + + err = xe_uc_init_post_hwconfig(&gt->uc); + if (err) + goto err_force_wake; + + if (!xe_gt_is_media_type(gt)) { + /* + * USM has its own SA pool so that it does not block behind + * user operations + */ + if (gt_to_xe(gt)->info.has_usm) { + gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16); + if (IS_ERR(gt->usm.bb_pool)) { + err = PTR_ERR(gt->usm.bb_pool); + goto err_force_wake; + } + } + } + + if (!xe_gt_is_media_type(gt)) { + struct xe_tile *tile = gt_to_tile(gt); + + tile->migrate = xe_migrate_init(tile); + if (IS_ERR(tile->migrate)) { + err = PTR_ERR(tile->migrate); + goto err_force_wake; + } + } + + err = xe_uc_init_hw(&gt->uc); + if (err) + goto err_force_wake; + + /* Configure default CCS mode of 1 engine with all resources */ + if (xe_gt_ccs_mode_enabled(gt)) { + gt->ccs_mode = 1; + xe_gt_apply_ccs_mode(gt); + } + + if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt));
+ + return 0; + +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(&gt->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +int xe_gt_init(struct xe_gt *gt) +{ + int err; + int i; + + INIT_WORK(&gt->reset.worker, gt_reset_worker); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { + gt->ring_ops[i] = xe_ring_ops_get(gt, i); + xe_hw_fence_irq_init(&gt->fence_irq[i]); + } + + err = xe_gt_tlb_invalidation_init(gt); + if (err) + return err; + + err = xe_gt_pagefault_init(gt); + if (err) + return err; + + xe_mocs_init_early(gt); + + xe_gt_sysfs_init(gt); + + err = gt_fw_domain_init(gt); + if (err) + return err; + + xe_gt_freq_init(gt); + + xe_force_wake_init_engines(gt, gt_to_fw(gt)); + + err = all_fw_domain_init(gt); + if (err) + return err; + + err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt); + if (err) + return err; + + return 0; +} + +static int do_gt_reset(struct xe_gt *gt) +{ + int err; + + xe_gsc_wa_14015076503(gt, true); + + xe_mmio_write32(gt, GDRST, GRDOM_FULL); + err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); + if (err) + xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", + ERR_PTR(err)); + + xe_gsc_wa_14015076503(gt, false); + + return err; +} + +static int do_gt_restart(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err; + + xe_pat_init(gt); + + xe_gt_mcr_set_implicit_defaults(gt); + xe_reg_sr_apply_mmio(&gt->reg_sr, gt); + + err = xe_wopcm_init(&gt->uc.wopcm); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) + xe_hw_engine_enable_ring(hwe); + + err = xe_uc_sanitize_reset(&gt->uc); + if (err) + return err; + + err = xe_uc_init_hw(&gt->uc); + if (err) + return err; + + if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + + xe_mocs_init(gt); + err = xe_uc_start(&gt->uc); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) { + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(hwe); + } + + /* Get CCS mode in sync between sw/hw */ + xe_gt_apply_ccs_mode(gt); + + return 0; +} + +static int gt_reset(struct xe_gt *gt) +{ + int err; + + /* We only support GT resets with GuC submission */ + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + + xe_gt_info(gt, "reset started\n"); + + if (xe_fault_inject_gt_reset()) { + err = -ECANCELED; + goto err_fail; + } + + xe_gt_sanitize(gt); + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + xe_uc_gucrc_disable(&gt->uc); + xe_uc_stop_prepare(&gt->uc); + xe_gt_pagefault_reset(gt); + + err = xe_uc_stop(&gt->uc); + if (err) + goto err_out; + + err = do_gt_reset(gt); + if (err) + goto err_out; + + xe_gt_tlb_invalidation_reset(gt); + + err = do_gt_restart(gt); + if (err) + goto err_out; + + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(err); + + xe_gt_info(gt, "reset done\n"); + + return 0; + +err_out: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + XE_WARN_ON(xe_uc_start(&gt->uc)); + xe_device_mem_access_put(gt_to_xe(gt)); +err_fail: + xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); + + gt_to_xe(gt)->needs_flr_on_fini = true; + + return err; +} + +static void gt_reset_worker(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); + + gt_reset(gt); +} + +void
xe_gt_reset_async(struct xe_gt *gt) +{ + xe_gt_info(gt, "trying reset\n"); + + /* Don't do a reset while one is already in flight */ + if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc)) + return; + + xe_gt_info(gt, "reset queued\n"); + queue_work(gt->ordered_wq, &gt->reset.worker); +} + +void xe_gt_suspend_prepare(struct xe_gt *gt) +{ + xe_device_mem_access_get(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + xe_uc_stop_prepare(&gt->uc); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); +} + +int xe_gt_suspend(struct xe_gt *gt) +{ + int err; + + xe_gt_sanitize(gt); + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = xe_uc_suspend(&gt->uc); + if (err) + goto err_force_wake; + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); + xe_gt_info(gt, "suspended\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); + + return err; +} + +int xe_gt_resume(struct xe_gt *gt) +{ + int err; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = do_gt_restart(gt); + if (err) + goto err_force_wake; + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); + xe_gt_info(gt, "resumed\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); + + return err; +} + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, bool logical) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class && + ((!logical && hwe->instance == instance) || + (logical && hwe->logical_instance == instance))) + return hwe; + + return NULL; +} + +struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, + enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + switch (class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) + return hwe; + break; + default: + if (hwe->class == class) + return hwe; + } + } + + return NULL; +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h new file mode 100644 index 000000000000..4486e083f5ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_H_ +#define _XE_GT_H_ + +#include <drm/drm_util.h> + +#include "xe_device_types.h" +#include "xe_hw_engine.h" + +#define for_each_hw_engine(hwe__, gt__, id__) \ + for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \ + for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ + xe_hw_engine_is_valid((hwe__))) + +#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) + +#ifdef CONFIG_FAULT_INJECTION +#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */ +extern struct fault_attr gt_reset_failure;
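+ +/* + * Illustrative note (not part of this patch): should_fail() honours the + * generic fault_attr knobs (probability, interval, times), so testers can + * configure gt_reset_failure via the fault-injection framework to make a + * GT reset bail out early with -ECANCELED, without any hardware involvement. + */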
+static inline bool xe_fault_inject_gt_reset(void) +{ + return should_fail(&gt_reset_failure, 1); +} +#else +static inline bool xe_fault_inject_gt_reset(void) +{ + return false; +} +#endif + +struct xe_gt *xe_gt_alloc(struct xe_tile *tile); +int xe_gt_init_early(struct xe_gt *gt); +int xe_gt_init(struct xe_gt *gt); +int xe_gt_record_default_lrcs(struct xe_gt *gt); +void xe_gt_suspend_prepare(struct xe_gt *gt); +int xe_gt_suspend(struct xe_gt *gt); +int xe_gt_resume(struct xe_gt *gt); +void xe_gt_reset_async(struct xe_gt *gt); +void xe_gt_sanitize(struct xe_gt *gt); + +/** + * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the + * first that matches the same reset domain as @class + * @gt: GT structure + * @class: hw engine class to lookup + */ +struct xe_hw_engine * +xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class); + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, + bool logical); + +static inline bool xe_gt_is_media_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MEDIA; +} + +static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(gt); + + return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c new file mode 100644 index 000000000000..529fc286cd06 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gt_regs.h" +#include "xe_assert.h" +#include "xe_gt.h" +#include "xe_gt_ccs_mode.h" +#include "xe_gt_sysfs.h" +#include "xe_mmio.h" + +static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) +{ + u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */ + int num_slices = hweight32(CCS_MASK(gt)); + struct xe_device *xe = gt_to_xe(gt); + int width, cslice = 0; + u32 config = 0; + + xe_assert(xe, xe_gt_ccs_mode_enabled(gt)); + + xe_assert(xe, num_engines && num_engines <= num_slices); + xe_assert(xe, !(num_slices % num_engines)); + + /* + * Loop over all available slices and assign each a user engine.
+ * For example, if there are four compute slices available, the + * assignment of compute slices to compute engines would be, + * + * With 1 engine (ccs0): + * slice 0, 1, 2, 3: ccs0 + * + * With 2 engines (ccs0, ccs1): + * slice 0, 2: ccs0 + * slice 1, 3: ccs1 + * + * With 4 engines (ccs0, ccs1, ccs2, ccs3): + * slice 0: ccs0 + * slice 1: ccs1 + * slice 2: ccs2 + * slice 3: ccs3 + */ + for (width = num_slices / num_engines; width; width--) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + if (hwe->class != XE_ENGINE_CLASS_COMPUTE) + continue; + + if (hwe->logical_instance >= num_engines) + break; + + config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0; + + /* If a slice is fused off, leave disabled */ + while ((CCS_MASK(gt) & BIT(cslice)) == 0) + cslice++; + + mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK); + mode |= CCS_MODE_CSLICE(cslice, hwe->instance); + cslice++; + } + } + + xe_mmio_write32(gt, CCS_MODE, mode); + + xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", + mode, config, num_engines, num_slices); +} + +void xe_gt_apply_ccs_mode(struct xe_gt *gt) +{ + if (!gt->ccs_mode) + return; + + __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); +} + +static ssize_t +num_cslices_show(struct device *kdev, + struct device_attribute *attr, char *buf) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + + return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt))); +} + +static DEVICE_ATTR_RO(num_cslices); + +static ssize_t +ccs_mode_show(struct device *kdev, + struct device_attribute *attr, char *buf) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + + return sysfs_emit(buf, "%u\n", gt->ccs_mode); +} + +static ssize_t +ccs_mode_store(struct device *kdev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + struct xe_device *xe = gt_to_xe(gt); + u32 num_engines, num_slices; + int ret; + + ret = kstrtou32(buff, 0, &num_engines); + if (ret) + return ret; + + /* + * Ensure number of engines specified is valid and there is an + * exact multiple of engines for slices. + */ + num_slices = hweight32(CCS_MASK(gt)); + if (!num_engines || num_engines > num_slices || num_slices % num_engines) { + xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n", + num_engines, num_slices); + return -EINVAL; + } + + /* CCS mode can only be updated when there are no drm clients */ + spin_lock(&xe->clients.lock); + if (xe->clients.count) { + spin_unlock(&xe->clients.lock); + return -EBUSY; + } + + if (gt->ccs_mode != num_engines) { + xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); + gt->ccs_mode = num_engines; + xe_gt_reset_async(gt); + } + + spin_unlock(&xe->clients.lock); + + return count; +} + +static DEVICE_ATTR_RW(ccs_mode); + +static const struct attribute *gt_ccs_mode_attrs[] = { + &dev_attr_ccs_mode.attr, + &dev_attr_num_cslices.attr, + NULL, +}; + +static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); +} + +/** + * xe_gt_ccs_mode_sysfs_init - Initialize CCS mode sysfs interfaces + * @gt: GT structure + * + * Through a per-gt 'ccs_mode' sysfs interface, the user can enable a fixed + * number of compute hardware engines to which the available compute slices + * are to be allocated. This user configuration change triggers a gt reset + * and it is expected that there are no open drm clients while doing so. 
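+ * + * As an illustrative example (the exact sysfs path depends on card and + * tile numbering), selecting two compute engines could look like: + * + * echo 2 > /sys/class/drm/card0/device/tile0/gt0/ccs_mode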
+ * The number of available compute slices is exposed to the user through a per-gt + * 'num_cslices' sysfs interface. + */ +void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + if (!xe_gt_ccs_mode_enabled(gt)) + return; + + err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); + if (err) { + drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); + if (err) { + sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h new file mode 100644 index 000000000000..f39975aaaab0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_CCS_MODE_H_ +#define _XE_GT_CCS_MODE_H_ + +#include "xe_device_types.h" +#include "xe_gt.h" +#include "xe_gt_types.h" +#include "xe_platform_types.h" + +void xe_gt_apply_ccs_mode(struct xe_gt *gt); +void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); + +static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) +{ + /* Check if there is more than one compute engine available */ + return hweight32(CCS_MASK(gt)) > 1; +} + +#endif + diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c new file mode 100644 index 000000000000..937054e31d72 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_gt_clock.h" + +#include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_mmio.h" + +static u32 read_reference_ts_freq(struct xe_gt *gt) +{ + u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE); + u32 base_freq, frac_freq; + + base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, + ts_override) + 1; + base_freq *= 1000000; + + frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK, + ts_override); + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u32 get_crystal_clock_freq(u32 rpm_config_reg) +{ + const u32 f19_2_mhz = 19200000; + const u32 f24_mhz = 24000000; + const u32 f25_mhz = 25000000; + const u32 f38_4_mhz = 38400000; + u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, + rpm_config_reg); + + switch (crystal_clock) { + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + XE_WARN_ON("NOT_POSSIBLE"); + return 0; + } +} + +int xe_gt_clock_init(struct xe_gt *gt) +{ + u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE); + u32 freq = 0; + + /* Assuming gen11+ so assert this assumption is correct */ + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); + + if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(gt); + } else { + u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0); + + freq = get_crystal_clock_freq(c0); + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycles).
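+ * + * Worked example (illustrative): with a 38.4 MHz crystal and a CTC + * shift parameter of 1, the timestamp frequency comes out as + * 38400000 >> (3 - 1) = 9600000, i.e. 9.6 MHz.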
+
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
+{
+ return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
+} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h new file mode 100644 index 000000000000..aa162722f859 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
+#endif diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c new file mode 100644 index 000000000000..c4b67cf09f8f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_pat.h"
+#include "xe_reg_sr.h"
+#include "xe_reg_whitelist.h"
+#include "xe_uc_debugfs.h"
+#include "xe_wa.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+ return node->info_ent->data;
+}
+
+static int hw_engines(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int err;
+
+ xe_device_mem_access_get(xe);
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err) {
+ xe_device_mem_access_put(xe);
+ return err;
+ }
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_hw_engine_print(hwe, &p);
+
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_device_mem_access_put(xe);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int force_reset(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+
+ xe_gt_reset_async(gt);
+
+ return 0;
+}
+
+static int sa_info(struct seq_file *m, void *data)
+{
+ struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p,
+ tile->mem.kernel_bb_pool->gpu_addr);
+
+ return 0;
+}
+
+static int topology(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_gt_topology_dump(gt, &p);
+
+ return 0;
+}
+
+static int steering(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_gt_mcr_steering_dump(gt, &p);
+
+ return 0;
+}
+
+static int ggtt(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p);
+}
+
+static int register_save_restore(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ xe_reg_sr_dump(&gt->reg_sr, &p);
+ drm_printf(&p, "\n"); + + drm_printf(&p, "Engine\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_dump(&hwe->reg_sr, &p); + drm_printf(&p, "\n"); + + drm_printf(&p, "LRC\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_dump(&hwe->reg_lrc, &p); + drm_printf(&p, "\n"); + + drm_printf(&p, "Whitelist\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + + return 0; +} + +static int workarounds(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_wa_dump(gt, &p); + + return 0; +} + +static int pat(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pat_dump(gt, &p); + + return 0; +} + +static int rcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); + return 0; +} + +static int ccs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); + return 0; +} + +static int bcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); + return 0; +} + +static int vcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); + return 0; +} + +static int vecs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"hw_engines", hw_engines, 0}, + {"force_reset", force_reset, 0}, + {"sa_info", sa_info, 0}, + {"topology", topology, 0}, + {"steering", steering, 0}, + {"ggtt", ggtt, 0}, + {"register-save-restore", register_save_restore, 0}, + {"workarounds", workarounds, 0}, + {"pat", pat, 0}, + {"default_lrc_rcs", rcs_default_lrc}, + {"default_lrc_ccs", ccs_default_lrc}, + {"default_lrc_bcs", bcs_default_lrc}, + {"default_lrc_vcs", vcs_default_lrc}, + {"default_lrc_vecs", vecs_default_lrc}, +}; + +void xe_gt_debugfs_register(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *root; + struct drm_info_list *local; + char name[8]; + int i; + + xe_gt_assert(gt, minor->debugfs_root); + + sprintf(name, "gt%d", gt->info.id); + root = debugfs_create_dir(name, minor->debugfs_root); + if (IS_ERR(root)) { + drm_warn(&xe->drm, "Create GT directory failed"); + return; + } + + /* + * Allocate local copy as we need to pass in the GT to the debugfs + * entry and drm_debugfs_create_files just references the drm_info_list + * passed in (e.g. can't define this on the stack). 
+ */
+#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+ local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL);
+ if (!local)
+ return;
+
+ memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+ local[i].data = gt;
+
+ drm_debugfs_create_files(local,
+ ARRAY_SIZE(debugfs_list),
+ root, minor);
+
+ xe_uc_debugfs_register(&gt->uc, root);
+} diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h new file mode 100644 index 000000000000..5a329f118a57 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c new file mode 100644 index 000000000000..3adfa6686e7c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_freq.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_guc_pc.h"
+
+/**
+ * DOC: Xe GT Frequency Management
+ *
+ * This component is responsible for the raw GT frequency management, including
+ * the sysfs API.
+ *
+ * Underneath, Xe enables GuC SLPC automated frequency management. GuC is then
+ * allowed to request from PCODE any frequency between the Minimum and the
+ * Maximum selected by this component. Furthermore, it is important to highlight
+ * that PCODE is the ultimate decision maker of the actual running frequency,
+ * based on thermal and other running conditions.
+ *
+ * Xe's Freq provides a sysfs API for frequency management:
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * - act_freq: The actual resolved frequency decided by PCODE.
+ * - cur_freq: The current one requested by GuC PC to the PCODE.
+ * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * - min_freq: Min frequency request.
+ * - max_freq: Max frequency request.
+ * If max <= min, then freq_min becomes a fixed frequency request.
+ */ + +static struct xe_guc_pc * +dev_to_pc(struct device *dev) +{ + return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc; +} + +static ssize_t act_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc)); +} +static DEVICE_ATTR_RO(act_freq); + +static ssize_t cur_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_cur_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} +static DEVICE_ATTR_RO(cur_freq); + +static ssize_t rp0_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); +} +static DEVICE_ATTR_RO(rp0_freq); + +static ssize_t rpe_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc)); +} +static DEVICE_ATTR_RO(rpe_freq); + +static ssize_t rpn_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc)); +} +static DEVICE_ATTR_RO(rpn_freq); + +static ssize_t min_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_min_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} + +static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + ret = xe_guc_pc_set_min_freq(pc, freq); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_RW(min_freq); + +static ssize_t max_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_max_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} + +static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + ret = xe_guc_pc_set_max_freq(pc, freq); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_RW(max_freq); + +static const struct attribute *freq_attrs[] = { + &dev_attr_act_freq.attr, + &dev_attr_cur_freq.attr, + &dev_attr_rp0_freq.attr, + &dev_attr_rpe_freq.attr, + &dev_attr_rpn_freq.attr, + &dev_attr_min_freq.attr, + &dev_attr_max_freq.attr, + NULL +}; + +static void freq_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, freq_attrs); + kobject_put(kobj); +} + +/** + * xe_gt_freq_init - Initialize Xe Freq component + * @gt: Xe GT object + * + * It needs to be initialized after GT Sysfs and GuC PC components are ready. 
+ */
+void xe_gt_freq_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ gt->freq = kobject_create_and_add("freq0", gt->sysfs);
+ if (!gt->freq) {
+ drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
+ kobject_name(gt->sysfs));
+ return;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
+ if (err) {
+ drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+ __func__, err);
+ return;
+ }
+
+ err = sysfs_create_files(gt->freq, freq_attrs);
+ if (err)
+ drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n",
+ kobject_name(gt->freq), err);
+
+ xe_gt_throttle_sysfs_init(gt);
+} diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h new file mode 100644 index 000000000000..f3fe3c90491a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_freq.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_FREQ_H_
+#define _XE_GT_FREQ_H_
+
+struct xe_gt;
+
+void xe_gt_freq_init(struct xe_gt *gt);
+
+#endif diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c new file mode 100644 index 000000000000..9358f7336889 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_pc.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Idle
+ *
+ * Contains functions that init GT idle features like C6
+ *
+ * device/gt#/gtidle/name - name of the state
+ * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms
+ * device/gt#/gtidle/idle_status - Provides current idle state
+ */
+
+static struct xe_gt_idle *dev_to_gtidle(struct device *dev)
+{
+ struct kobject *kobj = &dev->kobj;
+
+ return &kobj_to_gt(kobj->parent)->gtidle;
+}
+
+static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle)
+{
+ return container_of(gtidle, struct xe_gt, gtidle);
+}
+
+static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
+{
+ return &gtidle_to_gt(gtidle)->uc.guc.pc;
+}
+
+static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
+{
+ switch (state) {
+ case GT_IDLE_C0:
+ return "gt-c0";
+ case GT_IDLE_C6:
+ return "gt-c6";
+ default:
+ return "unknown";
+ }
+}
+
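For orientation, a user-space sketch (not part of the patch) of how the gtidle attributes documented above are read; the path assumes card0, tile 0 and GT 0, and the actual indices depend on the device:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical sysfs path; adjust card/tile/gt for the target device. */
		const char *p = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
		char buf[32];
		FILE *f = fopen(p, "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("idle residency (ms): %s", buf);
		fclose(f);
		return 0;
	}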
+static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
+{
+ u64 delta, overflow_residency, prev_residency;
+
+ overflow_residency = BIT_ULL(32);
+
+ /*
+ * Counter wrap handling: store the previous hw counter value for
+ * wrap-around detection. This relies on a sufficient query frequency;
+ * otherwise the counters can still wrap.
+ */
+ prev_residency = gtidle->prev_residency;
+ gtidle->prev_residency = cur_residency;
+
+ /* delta */
+ if (cur_residency >= prev_residency)
+ delta = cur_residency - prev_residency;
+ else
+ delta = cur_residency + (overflow_residency - prev_residency);
+
+ /* Add delta to extended raw driver copy of idle residency */
+ cur_residency = gtidle->cur_residency + delta;
+ gtidle->cur_residency = cur_residency;
+
+ /* residency multiplier in ns, convert to ms */
+ cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6);
+
+ return cur_residency;
+}
+
+static ssize_t name_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+
+ return sysfs_emit(buff, "%s\n", gtidle->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t idle_status_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+ struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+ enum xe_gt_idle_state state;
+
+ state = gtidle->idle_status(pc);
+
+ return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
+}
+static DEVICE_ATTR_RO(idle_status);
+
+static ssize_t idle_residency_ms_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+ struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+ u64 residency;
+
+ residency = gtidle->idle_residency(pc);
+ return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
+}
+static DEVICE_ATTR_RO(idle_residency_ms);
+
+static const struct attribute *gt_idle_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_idle_status.attr,
+ &dev_attr_idle_residency_ms.attr,
+ NULL,
+};
+
+static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+ struct kobject *kobj = arg;
+
+ sysfs_remove_files(kobj, gt_idle_attrs);
+ kobject_put(kobj);
+}
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+{
+ struct xe_gt *gt = gtidle_to_gt(gtidle);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct kobject *kobj;
+ int err;
+
+ kobj = kobject_create_and_add("gtidle", gt->sysfs);
+ if (!kobj) {
+ drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
+ return;
+ }
+
+ if (xe_gt_is_media_type(gt)) {
+ sprintf(gtidle->name, "gt%d-mc", gt->info.id);
+ gtidle->idle_residency = xe_guc_pc_mc6_residency;
+ } else {
+ sprintf(gtidle->name, "gt%d-rc", gt->info.id);
+ gtidle->idle_residency = xe_guc_pc_rc6_residency;
+ }
+
+ /* Multiplier for Residency counter in units of 1.28us */
+ gtidle->residency_multiplier = 1280;
+ gtidle->idle_status = xe_guc_pc_c_status;
+
+ err = sysfs_create_files(kobj, gt_idle_attrs);
+ if (err) {
+ kobject_put(kobj);
+ drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
+ return;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
+ if (err)
+ drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+ __func__, err);
+}
+
+void xe_gt_idle_enable_c6(struct xe_gt *gt)
+{
+ xe_device_assert_mem_access(gt_to_xe(gt));
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ /* Units of 1280 ns for a total of 5s */
+ xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+ /* Enable RC6 */
+ xe_mmio_write32(gt, RC_CONTROL,
+ RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
+}
+
+void xe_gt_idle_disable_c6(struct xe_gt *gt)
+{
+ xe_device_assert_mem_access(gt_to_xe(gt));
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+ xe_mmio_write32(gt, PG_ENABLE, 0);
+ xe_mmio_write32(gt, RC_CONTROL, 0);
+ xe_mmio_write32(gt, RC_STATE, 0);
+}
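The "total of 5s" comment in xe_gt_idle_enable_c6() above follows directly from the units: 0x3B9ACA is 3,906,250, and 3,906,250 * 1280 ns = 5,000,000,000 ns. An illustrative compile-time check of that arithmetic (a sketch, not part of the patch):

	#include <linux/build_bug.h>

	/* Sketch only: 0x3B9ACA hysteresis ticks of 1280 ns each equal 5 seconds. */
	static_assert(0x3B9ACAULL * 1280 == 5000000000ULL, "RC idle hysteresis is not 5s");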
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h new file mode 100644 index 000000000000..69280fd16b03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_H_
+#define _XE_GT_IDLE_H_
+
+#include "xe_gt_idle_types.h"
+
+struct xe_gt;
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+void xe_gt_idle_enable_c6(struct xe_gt *gt);
+void xe_gt_idle_disable_c6(struct xe_gt *gt);
+
+#endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h new file mode 100644 index 000000000000..f99b447534f3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
+#define _XE_GT_IDLE_SYSFS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_guc_pc;
+
+/* States of GT Idle */
+enum xe_gt_idle_state {
+ GT_IDLE_C0,
+ GT_IDLE_C6,
+ GT_IDLE_UNKNOWN,
+};
+
+/**
+ * struct xe_gt_idle - A struct that contains idle properties based on a GT
+ */
+struct xe_gt_idle {
+ /** @name: name */
+ char name[16];
+ /** @residency_multiplier: residency multiplier in ns */
+ u32 residency_multiplier;
+ /** @cur_residency: raw driver copy of idle residency */
+ u64 cur_residency;
+ /** @prev_residency: previous residency counter */
+ u64 prev_residency;
+ /** @idle_status: get the current idle state */
+ enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc);
+ /** @idle_residency: get idle residency counter */
+ u64 (*idle_residency)(struct xe_guc_pc *pc);
+};
+
+#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c new file mode 100644 index 000000000000..77925b35cf8d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_mcr.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset. MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.). The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR
+ * registers can be done in either multicast (a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance) form. Reads of MCR registers always operate in a
+ * unicast manner regardless of how the multicast/unicast bit is set in
+ * MCR_SELECTOR. Selection of a specific MCR instance for unicast operations is
+ * referred to as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware. Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored. During device initialization, the goal of the various
+ * ``init_steering_*()`` functions is to apply the platform-specific rules for
+ * each MCR register type to identify a steering target that will select a
+ * non-terminated instance.
+ */
+
+#define STEER_SEMAPHORE XE_REG(0xFD0)
+
+static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
+{
+ return reg_mcr.__reg;
+}
+
+enum {
+ MCR_OP_READ,
+ MCR_OP_WRITE
+};
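To make the multicast/unicast split described above concrete, here is a sketch (not part of the patch) of a typical read-modify-write of a replicated register using the helpers defined later in this file; 'reg' stands in for whatever xe_reg_mcr the caller cares about:

	/* Sketch only: read one live instance, then broadcast the new value. */
	static void mcr_rmw_example(struct xe_gt *gt, struct xe_reg_mcr reg)
	{
		/* read any one non-terminated (non-fused, powered-up) instance */
		u32 val = xe_gt_mcr_unicast_read_any(gt, reg);

		/* broadcast the updated value to every instance in one MMIO write */
		xe_gt_mcr_multicast_write(gt, reg, val | BIT(0));
	}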
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+static const struct xe_mmio_range xehp_l3bank_steering_table[] = {
+ { 0x008C80, 0x008CFF },
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+ { 0x00DD00, 0x00DDFF },
+ { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
+ {},
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+ { 0x00B000, 0x00B0FF },
+ { 0x00D880, 0x00D8FF },
+ {},
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value. We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+ { 0x004000, 0x004AFF }, /* HALF-BSLICE */
+ { 0x008800, 0x00887F }, /* CC */
+ { 0x008A80, 0x008AFF }, /* TILEPSMI */
+ { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */
+ { 0x00B100, 0x00B3FF }, /* L3BANK */
+ { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */
+ { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */
+ { 0x00DD00, 0x00DDFF }, /* BSLICE */
+ { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */
+ { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */
+ { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */
+ { 0x024180, 0x0241FF }, /* HALF-BSLICE */
+ {},
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+ { 0x000B00, 0x000BFF }, /* SQIDI */
+ { 0x001000, 0x001FFF }, /* SQIDI */
+ { 0x004000, 0x0048FF }, /* GAM */
+ { 0x008700, 0x0087FF }, /* SQIDI */
+ { 0x00B000, 0x00B0FF }, /* NODE */
+ { 0x00C800, 0x00CFFF }, /* GAM */
+ { 0x00D880, 0x00D8FF }, /* NODE */
+ { 0x00DD00, 0x00DDFF }, /* OAAL2 */
+ {},
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+ { 0x008150, 0x00815F },
+ { 0x009520, 0x00955F },
+ { 0x00DE80, 0x00E8FF },
+ { 0x024A00, 0x024A7F },
+ {},
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+ { 0x005200, 0x0052FF }, /* GSLICE */
+ { 0x005400, 0x007FFF }, /* GSLICE */
+ { 0x008140, 0x00815F }, /* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+ { 0x008D00, 0x008DFF }, /* DSS */
+ { 0x0094D0, 0x00955F }, /* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+ { 0x009680, 0x0096FF }, /* DSS */
+ { 0x00D800, 0x00D87F }, /* GSLICE */
+ { 0x00DC00, 0x00DCFF }, /* GSLICE */
+ { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved ) */
+ { 0x017000, 0x017FFF }, /* GSLICE */
+ { 0x024A00, 0x024A7F }, /* DSS */
+ {},
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+ { 0x008140, 0x00817F }, /* COMPUTE (0x8140-0x814F &
0x8160-0x817F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00DC00, 0x00DCFF }, /* COMPUTE */ + { 0x00DE80, 0x00E7FF }, /* DSS (0xDF00-0xE1FF reserved ) */ + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct xe_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + {}, +}; + +static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { + { 0x393200, 0x39323F }, + { 0x393400, 0x3934FF }, + {}, +}; + +static const struct xe_mmio_range dg2_implicit_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SF (SQIDI replication) */ + { 0x001000, 0x001FFF }, /* SF (SQIDI replication) */ + { 0x004000, 0x004AFF }, /* GAM (MSLICE replication) */ + { 0x008700, 0x0087FF }, /* MCFG (SQIDI replication) */ + { 0x00C800, 0x00CFFF }, /* GAM (MSLICE replication) */ + { 0x00F000, 0x00FFFF }, /* GAM (MSLICE replication) */ + {}, +}; + +static const struct xe_mmio_range xe2lpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + { 0x00E980, 0x00E9FF }, /* SLICE */ + { 0x013000, 0x0133FF }, /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */ + {}, +}; + +static const struct xe_mmio_range xe2lpg_sqidi_psmi_steering_table[] = { + { 0x000B00, 0x000BFF }, + { 0x001000, 0x001FFF }, + {}, +}; + +static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAM, rsvd, GAMWKR */ + { 0x008700, 0x00887F }, /* SQIDI, MEMPIPE */ + { 0x00B000, 0x00B3FF }, /* NODE, L3BANK */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* MEMPIPE */ + { 0x00E900, 0x00E97F }, /* MEMPIPE */ + { 0x00F000, 0x00FFFF }, /* GAM, GAMWKR */ + { 0x013400, 0x0135FF }, /* MEMPIPE */ + {}, +}; + +static const struct xe_mmio_range xe2lpm_gpmxmt_steering_table[] = { + { 0x388160, 0x38817F }, + { 0x389480, 0x3894CF }, + {}, +}; + +static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = { + { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */ + { 0x384900, 0x384AFF }, /* GAM */ + { 0x389560, 0x3895FF }, /* MEDIAINF */ + { 0x38B600, 0x38B8FF }, /* L3BANK */ + { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */ + { 0x38F000, 0x38F0FF }, /* GAM */ + { 0x393C00, 0x393C7F }, /* MEDIAINF */ + {}, +}; + +static void init_steering_l3bank(struct xe_gt *gt) +{ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3)); + u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, + xe_mmio_read32(gt, XEHP_FUSE4)); + + /* + * Group selects mslice, instance selects bank within mslice. + * Bank 0 is always valid _except_ when the bank mask is 010b. 
+ */ + gt->steering[L3BANK].group_target = __ffs(mslice_mask); + gt->steering[L3BANK].instance_target = + bank_mask & BIT(0) ? 0 : 2; + } else if (gt_to_xe(gt)->info.platform == XE_DG2) { + u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3)); + u32 bank = __ffs(mslice_mask) * 8; + + /* + * Like mslice registers, look for a valid mslice and steer to + * the first L3BANK of that quad. Access to the Nth L3 bank is + * split between the first bits of group and instance + */ + gt->steering[L3BANK].group_target = (bank >> 2) & 0x7; + gt->steering[L3BANK].instance_target = bank & 0x3; + } else { + u32 fuse = REG_FIELD_GET(L3BANK_MASK, + ~xe_mmio_read32(gt, MIRROR_FUSE3)); + + gt->steering[L3BANK].group_target = 0; /* unused */ + gt->steering[L3BANK].instance_target = __ffs(fuse); + } +} + +static void init_steering_mslice(struct xe_gt *gt) +{ + u32 mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3)); + + /* + * mslice registers are valid (not terminated) if either the meml3 + * associated with the mslice is present, or at least one DSS associated + * with the mslice is present. There will always be at least one meml3 + * so we can just use that to find a non-terminated mslice and ignore + * the DSS fusing. + */ + gt->steering[MSLICE].group_target = __ffs(mask); + gt->steering[MSLICE].instance_target = 0; /* unused */ + + /* + * LNCF termination is also based on mslice presence, so we'll set + * it up here. Either LNCF within a non-terminated mslice will work, + * so we just always pick LNCF 0 here. + */ + gt->steering[LNCF].group_target = __ffs(mask) << 1; + gt->steering[LNCF].instance_target = 0; /* unused */ +} + +static void init_steering_dss(struct xe_gt *gt) +{ + unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), + xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)); + unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4; + + gt->steering[DSS].group_target = dss / dss_per_grp; + gt->steering[DSS].instance_target = dss % dss_per_grp; +} + +static void init_steering_oaddrm(struct xe_gt *gt) +{ + /* + * First instance is only terminated if the entire first media slice + * is absent (i.e., no VCS0 or VECS0). 
+ */
+ if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+ gt->steering[OADDRM].group_target = 0;
+ else
+ gt->steering[OADDRM].group_target = 1;
+
+ gt->steering[OADDRM].instance_target = 0; /* unused */
+}
+
+static void init_steering_sqidi_psmi(struct xe_gt *gt)
+{
+ u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
+ xe_mmio_read32(gt, MIRROR_FUSE3));
+ u32 select = __ffs(mask);
+
+ gt->steering[SQIDI_PSMI].group_target = select >> 1;
+ gt->steering[SQIDI_PSMI].instance_target = select & 0x1;
+}
+
+static void init_steering_inst0(struct xe_gt *gt)
+{
+ gt->steering[INSTANCE0].group_target = 0; /* unused */
+ gt->steering[INSTANCE0].instance_target = 0; /* unused */
+}
+
+static const struct {
+ const char *name;
+ void (*init)(struct xe_gt *gt);
+} xe_steering_types[] = {
+ [L3BANK] = { "L3BANK", init_steering_l3bank },
+ [MSLICE] = { "MSLICE", init_steering_mslice },
+ [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */
+ [DSS] = { "DSS", init_steering_dss },
+ [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm },
+ [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi },
+ [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 },
+ [IMPLICIT_STEERING] = { "IMPLICIT", NULL },
+};
+
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
+ BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+ spin_lock_init(&gt->mcr_lock);
+
+ if (gt->info.type == XE_GT_TYPE_MEDIA) {
+ drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+ if (MEDIA_VER(xe) >= 20) {
+ gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+ gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
+ } else {
+ gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+ }
+ } else {
+ if (GRAPHICS_VER(xe) >= 20) {
+ gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
+ gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
+ gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table;
+ } else if (GRAPHICS_VERx100(xe) >= 1270) {
+ gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+ gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+ gt->steering[DSS].ranges = xelpg_dss_steering_table;
+ } else if (xe->info.platform == XE_PVC) {
+ gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+ gt->steering[DSS].ranges = xehpc_dss_steering_table;
+ } else if (xe->info.platform == XE_DG2) {
+ gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
+ gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+ gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+ gt->steering[DSS].ranges = xehp_dss_steering_table;
+ gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
+ } else {
+ gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+ gt->steering[DSS].ranges = xelp_dss_steering_table;
+ }
+ }
+
+ /* Select non-terminated steering target for each type */
+ for (int i = 0; i < NUM_STEERING_TYPES; i++)
+ if (gt->steering[i].ranges && xe_steering_types[i].init)
+ xe_steering_types[i].init(gt);
+}
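A worked example of the group/instance split that init_steering_dss() computes above, as a stand-alone sketch with assumed values (4 DSS per group, i.e. the non-PVC case, and DSS 9 as the first available unit):

	/* Illustrative only: mirrors the division/modulo in init_steering_dss(). */
	static void dss_steer_example(unsigned int *group, unsigned int *instance)
	{
		unsigned int dss = 9, dss_per_grp = 4;	/* assumed example values */

		*group = dss / dss_per_grp;	/* == 2 */
		*instance = dss % dss_per_grp;	/* == 1 */
	}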
+
+/**
+ * xe_gt_mcr_set_implicit_defaults - Initialize steer control registers
+ * @gt: GT structure
+ *
+ * Some register ranges don't need to have their steering control registers
+ * changed on each access - it's sufficient to set them once on initialization.
+ * This function sets those registers for each platform.
+ */
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.platform == XE_DG2) {
+ u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
+ REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
+
+ xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+ /*
+ * For GAM registers, all reads should be directed to instance 1
+ * (unicast reads against other instances are not allowed),
+ * and instance 1 is already the hardware's default steering
+ * target, which we never change
+ */
+ }
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ * will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned. Returns false if the caller need not perform any steering.
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+ struct xe_reg_mcr reg_mcr,
+ u8 *group, u8 *instance)
+{
+ const struct xe_reg reg = to_xe_reg(reg_mcr);
+ const struct xe_mmio_range *implicit_ranges;
+
+ for (int type = 0; type < IMPLICIT_STEERING; type++) {
+ if (!gt->steering[type].ranges)
+ continue;
+
+ for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
+ if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
+ *group = gt->steering[type].group_target;
+ *instance = gt->steering[type].instance_target;
+ return true;
+ }
+ }
+ }
+
+ implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
+ if (implicit_ranges)
+ for (int i = 0; implicit_ranges[i].end > 0; i++)
+ if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
+ return false;
+
+ /*
+ * Not found in a steering table and not a register with implicit
+ * steering. Just steer to 0/0 as a guess and raise a warning.
+ */
+ drm_WARN(&gt_to_xe(gt)->drm, true,
+ "Did not find MCR register %#x in any MCR steering table\n",
+ reg.addr);
+ *group = 0;
+ *instance = 0;
+
+ return true;
+}
+
+/*
+ * Obtain exclusive access to MCR steering. On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int ret = 0;
+
+ spin_lock(&gt->mcr_lock);
+
+ /*
+ * Starting with MTL we also need to grab a semaphore register
+ * to synchronize with external agents (e.g., firmware) that now
+ * shares the same steering control register. The semaphore is obtained
+ * when a read to the relevant register returns 1.
+ */
+ if (GRAPHICS_VERx100(xe) >= 1270)
+ ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
+ true);
+
+ drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt)
+{
+ /* Release hardware semaphore - this is done by writing 1 to the register */
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+ xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+ spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+ u8 rw_flag, int group, int instance, u32 value)
+{
+ const struct xe_reg reg = to_xe_reg(reg_mcr);
+ struct xe_reg steer_reg;
+ u32 steer_val, val = 0;
+
+ lockdep_assert_held(&gt->mcr_lock);
+
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+ steer_reg = MTL_MCR_SELECTOR;
+ steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+ REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+ } else {
+ steer_reg = MCR_SELECTOR;
+ steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) |
+ REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance);
+ }
+
+ /*
+ * Always leave the hardware in multicast mode when doing reads and only
+ * change it to unicast mode when doing writes of a specific instance.
+ *
+ * The setting of the multicast/unicast bit usually wouldn't matter for
+ * read operations (which always return the value from a single register
+ * instance regardless of how that bit is set), but some platforms may
+ * have workarounds requiring us to remain in multicast mode for reads,
+ * e.g. Wa_22013088509 on PVC. There's no real downside to this, so
+ * we'll just go ahead and do so on all platforms; we'll only clear the
+ * multicast bit from the mask when explicitly doing a write operation.
+ *
+ * No need to save old steering reg value.
+ */
+ if (rw_flag == MCR_OP_READ)
+ steer_val |= MCR_MULTICAST;
+
+ xe_mmio_write32(gt, steer_reg, steer_val);
+
+ if (rw_flag == MCR_OP_READ)
+ val = xe_mmio_read32(gt, reg);
+ else
+ xe_mmio_write32(gt, reg, value);
+
+ /*
+ * If we turned off the multicast bit (during a write) we're required
+ * to turn it back on before finishing. The group and instance values
+ * don't matter since they'll be re-programmed on the next MCR
+ * operation.
+ */
+ if (rw_flag == MCR_OP_WRITE)
+ xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
+
+ return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: register to read
+ *
+ * Reads a GT MCR register. The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */ +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr) +{ + const struct xe_reg reg = to_xe_reg(reg_mcr); + u8 group, instance; + u32 val; + bool steer; + + steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr, + &group, &instance); + + if (steer) { + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, + group, instance, 0); + mcr_unlock(gt); + } else { + val = xe_mmio_read32(gt, reg); + } + + return val; +} + +/** + * xe_gt_mcr_unicast_read - read a specific instance of an MCR register + * @gt: GT structure + * @reg_mcr: the MCR register to read + * @group: the MCR group + * @instance: the MCR instance + * + * Returns the value read from an MCR register after steering toward a specific + * group/instance. + */ +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + int group, int instance) +{ + u32 val; + + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0); + mcr_unlock(gt); + + return val; +} + +/** + * xe_gt_mcr_unicast_write - write a specific instance of an MCR register + * @gt: GT structure + * @reg_mcr: the MCR register to write + * @value: value to write + * @group: the MCR group + * @instance: the MCR instance + * + * Write an MCR register in unicast mode after steering toward a specific + * group/instance. + */ +void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, + u32 value, int group, int instance) +{ + mcr_lock(gt); + rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value); + mcr_unlock(gt); +} + +/** + * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register + * @gt: GT structure + * @reg_mcr: the MCR register to write + * @value: value to write + * + * Write an MCR register in multicast mode to update all instances. + */ +void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, + u32 value) +{ + struct xe_reg reg = to_xe_reg(reg_mcr); + + /* + * Synchronize with any unicast operations. Once we have exclusive + * access, the MULTICAST bit should already be set, so there's no need + * to touch the steering register. 
+ */ + mcr_lock(gt); + xe_mmio_write32(gt, reg, value); + mcr_unlock(gt); +} + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p) +{ + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + if (gt->steering[i].ranges) { + drm_printf(p, "%s steering: group=%#x, instance=%#x\n", + xe_steering_types[i].name, + gt->steering[i].group_target, + gt->steering[i].instance_target); + for (int j = 0; gt->steering[i].ranges[j].end; j++) + drm_printf(p, "\t0x%06x - 0x%06x\n", + gt->steering[i].ranges[j].start, + gt->steering[i].ranges[j].end); + } + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h new file mode 100644 index 000000000000..27ca1bc880a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_MCR_H_ +#define _XE_GT_MCR_H_ + +#include "regs/xe_reg_defs.h" + +struct drm_printer; +struct xe_gt; + +void xe_gt_mcr_init(struct xe_gt *gt); + +void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt); + +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, + int group, int instance); +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr mcr_reg); + +void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, + u32 value, int group, int instance); +void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, + u32 value); + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); + +#endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c new file mode 100644 index 000000000000..59a70d2e0a7a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -0,0 +1,646 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_gt_pagefault.h" + +#include <linux/bitfield.h> +#include <linux/circ_buf.h> + +#include <drm/drm_exec.h> +#include <drm/drm_managed.h> +#include <drm/ttm/ttm_execbuf_util.h> + +#include "abi/guc_actions_abi.h" +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_gt_tlb_invalidation.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_migrate.h" +#include "xe_pt.h" +#include "xe_trace.h" +#include "xe_vm.h" + +struct pagefault { + u64 page_addr; + u32 asid; + u16 pdata; + u8 vfid; + u8 access_type; + u8 fault_type; + u8 fault_level; + u8 engine_class; + u8 engine_instance; + u8 fault_unsuccessful; + bool trva_fault; +}; + +enum access_type { + ACCESS_TYPE_READ = 0, + ACCESS_TYPE_WRITE = 1, + ACCESS_TYPE_ATOMIC = 2, + ACCESS_TYPE_RESERVED = 3, +}; + +enum fault_type { + NOT_PRESENT = 0, + WRITE_ACCESS_VIOLATION = 1, + ATOMIC_ACCESS_VIOLATION = 2, +}; + +struct acc { + u64 va_range_base; + u32 asid; + u32 sub_granularity; + u8 granularity; + u8 vfid; + u8 access_type; + u8 engine_class; + u8 engine_instance; +}; + +static bool access_is_atomic(enum access_type access_type) +{ + return access_type == ACCESS_TYPE_ATOMIC; +} + +static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) +{ + return BIT(tile->id) & vma->tile_present && + !(BIT(tile->id) & vma->usm.tile_invalidated); +} + +static bool vma_matches(struct xe_vma *vma, u64 page_addr) +{ + if (page_addr > xe_vma_end(vma) - 1 || + page_addr + SZ_4K - 1 < xe_vma_start(vma)) + return false; + + return true; +} + +static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL; + + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if 
(vma_matches(vm->usm.last_fault_vma, page_addr)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); + + return vma; +} + +static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, + bool atomic, unsigned int id) +{ + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + unsigned int num_shared = 2; /* slots for bind + move */ + int err; + + err = xe_vm_prepare_vma(exec, vma, num_shared); + if (err) + return err; + + if (atomic && IS_DGFX(vm->xe)) { + if (xe_vma_is_userptr(vma)) { + err = -EACCES; + return err; + } + + /* Migrate to VRAM, move should invalidate the VMA first */ + err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); + if (err) + return err; + } else if (bo) { + /* Create backing store if needed */ + err = xe_bo_validate(bo, vm, true); + if (err) + return err; + } + + return 0; +} + +static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct drm_exec exec; + struct xe_vm *vm; + struct xe_vma *vma = NULL; + struct dma_fence *fence; + bool write_locked; + int ret = 0; + bool atomic; + + /* SW isn't expected to handle TRTT faults */ + if (pf->trva_fault) + return -EFAULT; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + +retry_userptr: + /* + * TODO: Avoid exclusive lock if VM doesn't have userptrs, or + * start out read-locked? + */ + down_write(&vm->lock); + write_locked = true; + vma = lookup_vma(vm, pf->page_addr); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + downgrade_write(&vm->lock); + write_locked = false; + } + + trace_xe_vma_pagefault(vma); + + atomic = access_is_atomic(pf->access_type); + + /* Check if VMA is valid */ + if (vma_is_valid(tile, vma) && !atomic) + goto unlock_vm; + + /* TODO: Validate fault */ + + if (xe_vma_is_userptr(vma) && write_locked) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + + ret = xe_vma_userptr_pin_pages(vma); + if (ret) + goto unlock_vm; + + downgrade_write(&vm->lock); + write_locked = false; + } + + /* Lock VM and BOs dma-resv */ + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + ret = xe_pf_begin(&exec, vma, atomic, tile->id); + drm_exec_retry_on_contention(&exec); + if (ret) + goto unlock_dma_resv; + } + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, + vma->tile_present & BIT(tile->id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } + + /* + * XXX: Should we drop the lock before waiting? This only helps if doing + * GPU binds which is currently only done if we have to wait for more + * than 10ms on a move. 
+ */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ if (xe_vma_is_userptr(vma))
+ ret = xe_vma_userptr_check_repin(vma);
+ vma->usm.tile_invalidated &= ~BIT(tile->id);
+
+unlock_dma_resv:
+ drm_exec_fini(&exec);
+unlock_vm:
+ if (!ret)
+ vm->usm.last_fault_vma = vma;
+ if (write_locked)
+ up_write(&vm->lock);
+ else
+ up_read(&vm->lock);
+ if (ret == -EAGAIN)
+ goto retry_userptr;
+
+ if (!ret) {
+ ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
+ if (ret >= 0)
+ ret = 0;
+ }
+ xe_vm_put(vm);
+
+ return ret;
+}
+
+static int send_pagefault_reply(struct xe_guc *guc,
+ struct xe_guc_pagefault_reply *reply)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+ reply->dw0,
+ reply->dw1,
+ };
+
+ return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+ drm_dbg(&xe->drm, "\n\tASID: %d\n"
+ "\tVFID: %d\n"
+ "\tPDATA: 0x%04x\n"
+ "\tFaulted Address: 0x%08x%08x\n"
+ "\tFaultType: %d\n"
+ "\tAccessType: %d\n"
+ "\tFaultLevel: %d\n"
+ "\tEngineClass: %d\n"
+ "\tEngineInstance: %d\n",
+ pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+ lower_32_bits(pf->page_addr),
+ pf->fault_type, pf->access_type, pf->fault_level,
+ pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW 4
+
+static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+ const struct xe_guc_pagefault_desc *desc;
+ bool ret = false;
+
+ spin_lock_irq(&pf_queue->lock);
+ if (pf_queue->head != pf_queue->tail) {
+ desc = (const struct xe_guc_pagefault_desc *)
+ (pf_queue->data + pf_queue->head);
+
+ pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+ pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
+ pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+ pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+ pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+ PFD_PDATA_HI_SHIFT;
+ pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+ pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+ pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+ pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+ pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+ pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+ PFD_VIRTUAL_ADDR_HI_SHIFT;
+ pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+ PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+ pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+ PF_QUEUE_NUM_DW;
+ ret = true;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+
+ return ret;
+}
+
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+ lockdep_assert_held(&pf_queue->lock);
+
+ return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+ PF_MSG_LEN_DW;
+}
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct pf_queue *pf_queue;
+ unsigned long flags;
+ u32 asid;
+ bool full;
+
+ if (unlikely(len != PF_MSG_LEN_DW))
+ return -EPROTO;
+
+ asid = FIELD_GET(PFD_ASID, msg[1]);
+ pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+ spin_lock_irqsave(&pf_queue->lock, flags);
+ full = pf_queue_full(pf_queue);
+ if (!full) {
+ memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+ pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+ queue_work(gt->usm.pf_wq, &pf_queue->worker);
+ } else {
+ drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
+ }
+ spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+ return full ? -ENOSPC : 0;
+}
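The head/tail handling above is a standard power-of-two ring buffer: CIRC_SPACE() keeps at least one message of slack so that head == tail always means "empty". A stand-alone sketch of the producer side, mirroring pf_queue_full() and the copy in xe_guc_pagefault_handler() (illustrative only; it assumes the same PF_QUEUE_NUM_DW and PF_MSG_LEN_DW constants and that the caller holds the queue lock):

	#include <linux/circ_buf.h>
	#include <linux/string.h>

	/* Sketch: enqueue one 4-dword message if the ring still has room. */
	static bool pf_queue_push(u32 *data, u32 *tail, u32 head, const u32 *msg)
	{
		if (CIRC_SPACE(*tail, head, PF_QUEUE_NUM_DW) <= PF_MSG_LEN_DW)
			return false;	/* queue full; caller reports -ENOSPC */

		memcpy(data + *tail, msg, PF_MSG_LEN_DW * sizeof(u32));
		*tail = (*tail + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW;
		return true;
	}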
+
+#define USM_QUEUE_MAX_RUNTIME_MS 20
+
+static void pf_queue_work_func(struct work_struct *w)
+{
+ struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+ struct xe_gt *gt = pf_queue->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc_pagefault_reply reply = {};
+ struct pagefault pf = {};
+ unsigned long threshold;
+ int ret;
+
+ threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+ while (get_pagefault(pf_queue, &pf)) {
+ ret = handle_pagefault(gt, &pf);
+ if (unlikely(ret)) {
+ print_pagefault(xe, &pf);
+ pf.fault_unsuccessful = 1;
+ drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+ }
+
+ reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+ FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+ FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+ FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+ FIELD_PREP(PFR_ASID, pf.asid);
+
+ reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+ FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+ FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+ FIELD_PREP(PFR_PDATA, pf.pdata);
+
+ send_pagefault_reply(&gt->uc.guc, &reply);
+
+ if (time_after(jiffies, threshold) &&
+ pf_queue->head != pf_queue->tail) {
+ queue_work(gt->usm.pf_wq, w);
+ break;
+ }
+ }
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int i;
+
+ if (!xe->info.has_usm)
+ return 0;
+
+ for (i = 0; i < NUM_PF_QUEUE; ++i) {
+ gt->usm.pf_queue[i].gt = gt;
+ spin_lock_init(&gt->usm.pf_queue[i].lock);
+ INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+ }
+ for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+ gt->usm.acc_queue[i].gt = gt;
+ spin_lock_init(&gt->usm.acc_queue[i].lock);
+ INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+ }
+
+ gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+ if (!gt->usm.pf_wq)
+ return -ENOMEM;
+
+ gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ NUM_ACC_QUEUE);
+ if (!gt->usm.acc_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int i;
+
+ if (!xe->info.has_usm)
+ return;
+
+ for (i = 0; i < NUM_PF_QUEUE; ++i) {
+ spin_lock_irq(&gt->usm.pf_queue[i].lock);
+ gt->usm.pf_queue[i].head = 0;
+ gt->usm.pf_queue[i].tail = 0;
+ spin_unlock_irq(&gt->usm.pf_queue[i].lock);
+ }
+
+ for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+ spin_lock(&gt->usm.acc_queue[i].lock);
+ gt->usm.acc_queue[i].head = 0;
+ gt->usm.acc_queue[i].tail = 0;
+ spin_unlock(&gt->usm.acc_queue[i].lock);
+ }
+}
+
+static int granularity_in_byte(int val)
+{
+ switch (val) {
+ case 0:
+ return SZ_128K;
+ case 1:
+ return SZ_2M;
+ case 2:
+ return SZ_16M;
+ case 3:
+ return SZ_64M;
+ default:
+ return 0;
+ }
+}
+
+static int sub_granularity_in_byte(int val)
+{
+ return (granularity_in_byte(val) / 32);
+}
+
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+ drm_warn(&xe->drm, "Access counter request:\n"
+ "\tType: %s\n"
+ "\tASID: %d\n"
+ "\tVFID: %d\n"
+ "\tEngine: %d:%d\n"
+ "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+ "\tSub_Granularity Vector: 0x%08x\n"
+ "\tVA Range base: 0x%016llx\n",
+ acc->access_type ?
"AC_NTFY_VAL" : "AC_TRIG_VAL", + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, + granularity_in_byte(acc->granularity) / SZ_1K, + sub_granularity_in_byte(acc->granularity) / SZ_1K, + acc->sub_granularity, acc->va_range_base); +} + +static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) +{ + u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * + sub_granularity_in_byte(acc->granularity); + + return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); +} + +static int handle_acc(struct xe_gt *gt, struct acc *acc) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct drm_exec exec; + struct xe_vm *vm; + struct xe_vma *vma; + int ret = 0; + + /* We only support ACC_TRIGGER at the moment */ + if (acc->access_type != ACC_TRIGGER) + return -EINVAL; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, acc->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + + down_read(&vm->lock); + + /* Lookup VMA */ + vma = get_acc_vma(vm, acc); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + trace_xe_vma_acc(vma); + + /* Userptr or null can't be migrated, nothing to do */ + if (xe_vma_has_no_bo(vma)) + goto unlock_vm; + + /* Lock VM and BOs dma-resv */ + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + ret = xe_pf_begin(&exec, vma, true, tile->id); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + } + + drm_exec_fini(&exec); +unlock_vm: + up_read(&vm->lock); + xe_vm_put(vm); + + return ret; +} + +#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) + +#define ACC_MSG_LEN_DW 4 + +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) +{ + const struct xe_guc_acc_desc *desc; + bool ret = false; + + spin_lock(&acc_queue->lock); + if (acc_queue->head != acc_queue->tail) { + desc = (const struct xe_guc_acc_desc *) + (acc_queue->data + acc_queue->head); + + acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); + acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | + FIELD_GET(ACC_SUBG_LO, desc->dw0); + acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); + acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); + acc->asid = FIELD_GET(ACC_ASID, desc->dw1); + acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); + acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); + acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, + desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); + + acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % + ACC_QUEUE_NUM_DW; + ret = true; + } + spin_unlock(&acc_queue->lock); + + return ret; +} + +static void acc_queue_work_func(struct work_struct *w) +{ + struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); + struct xe_gt *gt = acc_queue->gt; + struct xe_device *xe = gt_to_xe(gt); + struct acc acc = {}; + unsigned long threshold; + int ret; + + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); + + while (get_acc(acc_queue, &acc)) { + ret = handle_acc(gt, &acc); + if (unlikely(ret)) { + print_acc(xe, &acc); + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + } + + if (time_after(jiffies, threshold) && + acc_queue->head != acc_queue->tail) { + queue_work(gt->usm.acc_wq, w); + break; + } + } +} + +static bool acc_queue_full(struct acc_queue *acc_queue) +{ + lockdep_assert_held(&acc_queue->lock); + + return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= + ACC_MSG_LEN_DW; +} + 
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (!full) {
+		memcpy(acc_queue->data + acc_queue->tail, msg,
+		       len * sizeof(u32));
+		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	} else {
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 000000000000..839c065a5e4c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif /* _XE_GT_PAGEFAULT_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
new file mode 100644
index 000000000000..5991bcadd47e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_PRINTK_H_
+#define _XE_GT_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define xe_gt_printk(_gt, _level, _fmt, ...) \
+	drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_err(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_warn(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_notice(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_info(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_dbg(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
+	drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
+	drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ON(_gt, _condition) \
+	xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+
+#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
+	xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 000000000000..c69d2e8a0fe1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_sysfs.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+void xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
+	if (err) {
+		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
+		kobject_put(&kg->base);
+		return;
+	}
+
+	gt->sysfs = &kg->base;
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 000000000000..e3ec278ca0be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+void xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 000000000000..d3bc6b83360f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
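+ *
+ * Given the embedded kobject, the GT is recovered with container_of(), as
+ * kobj_to_gt() in xe_gt_sysfs.h does:
+ *
+ *	return container_of(kobj, struct kobj_gt, base)->gt;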
+ */ +struct kobj_gt { + /** @base: The actual kobject */ + struct kobject base; + /** @gt: A pointer to the GT itself */ + struct xe_gt *gt; +}; + +#endif /* _XE_GT_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c new file mode 100644 index 000000000000..63d640591a52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include <regs/xe_gt_regs.h> +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_throttle_sysfs.h" +#include "xe_mmio.h" + +/** + * DOC: Xe GT Throttle + * + * Provides sysfs entries for frequency throttle reasons in GT + * + * device/gt#/freq0/throttle/status - Overall status + * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1 + * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2 + * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc. + * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal + * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot + * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL + * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT + * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC + */ + +static struct xe_gt * +dev_to_gt(struct device *dev) +{ + return kobj_to_gt(dev->kobj.parent); +} + +static u32 read_perf_limit_reasons(struct xe_gt *gt) +{ + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); + else + reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + + return reg; +} + +static u32 read_status(struct xe_gt *gt) +{ + u32 status = read_perf_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; + + return status; +} + +static u32 read_reason_pl1(struct xe_gt *gt) +{ + u32 pl1 = read_perf_limit_reasons(gt) & POWER_LIMIT_1_MASK; + + return pl1; +} + +static u32 read_reason_pl2(struct xe_gt *gt) +{ + u32 pl2 = read_perf_limit_reasons(gt) & POWER_LIMIT_2_MASK; + + return pl2; +} + +static u32 read_reason_pl4(struct xe_gt *gt) +{ + u32 pl4 = read_perf_limit_reasons(gt) & POWER_LIMIT_4_MASK; + + return pl4; +} + +static u32 read_reason_thermal(struct xe_gt *gt) +{ + u32 thermal = read_perf_limit_reasons(gt) & THERMAL_LIMIT_MASK; + + return thermal; +} + +static u32 read_reason_prochot(struct xe_gt *gt) +{ + u32 prochot = read_perf_limit_reasons(gt) & PROCHOT_MASK; + + return prochot; +} + +static u32 read_reason_ratl(struct xe_gt *gt) +{ + u32 ratl = read_perf_limit_reasons(gt) & RATL_MASK; + + return ratl; +} + +static u32 read_reason_vr_thermalert(struct xe_gt *gt) +{ + u32 thermalert = read_perf_limit_reasons(gt) & VR_THERMALERT_MASK; + + return thermalert; +} + +static u32 read_reason_vr_tdc(struct xe_gt *gt) +{ + u32 tdc = read_perf_limit_reasons(gt) & VR_TDC_MASK; + + return tdc; +} + +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool status = !!read_status(gt); + + return sysfs_emit(buff, "%u\n", status); +} +static DEVICE_ATTR_RO(status); + +static ssize_t reason_pl1_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl1 = !!read_reason_pl1(gt); + + return sysfs_emit(buff, "%u\n", pl1); +} +static 
DEVICE_ATTR_RO(reason_pl1); + +static ssize_t reason_pl2_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl2 = !!read_reason_pl2(gt); + + return sysfs_emit(buff, "%u\n", pl2); +} +static DEVICE_ATTR_RO(reason_pl2); + +static ssize_t reason_pl4_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl4 = !!read_reason_pl4(gt); + + return sysfs_emit(buff, "%u\n", pl4); +} +static DEVICE_ATTR_RO(reason_pl4); + +static ssize_t reason_thermal_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool thermal = !!read_reason_thermal(gt); + + return sysfs_emit(buff, "%u\n", thermal); +} +static DEVICE_ATTR_RO(reason_thermal); + +static ssize_t reason_prochot_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool prochot = !!read_reason_prochot(gt); + + return sysfs_emit(buff, "%u\n", prochot); +} +static DEVICE_ATTR_RO(reason_prochot); + +static ssize_t reason_ratl_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool ratl = !!read_reason_ratl(gt); + + return sysfs_emit(buff, "%u\n", ratl); +} +static DEVICE_ATTR_RO(reason_ratl); + +static ssize_t reason_vr_thermalert_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool thermalert = !!read_reason_vr_thermalert(gt); + + return sysfs_emit(buff, "%u\n", thermalert); +} +static DEVICE_ATTR_RO(reason_vr_thermalert); + +static ssize_t reason_vr_tdc_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool tdc = !!read_reason_vr_tdc(gt); + + return sysfs_emit(buff, "%u\n", tdc); +} +static DEVICE_ATTR_RO(reason_vr_tdc); + +static struct attribute *throttle_attrs[] = { + &dev_attr_status.attr, + &dev_attr_reason_pl1.attr, + &dev_attr_reason_pl2.attr, + &dev_attr_reason_pl4.attr, + &dev_attr_reason_thermal.attr, + &dev_attr_reason_prochot.attr, + &dev_attr_reason_ratl.attr, + &dev_attr_reason_vr_thermalert.attr, + &dev_attr_reason_vr_tdc.attr, + NULL +}; + +static const struct attribute_group throttle_group_attrs = { + .name = "throttle", + .attrs = throttle_attrs, +}; + +static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + sysfs_remove_group(gt->freq, &throttle_group_attrs); +} + +void xe_gt_throttle_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + err = sysfs_create_group(gt->freq, &throttle_group_attrs); + if (err) { + drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h new file mode 100644 index 000000000000..3ecfd4beffe1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_THROTTLE_SYSFS_H_ +#define _XE_GT_THROTTLE_SYSFS_H_ + +#include <drm/drm_managed.h> + +struct xe_gt; + +void xe_gt_throttle_sysfs_init(struct xe_gt *gt); + +#endif /* _XE_GT_THROTTLE_SYSFS_H_ */ + diff --git 
a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
new file mode 100644
index 000000000000..7eef23a00d77
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_tlb_invalidation.h"
+
+#include "abi/guc_actions_abi.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_trace.h"
+
+#define TLB_TIMEOUT	(HZ / 4)
+
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+	struct xe_gt *gt = container_of(work, struct xe_gt,
+					tlb_invalidation.fence_tdr.work);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+						    fence->invalidation_time);
+
+		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+			break;
+
+		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
+			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+
+		list_del(&fence->link);
+		fence->base.error = -ETIME;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		queue_delayed_work(system_wq,
+				   &gt->tlb_invalidation.fence_tdr,
+				   TLB_TIMEOUT);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+}
+
+/**
+ * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
+ * @gt: graphics tile
+ *
+ * Initialize GT TLB invalidation state, purely software initialization, should
+ * be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+{
+	gt->tlb_invalidation.seqno = 1;
+	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
+	spin_lock_init(&gt->tlb_invalidation.pending_lock);
+	spin_lock_init(&gt->tlb_invalidation.lock);
+	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
+	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
+			  xe_gt_tlb_fence_timeout);
+
+	return 0;
+}
+
+static void
+__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	trace_xe_gt_tlb_invalidation_fence_signal(fence);
+	dma_fence_signal(&fence->base);
+	dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(fence);
+}
+
+/**
+ * xe_gt_tlb_invalidation_reset - TLB invalidation reset handling for a GT
+ * @gt: graphics tile
+ *
+ * Signal any pending invalidation fences, should be called during a GT reset
+ */
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+{
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	struct xe_guc *guc = &gt->uc.guc;
+	int pending_seqno;
+
+	/*
+	 * CT channel is already disabled at this point. No new TLB requests can
+	 * appear.
+	 */
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+	/*
+	 * We might have various kworkers waiting for TLB flushes to complete
+	 * which are not tracked with an explicit TLB fence, however at this
+	 * stage that will never happen since the CT is already disabled, so
+	 * make sure we signal them here under the assumption that we have
+	 * completed a full GT reset.
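+	 *
+	 * seqno below is the next value to be assigned, so the last seqno
+	 * handed out is seqno - 1, wrapping to TLB_INVALIDATION_SEQNO_MAX - 1
+	 * when seqno has just wrapped back to 1; publishing that value as
+	 * seqno_recv marks everything issued so far as complete.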
+	 */
+	if (gt->tlb_invalidation.seqno == 1)
+		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+	else
+		pending_seqno = gt->tlb_invalidation.seqno - 1;
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link)
+		invalidation_fence_signal(fence);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
+
+	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;
+
+	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return seqno_recv >= seqno;
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u32 *action, int len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidations being processed
+	 * in the order in which they are issued (which they currently are);
+	 * if that changes, the algorithm will need to be updated.
+	 */
+
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->tlb_invalidation.seqno;
+	if (fence) {
+		fence->seqno = seqno;
+		trace_xe_gt_tlb_invalidation_fence_send(fence);
+	}
+	action[1] = seqno;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret && fence) {
+		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+		/*
+		 * We haven't actually published the TLB fence as per
+		 * pending_fences, but in theory our seqno could have already
+		 * been written as we acquired the pending_lock. In such a case
+		 * we can just go ahead and signal the fence here.
+		 */
+		if (tlb_invalidation_seqno_past(gt, seqno)) {
+			__invalidation_fence_signal(fence);
+		} else {
+			fence->invalidation_time = ktime_get();
+			list_add_tail(&fence->link,
+				      &gt->tlb_invalidation.pending_fences);
+
+			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
+				queue_delayed_work(system_wq,
+						   &gt->tlb_invalidation.fence_tdr,
+						   TLB_TIMEOUT);
+		}
+		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	} else if (ret < 0 && fence) {
+		__invalidation_fence_signal(fence);
+	}
+	if (!ret) {
+		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
+			TLB_INVALIDATION_SEQNO_MAX;
+		if (!gt->tlb_invalidation.seqno)
+			gt->tlb_invalidation.seqno = 1;
+		ret = seqno;
+	}
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
+/**
+ * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GuC. Completion of the TLB invalidation is
+ * asynchronous and the caller can use seqno + xe_gt_tlb_invalidation_wait to
+ * wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
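+ *
+ * Hypothetical usage sketch, pairing issue and wait:
+ *
+ *	seqno = xe_gt_tlb_invalidation_guc(gt);
+ *	if (seqno > 0)
+ *		ret = xe_gt_tlb_invalidation_wait(gt, seqno);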
+ */
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,  /* seqno, replaced in send_tlb_invalidation */
+		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+	};
+
+	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+				     ARRAY_SIZE(action));
+}
+
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of the TLB invalidation is asynchronous and the
+ * caller can either use the invalidation fence or seqno +
+ * xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	xe_gt_assert(gt, vma);
+
+	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 start = xe_vma_start(vma);
+		u64 length = xe_vma_size(vma);
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range (e.g. a 16K VMA at
+		 * offset 8K straddles two 16K-aligned blocks, so it ends up as
+		 * a single 32K invalidation starting at address 0).
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(xe_vma_start(vma), align);
+		end = ALIGN(xe_vma_end(vma), align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		xe_gt_assert(gt, length >= SZ_4K);
+		xe_gt_assert(gt, is_power_of_2(length));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = xe_vma_vm(vma)->usm.asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+}
+
+/**
+ * xe_gt_tlb_invalidation_wait - Wait for a TLB invalidation to complete
+ * @gt: graphics tile
+ * @seqno: seqno to wait on, which was returned from xe_gt_tlb_invalidation
+ *
+ * Wait for 200ms for a TLB invalidation to complete, in practice we should
+ * always receive the TLB invalidation within 200ms.
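+ * (The wait itself is bounded by TLB_TIMEOUT, i.e. HZ / 4, the same interval
+ * the fence TDR above runs at.)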
+ *
+ * Return: 0 on success, -ETIME on TLB invalidation timeout
+ */
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct drm_printer p = drm_err_printer(__func__);
+	int ret;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 TLB_TIMEOUT);
+	if (!ret) {
+		drm_err(&xe->drm, "gt%d: TLB invalidation timed out, seqno=%d, recv=%d\n",
+			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
+ * invalidation fences for seqno. Algorithm for this depends on seqno being
+ * received in-order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	unsigned long flags;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/*
+	 * This can also be run both directly from the IRQ handler and also in
+	 * process_g2h_msg(). Only one may process any individual CT message,
+	 * however the order they are processed here could result in skipping a
+	 * seqno. To handle that we just process all the seqnos from the last
+	 * seqno_recv up to and including the one in msg[0]. The delta should be
+	 * very small so there shouldn't be many pending_fences we actually
+	 * need to iterate over here.
+	 *
+	 * From GuC POV we expect the seqnos to always appear in-order, so if we
+	 * see something later in the timeline we can be sure that anything
+	 * appearing earlier has already signalled, just that we have yet to
+	 * officially process the CT message, as when racing against
+	 * process_g2h_msg().
+	 */
+	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
+	if (tlb_invalidation_seqno_past(gt, msg[0])) {
+		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * wake_up_all() and wait_event_timeout() already have the correct
+	 * barriers.
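+	 * (The WRITE_ONCE() below pairs with the READ_ONCE() in
+	 * tlb_invalidation_seqno_past(), and the wake/wait primitives order
+	 * the store against the waiters' re-check.)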
+	 */
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		trace_xe_gt_tlb_invalidation_fence_recv(fence);
+
+		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
+			break;
+
+		invalidation_fence_signal(fence);
+	}
+
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		mod_delayed_work(system_wq,
+				 &gt->tlb_invalidation.fence_tdr,
+				 TLB_TIMEOUT);
+	else
+		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
+	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
new file mode 100644
index 000000000000..b333c1709397
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_H_
+#define _XE_GT_TLB_INVALIDATION_H_
+
+#include <linux/types.h>
+
+#include "xe_gt_tlb_invalidation_types.h"
+
+struct xe_gt;
+struct xe_guc;
+struct xe_vma;
+
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif /* _XE_GT_TLB_INVALIDATION_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
new file mode 100644
index 000000000000..934c828efe31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_
+#define _XE_GT_TLB_INVALIDATION_TYPES_H_
+
+#include <linux/dma-fence.h>
+
+/**
+ * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
+ *
+ * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_gt_tlb_invalidation_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending tlb fences */
+	struct list_head link;
+	/** @seqno: seqno of TLB invalidation to signal fence on */
+	int seqno;
+	/** @invalidation_time: time of TLB invalidation */
+	ktime_t invalidation_time;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 000000000000..a8d7f272c30a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_topology.h"
+
+#include <linux/bitmap.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/*
+	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+	 * of enable).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg_val = ~reg_val & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg_val;
+	} else {
+		/* All other platforms, one bit = 2 EU */
+		for (i = 0; i < fls(reg_val); i++)
+			if (reg_val & BIT(i))
+				val |= 0x3 << 2 * i;
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+static void
+get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
+{
+	if (GRAPHICS_VER(xe) > 20) {
+		*geometry_regs = 3;
+		*compute_regs = 3;
+	} else if (GRAPHICS_VERx100(xe) == 1260) {
+		*geometry_regs = 0;
+		*compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		*geometry_regs = 1;
+		*compute_regs = 1;
+	} else {
+		*geometry_regs = 1;
+		*compute_regs = 0;
+	}
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
+
+	/*
+	 * Register counts returned shouldn't exceed the number of registers
+	 * passed as parameters below.
+	 */
+	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
+	drm_WARN_ON(&xe->drm, num_compute_regs > 3);
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
+		      num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE,
+		      XE2_GT_GEOMETRY_DSS_1,
+		      XE2_GT_GEOMETRY_DSS_2);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
+		      XE2_GT_COMPUTE_DSS_2);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+
+	drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+
+}
+
+/*
+ * Used to obtain the index of the first DSS. Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
+
+bool xe_dss_mask_empty(const xe_dss_mask_t mask)
+{
+	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+/**
+ * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
+ * @gt: GT to check
+ * @quad: Which quadrant of the DSS space to check
+ *
+ * Since Xe_HP platforms can have up to four CCS engines, those engines
+ * are each logically associated with a quarter of the possible DSS
+ * (e.g. with a single 32-bit fuse register there are eight DSS per
+ * quadrant, so quadrant 2 covers DSS 16-23).
If there + * are no DSS present in one of the four quadrants of the DSS space, the + * corresponding CCS engine is also not available for use. + * + * Returns false if all DSS in a quadrant of the GT are fused off, else true. + */ +bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) +{ + struct xe_device *xe = gt_to_xe(gt); + xe_dss_mask_t all_dss; + int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); + dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; + + quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); + + return quad_first < (quad + 1) * dss_per_quad; +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h new file mode 100644 index 000000000000..d1b54fb52ea6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_TOPOLOGY_H_ +#define _XE_GT_TOPOLOGY_H_ + +#include "xe_gt_types.h" + +struct drm_printer; + +void xe_gt_topology_init(struct xe_gt *gt); + +void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); + +unsigned int +xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); + +bool xe_dss_mask_empty(const xe_dss_mask_t mask); + +bool +xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); + +#endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h new file mode 100644 index 000000000000..f74684660475 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2023 Intel Corporation + */ + +#ifndef _XE_GT_TYPES_H_ +#define _XE_GT_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_gt_idle_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_reg_sr_types.h" +#include "xe_sa_types.h" +#include "xe_uc_types.h" + +struct xe_exec_queue_ops; +struct xe_migrate; +struct xe_ring_ops; + +enum xe_gt_type { + XE_GT_TYPE_UNINITIALIZED, + XE_GT_TYPE_MAIN, + XE_GT_TYPE_MEDIA, +}; + +#define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_EU_FUSE_REGS 1 + +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)]; + +struct xe_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. + */ +enum xe_steering_type { + L3BANK, + MSLICE, + LNCF, + DSS, + OADDRM, + SQIDI_PSMI, + + /* + * On some platforms there are multiple types of MCR registers that + * will always return a non-terminated value at instance (0, 0). We'll + * lump those all into a single category to keep things simple. 
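+	 *
+	 * (I.e. reads of these ranges are simply steered to instance (0, 0),
+	 * where a non-terminated value is guaranteed.)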
+	 */
+	INSTANCE0,
+
+	/*
+	 * Register ranges that don't need special steering for each register:
+	 * it's sufficient to keep the HW-default for the selector, or only
+	 * change it once, on GT initialization. This needs to be the last
+	 * steering type.
+	 */
+	IMPLICIT_STEERING,
+	NUM_STEERING_TYPES
+};
+
+#define gt_to_tile(gt__) \
+	_Generic(gt__, \
+		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile), \
+		 struct xe_gt * : (gt__)->tile)
+
+#define gt_to_xe(gt__) \
+	_Generic(gt__, \
+		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe), \
+		 struct xe_gt * : gt_to_tile(gt__)->xe)
+
+/**
+ * struct xe_gt - A "Graphics Technology" unit of the GPU
+ *
+ * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
+ * for implementing the graphics, compute, and/or media IP. It encapsulates
+ * the hardware engines, programmable execution units, and GuC. Each GT has
+ * its own handling of power management (RC6+forcewake) and multicast register
+ * steering.
+ *
+ * A GPU/tile may have a single GT that supplies all graphics, compute, and
+ * media functionality, or the graphics/compute and media may be split into
+ * separate GTs within a tile.
+ */
+struct xe_gt {
+	/** @tile: Backpointer to GT's tile */
+	struct xe_tile *tile;
+
+	/** @info: GT info */
+	struct {
+		/** @type: type of GT */
+		enum xe_gt_type type;
+		/** @id: Unique ID of this GT within the PCI Device */
+		u8 id;
+		/** @reference_clock: clock frequency */
+		u32 reference_clock;
+		/** @engine_mask: mask of engines present on GT */
+		u64 engine_mask;
+		/**
+		 * @__engine_mask: mask of engines present on GT read from
+		 * xe_pci.c, used to fake reading the engine_mask from the
+		 * hwconfig blob.
+		 */
+		u64 __engine_mask;
+	} info;
+
+	/**
+	 * @mmio: mmio info for GT. All GTs within a tile share the same
+	 * register space, but have their own copy of GSI registers at a
+	 * specific offset, as well as their own forcewake handling.
+	 */
+	struct {
+		/** @fw: force wake for GT */
+		struct xe_force_wake fw;
+		/**
+		 * @adj_limit: adjust MMIO address if address is below this
+		 * value
+		 */
+		u32 adj_limit;
+		/** @adj_offset: offset to add to MMIO address when adjusting */
+		u32 adj_offset;
+	} mmio;
+
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+
+	/** @reset: state for GT resets */
+	struct {
+		/**
+		 * @worker: work item so GT resets can be done asynchronously,
+		 * allowing the reset code to safely flush all code paths
+		 */
+		struct work_struct worker;
+	} reset;
+
+	/** @tlb_invalidation: TLB invalidation state */
+	struct {
+		/** @seqno: TLB invalidation seqno, protected by CT lock */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int seqno;
+		/**
+		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+		 */
+		int seqno_recv;
+		/**
+		 * @pending_fences: list of pending fences waiting on TLB
+		 * invalidations, protected by CT lock
+		 */
+		struct list_head pending_fences;
+		/**
+		 * @pending_lock: protects @pending_fences and updating
+		 * @seqno_recv.
+		 */
+		spinlock_t pending_lock;
+		/**
+		 * @fence_tdr: schedules a delayed call to
+		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
+		 */
+		struct delayed_work fence_tdr;
+		/** @fence_context: context for TLB invalidation fences */
+		u64 fence_context;
+		/**
+		 * @fence_seqno: seqno to TLB invalidation fences, protected by
+		 * tlb_invalidation.lock
+		 */
+		u32 fence_seqno;
+		/** @lock: protects TLB invalidation fences */
+		spinlock_t lock;
+	} tlb_invalidation;
+
+	/**
+	 * @ccs_mode: Number of compute engines enabled.
+	 * Allows fixed mapping of available compute slices to compute engines.
+	 * By default only the first available compute engine is enabled and all
+	 * available compute slices are allocated to it.
+	 */
+	u32 ccs_mode;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/**
+		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * (e.g. migrations, fixing page tables), are allocated.
+		 * A dedicated pool is needed so USM operations do not get
+		 * blocked behind any user operations which may have resulted
+		 * in a fault.
+		 */
+		struct xe_sa_manager *bb_pool;
+		/**
+		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * operations (e.g. migrations, fixing page tables)
+		 */
+		u16 reserved_bcs_instance;
+		/** @pf_wq: page fault work queue, unbound, high priority */
+		struct workqueue_struct *pf_wq;
+		/** @acc_wq: access counter work queue, unbound, high priority */
+		struct workqueue_struct *acc_wq;
+		/**
+		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * be processed not under the GuC CT lock. The queue is sized so
+		 * it can sync all possible faults (1 per physical engine).
+		 * Multiple queues exist so page faults from different VMs can
+		 * be processed in parallel.
+		 */
+		struct pf_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[PF_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process page faults */
+			struct work_struct worker;
+#define NUM_PF_QUEUE	4
+		} pf_queue[NUM_PF_QUEUE];
+		/**
+		 * @acc_queue: Same as page fault queue, cannot process access
+		 * counters under CT lock.
+		 */
+		struct acc_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW	128
+			/** @data: data in the access counter queue */
+			u32 data[ACC_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for access counter queue,
+			 * moved by worker which processes access counters.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for access counter queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects access counter queue */
+			spinlock_t lock;
+			/** @worker: to process access counters */
+			struct work_struct worker;
+#define NUM_ACC_QUEUE	4
+		} acc_queue[NUM_ACC_QUEUE];
+	} usm;
+
+	/** @ordered_wq: used to serialize GT resets and TDRs */
+	struct workqueue_struct *ordered_wq;
+
+	/** @uc: micro controllers on the GT */
+	struct xe_uc uc;
+
+	/** @gtidle: idle properties of GT */
+	struct xe_gt_idle gtidle;
+
+	/** @exec_queue_ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *exec_queue_ops;
+
+	/**
+	 * @ring_ops: ring operations for this hw engine (1 per engine class)
+	 */
+	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+	/** @fence_irq: fence IRQs (1 per engine class) */
+	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+	/** @default_lrc: default LRC state */
+	void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+	/** @hw_engines: hardware engines on the GT */
+	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+	/** @eclass: per hardware engine class interface on the GT */
+	struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX];
+
+	/** @pcode: GT's PCODE */
+	struct {
+		/** @lock: protecting GT's PCODE mailbox data */
+		struct mutex lock;
+	} pcode;
+
+	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+	struct kobject *sysfs;
+
+	/** @freq: Main GT freq sysfs control */
+	struct kobject *freq;
+
+	/** @mocs: info */
+	struct {
+		/** @uc_index: UC index */
+		u8 uc_index;
+		/** @wb_index: WB index, only used on L3_CCS platforms */
+		u8 wb_index;
+	} mocs;
+
+	/** @fuse_topo: GT topology reported by fuse registers */
+	struct {
+		/** @g_dss_mask: dual-subslices usable by geometry */
+		xe_dss_mask_t g_dss_mask;
+
+		/** @c_dss_mask: dual-subslices usable by compute */
+		xe_dss_mask_t c_dss_mask;
+
+		/** @eu_mask_per_dss: EU mask per DSS */
+		xe_eu_mask_t eu_mask_per_dss;
+	} fuse_topo;
+
+	/** @steering: register steering for individual HW units */
+	struct {
+		/** @ranges: register ranges used for this steering type */
+		const struct xe_mmio_range *ranges;
+
+		/** @group_target: target to steer accesses to */
+		u16 group_target;
+		/** @instance_target: instance to steer accesses to */
+		u16 instance_target;
+	} steering[NUM_STEERING_TYPES];
+
+	/**
+	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
+	 * of a steered operation
+	 */
+	spinlock_t mcr_lock;
+
+	/** @wa_active: keep track of active workarounds */
+	struct {
+		/** @gt: bitmap with active GT workarounds */
+		unsigned long *gt;
+		/** @engine: bitmap with active engine workarounds */
+		unsigned long *engine;
+		/** @lrc: bitmap with active LRC workarounds */
+		unsigned long *lrc;
+		/** @oob: bitmap with active OOB workarounds */
+		unsigned long *oob;
+	} wa_active;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 000000000000..482cb0df9f15
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h" +#include "xe_uc.h" +#include "xe_uc_fw.h" +#include "xe_wa.h" +#include "xe_wopcm.h" + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 +static u32 guc_bo_ggtt_addr(struct xe_guc *guc, + struct xe_bo *bo) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 addr = xe_bo_ggtt_addr(bo); + + xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); + xe_assert(xe, addr < GUC_GGTT_TOP); + xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + + return addr; +} + +static u32 guc_ctl_debug_flags(struct xe_guc *guc) +{ + u32 level = xe_guc_log_get_level(&guc->log); + u32 flags = 0; + + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; + + return flags; +} + +static u32 guc_ctl_feature_flags(struct xe_guc *guc) +{ + return GUC_CTL_ENABLE_SLPC; +} + +static u32 guc_ctl_log_params_flags(struct xe_guc *guc) +{ + u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; + u32 flags; + + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) + #define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS + #else + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 + #endif + + BUILD_BUG_ON(!CRASH_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); + + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); + + flags = GUC_LOG_VALID | + GUC_LOG_NOTIFY_ON_HALF_FULL | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << + GUC_LOG_CAPTURE_SHIFT) | + (offset << GUC_LOG_BUF_ADDR_SHIFT); + + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG + + return flags; +} + +static u32 guc_ctl_ads_flags(struct xe_guc *guc) +{ + u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; + u32 flags = ads << GUC_ADS_ADDR_SHIFT; + + return flags; +} + +static u32 guc_ctl_wa_flags(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 flags = 0; + + if (XE_WA(gt, 22012773006)) + flags |= GUC_WA_POLLCS; + + if (XE_WA(gt, 16011759253)) + flags |= GUC_WA_GAM_CREDITS; + + if (XE_WA(gt, 14014475959)) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797)) + flags |= GUC_WA_DUAL_QUEUE; + + /* + * Wa_22011802037: FIXME - there's more to be done than simply setting + * this flag: make sure each CS is stopped when preparing for GT reset + * and wait for pending MI_FW. 
+ */ + if (GRAPHICS_VERx100(xe) < 1270) + flags |= GUC_WA_PRE_PARSER; + + if (XE_WA(gt, 16011777198)) + flags |= GUC_WA_RCS_RESET_BEFORE_RC6; + + if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) + flags |= GUC_WA_CONTEXT_ISOLATION; + + if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) && + !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + + if (XE_WA(gt, 1509372804)) + flags |= GUC_WA_RENDER_RST_RC6_EXIT; + + return flags; +} + +static u32 guc_ctl_devid(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + return (((u32)xe->info.devid) << 16) | xe->info.revid; +} + +static void guc_init_params(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = 0; + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = 0; + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +static void guc_init_params_post_hwconfig(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +/* + * Initialize the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. 
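+ *
+ * The block lands in SOFT_SCRATCH(1)..SOFT_SCRATCH(GUC_CTL_MAX_DWORDS) below;
+ * SOFT_SCRATCH(0) is zeroed first, and SOFT_SCRATCH(15) is where early GuC
+ * notifications come back (see guc_handle_mmio_msg()).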
+ */
+static void guc_write_params(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int i;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+}
+
+static void guc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_guc_pc_fini(&guc->pc);
+	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+}
+
+int xe_guc_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	guc->fw.type = XE_UC_FW_TYPE_GUC;
+	ret = xe_uc_fw_init(&guc->fw);
+	if (ret)
+		goto out;
+
+	if (!xe_uc_fw_is_enabled(&guc->fw))
+		return 0;
+
+	ret = xe_guc_log_init(&guc->log);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ads_init(&guc->ads);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ct_init(&guc->ct);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		goto out;
+
+	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	if (ret)
+		goto out;
+
+	guc_init_params(guc);
+
+	if (xe_gt_is_media_type(gt))
+		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
+	else
+		guc->notify_reg = GUC_HOST_INTERRUPT;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "GuC init failed with %d", ret);
+	return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+	guc_init_params_post_hwconfig(guc);
+
+	return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+	xe_guc_ads_populate_post_load(&guc->ads);
+	guc->submission_state.enabled = true;
+
+	return 0;
+}
+
+int xe_guc_reset(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status, gdrst;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+
+	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+	if (ret) {
+		drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n",
+			gdrst);
+		goto err_out;
+	}
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		drm_err(&xe->drm,
+			"GuC status: 0x%x, MIA core expected to be in reset\n",
+			guc_status);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+
+	return ret;
+}
+
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+		GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+		GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+		GUC_ENABLE_MIA_CLOCK_GATING;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+				GUC_ENABLE_MIA_CACHING;
+
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
+		shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	/* Must program this register before loading the ucode with DMA */
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+
+	xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Supporting MMIO & in memory RSA
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32
rsa[UOS_RSA_SCRATCH_COUNT]; + size_t copied; + int i; + + if (guc->fw.rsa_size > 256) { + u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + + xe_uc_fw_rsa_offset(&guc->fw); + xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr); + return 0; + } + + copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM; + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; +} + +static int guc_wait_ucode(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms + * (assuming the GT clock is at maximum frequency). So, a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver may decide to reset the GuC and + * attempt the ucode load again if this happens.) + * + * FIXME: There is a known (but exceedingly unlikely) race condition + * where the asynchronous frequency management code could reduce + * the GT clock while a GuC reload is in progress (during a full + * GT reset). A fix is in progress but there are complex locking + * issues to be resolved. In the meantime bump the timeout to + * 200ms. Even at slowest clock, this should be sufficient. And + * in the working case, a larger timeout makes no difference. + */ + ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK, + FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), + 200000, &status, false); + + if (ret) { + struct drm_device *drm = &xe->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); + drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + drm_info(drm, "GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == + XE_GUC_LOAD_STATUS_EXCEPTION) { + drm_info(drm, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(guc_to_gt(guc), + SOFT_SCRATCH(13))); + ret = -ENXIO; + } + + xe_guc_log_print(&guc->log, &p); + } else { + drm_dbg(&xe->drm, "GuC successfully loaded"); + } + + return ret; +} + +static int __xe_guc_upload(struct xe_guc *guc) +{ + int ret; + + guc_write_params(guc); + guc_prepare_xfer(guc); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded separately, either by copying it to the UOS_RSA_SCRATCH + * register (if key size <= 256) or through a ggtt-pinned vma (if key + * size > 256). The RSA size and therefore the way we provide it to the + * HW is fixed for each platform and hard-coded in the bootrom. + */ + ret = guc_xfer_rsa(guc); + if (ret) + goto out; + /* + * Current uCode expects the code to be loaded at 8k; locations below + * this are used for the stack. 
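+	 * (Hence the 0x2000 offset passed to xe_uc_fw_upload() below.)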
+ */ + ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); + if (ret) + goto out; + + /* Wait for authentication */ + ret = guc_wait_ucode(guc); + if (ret) + goto out; + + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); + return 0; + +out: + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + return 0 /* FIXME: ret, don't want to stop load currently */; +} + +/** + * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table + * @guc: The GuC object + * + * This function uploads a minimal GuC that does not support submissions but + * is in a state where the hwconfig table can be read. Next, it reads and parses + * the hwconfig table so it can be used for subsequent steps in the driver load. + * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only). + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) +{ + int ret; + + xe_guc_ads_populate_minimal(&guc->ads); + + ret = __xe_guc_upload(guc); + if (ret) + return ret; + + ret = xe_guc_hwconfig_init(guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_upload(struct xe_guc *guc) +{ + xe_guc_ads_populate(&guc->ads); + + return __xe_guc_upload(guc); +} + +static void guc_handle_mmio_msg(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 msg; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + msg = xe_mmio_read32(gt, SOFT_SCRATCH(15)); + msg &= XE_GUC_RECV_MSG_EXCEPTION | + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; + xe_mmio_write32(gt, SOFT_SCRATCH(15), 0); + + if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC crash dump notification!\n"); + + if (msg & XE_GUC_RECV_MSG_EXCEPTION) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC exception notification!\n"); +} + +static void guc_enable_irq(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 events = xe_gt_is_media_type(gt) ? + REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + /* Primary GuC and media GuC share a single enable bit */ + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); + + /* + * There are separate mask bits for primary and media GuCs, so use + * a RMW operation to avoid clobbering the other GuC's setting. + */ + xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0); +} + +int xe_guc_enable_communication(struct xe_guc *guc) +{ + int err; + + guc_enable_irq(guc); + + xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, + ARAT_EXPIRED_INTRMSK, 0); + + err = xe_guc_ct_enable(&guc->ct); + if (err) + return err; + + guc_handle_mmio_msg(guc); + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + int ret; + u32 action[] = { + XE_GUC_ACTION_CLIENT_SOFT_RESET, + }; + + ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); + if (ret) { + drm_err(&guc_to_xe(guc)->drm, + "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); + return ret; + } + + xe_guc_sanitize(guc); + return 0; +} + +void xe_guc_notify(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + const u32 default_notify_data = 0; + + /* + * Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass + * additional payload data to the GuC but this capability is not + * used by the firmware yet. Use the default value in the meantime.
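+ * (Hence default_notify_data below is simply 0; a non-zero payload + * would ride along with this same register write.)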
+ */ + xe_mmio_write32(gt, guc->notify_reg, default_notify_data); +} + +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) +{ + u32 action[] = { + XE_GUC_ACTION_AUTHENTICATE_HUC, + rsa_addr + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, + u32 len, u32 *response_buf) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 header, reply; + struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? + MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); + const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; + int ret; + int i; + + BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); + + xe_assert(xe, !guc->ct.enabled); + xe_assert(xe, len); + xe_assert(xe, len <= VF_SW_FLAG_COUNT); + xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); + xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) == + GUC_HXG_ORIGIN_HOST); + xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) == + GUC_HXG_TYPE_REQUEST); + +retry: + /* Not in critical data-path, just do if else for GT type */ + if (xe_gt_is_media_type(gt)) { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, MED_VF_SW_FLAG(i), + request[i]); + xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX)); + } else { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, VF_SW_FLAG(i), + request[i]); + xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX)); + } + + xe_guc_notify(guc); + + ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), + 50000, &reply, false); + if (ret) { +timeout: + drm_err(&xe->drm, "mmio request %#x: no reply %#x\n", + request[0], reply); + return ret; + } + + header = xe_mmio_read32(gt, reply_reg); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_BUSY) { + /* + * Once we have received a BUSY reply, we must wait again for the + * final response, but this time we can't use the ORIGIN mask + * anymore. To spot the right change in the reply, we take + * advantage of the fact that the SUCCESS and FAILURE responses + * differ only by a single bit, while all other bits are set and + * can be used as a new mask.
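+ * For example, per the BUILD_BUG_ON()s below: RESPONSE_SUCCESS is the + * all-ones value of the TYPE field and RESPONSE_FAILURE differs from + * it only in bit 0, so (SUCCESS & FAILURE) can serve as both the mask + * and the expected value for the second wait.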
+ */ + u32 resp_bits = GUC_HXG_TYPE_RESPONSE_SUCCESS & GUC_HXG_TYPE_RESPONSE_FAILURE; + u32 resp_mask = FIELD_PREP(GUC_HXG_MSG_0_TYPE, resp_bits); + + BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS); + BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); + + ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, + 1000000, &header, false); + + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; + if (unlikely(ret)) + goto timeout; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n", + request[0], error, hint); + return -ENXIO; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != + GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + return -EPROTO; + } + + /* Just copy entire possible message response */ + if (response_buf) { + response_buf[0] = header; + + for (i = 1; i < VF_SW_FLAG_COUNT; i++) { + reply_reg.addr += sizeof(u32); + response_buf[i] = xe_mmio_read32(gt, reply_reg); + } + } + + /* Use data from the GuC response as our return value */ + return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); +} + +int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) +{ + return xe_guc_mmio_send_recv(guc, request, len, NULL); +} + +static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_SELF_CFG), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, + lower_32_bits(val)), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, + upper_32_bits(val)), + }; + int ret; + + xe_assert(xe, len <= 2); + xe_assert(xe, len != 1 || !upper_32_bits(val)); + + /* Self config must go over MMIO */ + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + if (unlikely(ret < 0)) + return ret; + if (unlikely(ret > 1)) + return -EPROTO; + if (unlikely(!ret)) + return -ENOKEY; + + return 0; +} + +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val) +{ + return guc_self_cfg(guc, key, 1, val); +} + +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) +{ + return guc_self_cfg(guc, key, 2, val); +} + +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) +{ + if (iir & GUC_INTR_GUC2HOST) + xe_guc_ct_irq_handler(&guc->ct); +} + +void xe_guc_sanitize(struct xe_guc *guc) +{ + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_guc_ct_disable(&guc->ct); + guc->submission_state.enabled = false; +} + +int xe_guc_reset_prepare(struct xe_guc *guc) +{ + return xe_guc_submit_reset_prepare(guc); +} + +void xe_guc_reset_wait(struct xe_guc *guc) +{ + xe_guc_submit_reset_wait(guc); +} + +void xe_guc_stop_prepare(struct xe_guc *guc) 
+{ + XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); +} + +int xe_guc_stop(struct xe_guc *guc) +{ + int ret; + + xe_guc_ct_disable(&guc->ct); + + ret = xe_guc_submit_stop(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_start(struct xe_guc *guc) +{ + int ret; + + ret = xe_guc_pc_start(&guc->pc); + XE_WARN_ON(ret); + + return xe_guc_submit_start(guc); +} + +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + int i; + + xe_uc_fw_print(&guc->fw, p); + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + status = xe_mmio_read32(gt, GUC_STATUS); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + REG_FIELD_GET(GS_BOOTROM_MASK, status)); + drm_printf(p, "\tuKernel status = 0x%x\n", + REG_FIELD_GET(GS_UKERNEL_MASK, status)); + drm_printf(p, "\tMIA Core status = 0x%x\n", + REG_FIELD_GET(GS_MIA_MASK, status)); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(gt, SOFT_SCRATCH(i))); + } + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + xe_guc_ct_print(&guc->ct, p, false); + xe_guc_submit_print(guc, p); +} + +/** + * xe_guc_in_reset() - Detect if GuC MIA is in reset. + * @guc: The GuC object + * + * This function detects runtime resume from d3cold by leveraging + * GUC_STATUS; the GuC doesn't get reset during d3hot, so + * it is strictly to be called from the RPM resume handler. + * + * Return: true if failed to get forcewake or GuC MIA is in Reset, + * otherwise false. + */ +bool xe_guc_in_reset(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return true; + + status = xe_mmio_read32(gt, GUC_STATUS); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + return status & GS_MIA_IN_RESET; +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h new file mode 100644 index 000000000000..d3e49e7fd7c3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_H_ +#define _XE_GUC_H_ + +#include "xe_gt.h" +#include "xe_guc_types.h" +#include "xe_hw_engine_types.h" +#include "xe_macros.h" + +struct drm_printer; + +int xe_guc_init(struct xe_guc *guc); +int xe_guc_init_post_hwconfig(struct xe_guc *guc); +int xe_guc_post_load_init(struct xe_guc *guc); +int xe_guc_reset(struct xe_guc *guc); +int xe_guc_upload(struct xe_guc *guc); +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); +int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_suspend(struct xe_guc *guc); +void xe_guc_notify(struct xe_guc *guc); +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); +int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len); +int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len, + u32 *response_buf); +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); +void xe_guc_sanitize(struct xe_guc *guc); +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_reset_prepare(struct xe_guc *guc); +void xe_guc_reset_wait(struct xe_guc *guc); +void xe_guc_stop_prepare(struct xe_guc *guc); +int xe_guc_stop(struct
xe_guc *guc); +int xe_guc_start(struct xe_guc *guc); +bool xe_guc_in_reset(struct xe_guc *guc); + +static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return GUC_RENDER_CLASS; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return GUC_VIDEO_CLASS; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return GUC_VIDEOENHANCE_CLASS; + case XE_ENGINE_CLASS_COPY: + return GUC_BLITTER_CLASS; + case XE_ENGINE_CLASS_COMPUTE: + return GUC_COMPUTE_CLASS; + case XE_ENGINE_CLASS_OTHER: + return GUC_GSC_OTHER_CLASS; + default: + XE_WARN_ON(class); + return -1; + } +} + +static inline struct xe_gt *guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static inline struct xe_device *guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c new file mode 100644 index 000000000000..390e6f1bf4e1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_ads.h" + +#include <drm/drm_managed.h> + +#include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_gt_ccs_mode.h" +#include "xe_guc.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/* Slack of a few additional entries per engine */ +#define ADS_REGSET_EXTRA_MAX 8 + +static struct xe_guc * +ads_to_guc(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_guc, ads); +} + +static struct xe_gt * +ads_to_gt(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_gt, uc.guc.ads); +} + +static struct xe_device * +ads_to_xe(struct xe_guc_ads *ads) +{ + return gt_to_xe(ads_to_gt(ads)); +} + +static struct iosys_map * +ads_to_map(struct xe_guc_ads *ads) +{ + return &ads->bo->vmap; +} + +/* UM Queue parameters: */ +#define GUC_UM_QUEUE_SIZE (SZ_64K) +#define GUC_PAGE_RES_TIMEOUT_US (-1) + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads) and + * all the extra buffers indirectly linked via the ADS struct's entries. + * + * Layout of the ADS blob allocated for the GuC: + * + * +---------------------------------------+ <== base + * | guc_ads | + * +---------------------------------------+ + * | guc_policies | + * +---------------------------------------+ + * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_engine_usage | + * +---------------------------------------+ + * | guc_um_init_params | + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... 
| + * +---------------------------------------+ <== dynamic + * | padding | + * +---------------------------------------+ <== 4K aligned + * | golden contexts | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | UM queues | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | private data | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + */ +struct __guc_ads_blob { + struct guc_ads ads; + struct guc_policies policies; + struct guc_gt_system_info system_info; + struct guc_engine_usage engine_usage; + struct guc_um_init_params um_init_params; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; +} __packed; + +#define ads_blob_read(ads_, field_) \ + xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_) + +#define ads_blob_write(ads_, field_, val_) \ + xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_, val_) + +#define info_map_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) + +#define info_map_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) + +static size_t guc_ads_regset_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + + xe_assert(xe, ads->regset_size); + + return ads->regset_size; +} + +static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads->golden_lrc_size); +} + +static size_t guc_ads_capture_size(struct xe_guc_ads *ads) +{ + /* FIXME: Allocate a proper capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + +static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + + if (!xe->info.has_usm) + return 0; + + return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; +} + +static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); +} + +static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) +{ + return offsetof(struct __guc_ads_blob, regset); +} + +static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_regset_offset(ads) + + guc_ads_regset_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_golden_lrc_offset(ads) + + guc_ads_golden_lrc_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) +{ + u32 offset; + + offset = guc_ads_capture_offset(ads) + + guc_ads_capture_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_um_queues_offset(ads) + + guc_ads_um_queues_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_size(struct xe_guc_ads *ads) +{ + return guc_ads_private_data_offset(ads) + + guc_ads_private_data_size(ads); +} + +static bool needs_wa_1607983814(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) < 1250; +} + +static size_t calculate_regset_size(struct xe_gt *gt) +{ + struct xe_reg_sr_entry *sr_entry; + 
unsigned long sr_idx; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + unsigned int count = 0; + + for_each_hw_engine(hwe, gt, id) + xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) + count++; + + count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; + + if (needs_wa_1607983814(gt_to_xe(gt))) + count += LNCFCMOCS_REG_COUNT; + + return count * sizeof(struct guc_mmio_reg); +} + +static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 mask = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + mask |= BIT(hwe->instance); + + return mask; +} + +static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + size_t total_size = 0, alloc_size, real_size; + int class; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + if (!engine_enable_mask(gt, class)) + continue; + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + } + + return total_size; +} + +#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) + +int xe_guc_ads_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ads->bo = bo; + + return 0; +} + +/** + * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load + * @ads: Additional data structures object + * + * Recalculate golden_lrc_size & regset_size as the number of hardware engines may + * have changed after the hwconfig was loaded. Also verify that the new sizes fit in + * the already allocated ADS buffer object. + * + * Return: 0 on success, negative error code on error.
+ */ +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + u32 prev_regset_size = ads->regset_size; + + xe_gt_assert(gt, ads->bo); + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + xe_gt_assert(gt, ads->golden_lrc_size + + (ads->regset_size - prev_regset_size) <= + MAX_GOLDEN_LRC_SIZE); + + return 0; +} + +static void guc_policies_init(struct xe_guc_ads *ads) +{ + ads_blob_write(ads, policies.dpc_promote_time, + GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); + ads_blob_write(ads, policies.max_num_work_items, + GLOBAL_POLICY_MAX_NUM_WI); + ads_blob_write(ads, policies.global_flags, 0); + ads_blob_write(ads, policies.is_valid, 1); +} + +static void fill_engine_enable_masks(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + + info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); + info_map_write(xe, info_map, + engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); +} + +static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u8 guc_class; + + for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + ads_blob_write(ads, ads.eng_state_size[guc_class], + guc_ads_golden_lrc_size(ads) - + xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + xe_bo_ggtt_addr(ads->bo) + + guc_ads_golden_lrc_offset(ads)); + } +} + +static void guc_mapping_table_init_invalid(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int i, j; + + /* Table must be set to invalid values for entries not used */ + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) + info_map_write(xe, info_map, mapping_table[i][j], + GUC_MAX_INSTANCES_PER_CLASS); +} + +static void guc_mapping_table_init(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + guc_mapping_table_init_invalid(gt, info_map); + + for_each_hw_engine(hwe, gt, id) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(hwe->class); + info_map_write(xe, info_map, + mapping_table[guc_class][hwe->logical_instance], + hwe->instance); + } +} + +static void guc_capture_list_init(struct xe_guc_ads *ads) +{ + int i, j; + u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads); + + /* FIXME: Populate a proper capture list */ + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + ads_blob_write(ads, ads.capture_instance[i][j], addr); + ads_blob_write(ads, ads.capture_class[i][j], addr); + } + + 
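+ /* The global list entry reuses the same placeholder address for now */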
ads_blob_write(ads, ads.capture_global[i], addr); + } +} + +static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, + struct iosys_map *regset_map, + struct xe_reg reg, + unsigned int n_entry) +{ + struct guc_mmio_reg entry = { + .offset = reg.addr, + .flags = reg.masked ? GUC_REGSET_MASKED : 0, + }; + + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), + &entry, sizeof(entry)); +} + +static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, + struct iosys_map *regset_map, + struct xe_hw_engine *hwe) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_hw_engine *hwe_rcs_reset_domain = + xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); + struct xe_reg_sr_entry *entry; + unsigned long idx; + unsigned int count = 0; + const struct { + struct xe_reg reg; + bool skip; + } *e, extra_regs[] = { + { .reg = RING_MODE(hwe->mmio_base), }, + { .reg = RING_HWS_PGA(hwe->mmio_base), }, + { .reg = RING_IMR(hwe->mmio_base), }, + { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, + { .reg = CCS_MODE, + .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) }, + }; + u32 i; + + BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); + + xa_for_each(&hwe->reg_sr.xa, idx, entry) + guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); + + for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { + if (e->skip) + continue; + + guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); + } + + /* Wa_1607983814 */ + if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { + guc_mmio_regset_write_one(ads, regset_map, + XELP_LNCFCMOCS(i), count++); + } + } + + return count; +} + +static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) +{ + size_t regset_offset = guc_ads_regset_offset(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; + struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + regset_offset); + unsigned int regset_used = 0; + + for_each_hw_engine(hwe, gt, id) { + unsigned int count; + u8 gc; + + /* + * 1. Write all MMIO entries for this exec queue to the table. No + * need to worry about fused-off engines or about whether there + * are entries in the regset: the reg_state_list has been zeroed + * by xe_guc_ads_populate() + */ + count = guc_mmio_regset_write(ads, &regset_map, hwe); + if (!count) + continue; + + /* + * 2.
Record in the header (ads.reg_state_list) the address + * location and number of entries + */ + gc = xe_engine_class_to_guc_class(hwe->class); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); + + addr += count * sizeof(struct guc_mmio_reg); + iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg)); + + regset_used += count * sizeof(struct guc_mmio_reg); + } + + xe_gt_assert(gt, regset_used <= ads->regset_size); +} + +static void guc_um_init_params(struct xe_guc_ads *ads) +{ + u32 um_queue_offset = guc_ads_um_queues_offset(ads); + u64 base_dpa; + u32 base_ggtt; + int i; + + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; + base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; + + for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { + ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, + base_dpa + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, + base_ggtt + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, + GUC_UM_QUEUE_SIZE); + } + + ads_blob_write(ads, um_init_params.page_response_timeout_in_us, + GUC_PAGE_RES_TIMEOUT_US); +} + +static void guc_doorbell_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + + if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { + u32 distdbreg = + xe_mmio_read32(gt, DIST_DBS_POPULATED); + + ads_blob_write(ads, + system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], + REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); + } +} + +/** + * xe_guc_ads_populate_minimal - populate minimal ADS + * @ads: Additional data structures object + * + * This function populates a minimal ADS that does not support submissions but + * is enough for the GuC to load and for the hwconfig table to be read.
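+ * Submission-specific state (register save/restore lists, golden + * contexts, capture lists) is left zeroed or null at this stage.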
+ */ +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + xe_gt_assert(gt, ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init_invalid(gt, &info_map); + guc_doorbell_init(ads); + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +void xe_guc_ads_populate(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + xe_gt_assert(gt, ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + fill_engine_enable_masks(gt, &info_map); + guc_mmio_reg_state_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init(gt, &info_map); + guc_capture_list_init(ads); + guc_doorbell_init(ads); + + if (xe->info.has_usm) { + guc_um_init_params(ads); + ads_blob_write(ads, ads.um_init_data, base + + offsetof(struct __guc_ads_blob, um_init_params)); + } + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + size_t total_size = 0, alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(class); + + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + xe_gt_assert(gt, gt->default_lrc[class]); + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). 
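+ * In code terms (mirroring the writes below): eng_state_size is + * real_size - xe_lrc_skip_size(xe), while golden_context_lrca points + * at the start of the full image.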
+ */ + ads_blob_write(ads, ads.eng_state_size[guc_class], + real_size - xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + addr_ggtt); + + xe_map_memcpy_to(xe, ads_to_map(ads), offset, + gt->default_lrc[class], real_size); + + addr_ggtt += alloc_size; + offset += alloc_size; + } + + xe_gt_assert(gt, total_size == ads->golden_lrc_size); +} + +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) +{ + guc_populate_golden_lrc(ads); +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h new file mode 100644 index 000000000000..138ef6267671 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_H_ +#define _XE_GUC_ADS_H_ + +#include "xe_guc_ads_types.h" + +int xe_guc_ads_init(struct xe_guc_ads *ads); +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); +void xe_guc_ads_populate(struct xe_guc_ads *ads); +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h new file mode 100644 index 000000000000..4afe44bece4b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_TYPES_H_ +#define _XE_GUC_ADS_TYPES_H_ + +#include <linux/types.h> + +struct xe_bo; + +/** + * struct xe_guc_ads - GuC additional data structures (ADS) + */ +struct xe_guc_ads { + /** @bo: XE BO for GuC ads blob */ + struct xe_bo *bo; + /** @golden_lrc_size: golden LRC size */ + size_t golden_lrc_size; + /** @regset_size: size of register set passed to GuC for save/restore */ + u32 regset_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c new file mode 100644 index 000000000000..24a33fa36496 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -0,0 +1,1320 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_ct.h" + +#include <linux/bitfield.h> +#include <linux/circ_buf.h> +#include <linux/delay.h> + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "abi/guc_klvs_abi.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" +#include "xe_guc.h" +#include "xe_guc_submit.h" +#include "xe_map.h" +#include "xe_pm.h" +#include "xe_trace.h" + +/* Used when a CT send wants to block and / or receive data */ +struct g2h_fence { + u32 *response_buffer; + u32 seqno; + u16 response_len; + u16 error; + u16 hint; + u16 reason; + bool retry; + bool fail; + bool done; +}; + +static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) +{ + g2h_fence->response_buffer = response_buffer; + g2h_fence->response_len = 0; + g2h_fence->fail = false; + g2h_fence->retry = false; + g2h_fence->done = false; + g2h_fence->seqno = ~0x0; +} + +static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) +{ + return g2h_fence->seqno == ~0x0; +} + +static struct xe_guc * +ct_to_guc(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_guc, ct); +} + +static struct xe_gt * +ct_to_gt(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_gt, uc.guc.ct); +} + +static struct xe_device * +ct_to_xe(struct xe_guc_ct *ct) +{ + return gt_to_xe(ct_to_gt(ct)); +} + +/** + * 
DOC: GuC CTB Blob + * + * We allocate a single blob to hold both CTB descriptors and buffers: + * + * +--------+-----------------------------------------------+------+ + * | offset | contents | size | + * +========+===============================================+======+ + * | 0x0000 | H2G CTB Descriptor (send) | | + * +--------+-----------------------------------------------+ 4K | + * | 0x0800 | G2H CTB Descriptor (g2h) | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | H2G CT Buffer (send) | n*4K | + * | | | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | G2H CT Buffer (g2h) | m*4K | + * | + n*4K | | | + * +--------+-----------------------------------------------+------+ + * + * Size of each ``CT Buffer`` must be a multiple of 4K. + * We don't expect too many messages in flight at any time, unless we are + * using the GuC submission. In that case each request requires a minimum of + * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this is + * enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot. + */ + +#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) + +static size_t guc_ct_size(void) +{ + return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + + CTB_G2H_BUFFER_SIZE; +} + +static void guc_ct_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_ct *ct = arg; + + xa_destroy(&ct->fence_lookup); +} + +static void g2h_worker_func(struct work_struct *w); + +static void primelockdep(struct xe_guc_ct *ct) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ct->lock); + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + int err; + + xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); + + drmm_mutex_init(&xe->drm, &ct->lock); + spin_lock_init(&ct->fast_lock); + xa_init(&ct->fence_lookup); + INIT_WORK(&ct->g2h_worker, g2h_worker_func); + init_waitqueue_head(&ct->wq); + init_waitqueue_head(&ct->g2h_fence_wq); + + primelockdep(ct); + + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ct->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + return 0; +} + +#define desc_read(xe_, guc_ctb__, field_) \ + xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_) + +#define desc_write(xe_, guc_ctb__, field_, val_) \ + xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_, val_) + +static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, + struct iosys_map *map) +{ + h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32); + h2g->info.resv_space = 0; + h2g->info.tail = 0; + h2g->info.head = 0; + h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head, + h2g->info.size) - + h2g->info.resv_space; + h2g->info.broken = false; + + h2g->desc = *map; + xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); +} + +static void
guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, + struct iosys_map *map) +{ + g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32); + g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); + g2h->info.head = 0; + g2h->info.tail = 0; + g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head, + g2h->info.size) - + g2h->info.resv_space; + g2h->info.broken = false; + + g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); + xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE); +} + +static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo); + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; + size = ct->ctbs.h2g.info.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY, + size); +} + +static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE; + size = ct->ctbs.g2h.info.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, + size); +} + +static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) +{ + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_CONTROL_CTB), + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, + enable ? GUC_CTB_CONTROL_ENABLE : + GUC_CTB_CONTROL_DISABLE), + }; + int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +int xe_guc_ct_enable(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + int err; + + xe_assert(xe, !ct->enabled); + + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + + err = guc_ct_ctb_h2g_register(ct); + if (err) + goto err_out; + + err = guc_ct_ctb_g2h_register(ct); + if (err) + goto err_out; + + err = guc_ct_control_toggle(ct, true); + if (err) + goto err_out; + + mutex_lock(&ct->lock); + spin_lock_irq(&ct->fast_lock); + ct->g2h_outstanding = 0; + ct->enabled = true; + spin_unlock_irq(&ct->fast_lock); + mutex_unlock(&ct->lock); + + smp_mb(); + wake_up_all(&ct->wq); + drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); + + return 0; + +err_out: + drm_err(&xe->drm, "Failed to enable CT (%d)\n", err); + + return err; +} + +void xe_guc_ct_disable(struct xe_guc_ct *ct) +{ + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ + spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ + ct->enabled = false; /* Finally disable CT communication */ + spin_unlock_irq(&ct->fast_lock); + mutex_unlock(&ct->lock); + + xa_destroy(&ct->fence_lookup); +} + +static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + + lockdep_assert_held(&ct->lock); + + if (cmd_len > h2g->info.space) { + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head, + h2g->info.size) - + h2g->info.resv_space; + if (cmd_len > h2g->info.space) + return false; + } + + return true; +} + +static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len) +{ + if (!g2h_len) + return true; + + lockdep_assert_held(&ct->fast_lock); + + return ct->ctbs.g2h.info.space > g2h_len; +} + +static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) +{ + lockdep_assert_held(&ct->lock); + + if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len)) + return -EBUSY; + + return 0; +} + +static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) +{ + lockdep_assert_held(&ct->lock); + ct->ctbs.h2g.info.space -= cmd_len; +} + +static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) +{ + xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space); + + if (g2h_len) { + lockdep_assert_held(&ct->fast_lock); + + ct->ctbs.g2h.info.space -= g2h_len; + ct->g2h_outstanding += num_g2h; + } +} + +static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + lockdep_assert_held(&ct->fast_lock); + xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <= + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); + + ct->ctbs.g2h.info.space += g2h_len; + --ct->g2h_outstanding; +} + +static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + spin_lock_irq(&ct->fast_lock); + __g2h_release_space(ct, g2h_len); + spin_unlock_irq(&ct->fast_lock); +} + +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ + +static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 ct_fence_value, bool want_response) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *h2g = &ct->ctbs.h2g; + u32 cmd[H2G_CT_HEADERS]; + u32 tail = h2g->info.tail; + u32 full_len; + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, + tail * sizeof(u32)); + + full_len = len + GUC_CTB_HDR_LEN; + + lockdep_assert_held(&ct->lock); + xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN); + xe_assert(xe, tail <= h2g->info.size); + + /* Command will wrap, zero fill (NOPs), 
return and check credits again */ + if (tail + full_len > h2g->info.size) { + xe_map_memset(xe, &map, 0, 0, + (h2g->info.size - tail) * sizeof(u32)); + h2g_reserve_space(ct, (h2g->info.size - tail)); + h2g->info.tail = 0; + desc_write(xe, h2g, tail, h2g->info.tail); + + return -EAGAIN; + } + + /* + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data + */ + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); + if (want_response) { + cmd[1] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } else { + cmd[1] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } + + /* H2G header in cmd[1] replaces action[0] so: */ + --len; + ++action; + + /* Write H2G, ensuring it is visible before the descriptor update */ + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); + xe_device_wmb(xe); + + /* Update local copies */ + h2g->info.tail = (tail + full_len) % h2g->info.size; + h2g_reserve_space(ct, full_len); + + /* Update descriptor */ + desc_write(xe, h2g, tail, h2g->info.tail); + + trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len, + desc_read(xe, h2g, head), h2g->info.tail); + + return 0; +} + +static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct xe_device *xe = ct_to_xe(ct); + int ret; + + xe_assert(xe, !g2h_len || !g2h_fence); + xe_assert(xe, !num_g2h || !g2h_fence); + xe_assert(xe, !g2h_len || num_g2h); + xe_assert(xe, g2h_len || !num_g2h); + lockdep_assert_held(&ct->lock); + + if (unlikely(ct->ctbs.h2g.info.broken)) { + ret = -EPIPE; + goto out; + } + + if (unlikely(!ct->enabled)) { + ret = -ENODEV; + goto out; + } + + if (g2h_fence) { + g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; + num_g2h = 1; + + if (g2h_fence_needs_alloc(g2h_fence)) { + void *ptr; + + g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); + ptr = xa_store(&ct->fence_lookup, + g2h_fence->seqno, + g2h_fence, GFP_ATOMIC); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto out; + } + } + } + + if (g2h_len) + spin_lock_irq(&ct->fast_lock); +retry: + ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); + if (unlikely(ret)) + goto out_unlock; + + ret = h2g_write(ct, action, len, g2h_fence ?
g2h_fence->seqno : 0, + !!g2h_fence); + if (unlikely(ret)) { + if (ret == -EAGAIN) + goto retry; + goto out_unlock; + } + + __g2h_reserve_space(ct, g2h_len, num_g2h); + xe_guc_notify(ct_to_guc(ct)); +out_unlock: + if (g2h_len) + spin_unlock_irq(&ct->fast_lock); +out: + return ret; +} + +static void kick_reset(struct xe_guc_ct *ct) +{ + xe_gt_reset_async(ct_to_gt(ct)); +} + +static int dequeue_one_g2h(struct xe_guc_ct *ct); + +static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + unsigned int sleep_period_ms = 1; + int ret; + + xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + lockdep_assert_held(&ct->lock); + xe_device_assert_mem_access(ct_to_xe(ct)); + +try_again: + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, + g2h_fence); + + /* + * We wait to try to restore credits for about 1 second before bailing. + * In the case of H2G credits we have no choice but to wait for the + * GuC to consume H2Gs in the channel, so we use a wait / sleep loop. In + * the case of G2H we process any G2H in the channel, hopefully freeing + * credits as we consume the G2H messages. + */ + if (unlikely(ret == -EBUSY && + !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + struct guc_ctb *h2g = &ct->ctbs.h2g; + + if (sleep_period_ms == 1024) + goto broken; + + trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail, + h2g->info.size, + h2g->info.space, + len + GUC_CTB_HDR_LEN); + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + + goto try_again; + } else if (unlikely(ret == -EBUSY)) { + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + + trace_xe_guc_ct_g2h_flow_control(g2h->info.head, + desc_read(xe, g2h, tail), + g2h->info.size, + g2h->info.space, + g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN : + g2h_len); + +#define g2h_avail(ct) \ + (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head) + if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || + g2h_avail(ct), HZ)) + goto broken; +#undef g2h_avail + + if (dequeue_one_g2h(ct) < 0) + goto broken; + + goto try_again; + } + + return ret; + +broken: + drm_err(drm, "No forward progress on H2G, reset required"); + xe_guc_ct_print(ct, &p, true); + ct->ctbs.h2g.info.broken = true; + + return -EDEADLK; +} + +static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) +{ + int ret; + + xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + + mutex_lock(&ct->lock); + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); + mutex_unlock(&ct->lock); + + return ret; +} + +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + int ret; + + lockdep_assert_held(&ct->lock); + + ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +/* + * Check if a GT reset is in progress or will occur and if GT reset brought the + * CT back up. Randomly picking 5 seconds as an upper limit for a GT reset. + */ +static bool retry_failure(struct xe_guc_ct *ct, int ret) +{ + if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV)) + return false; + +#define ct_alive(ct) \ + (ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + return false; +#undef ct_alive + + return true; +} + +static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer, bool no_fail) +{ + struct xe_device *xe = ct_to_xe(ct); + struct g2h_fence g2h_fence; + int ret = 0; + + /* + * We use a fence to implement blocking sends / receiving response data. + * The seqno of the fence is sent in the H2G, returned in the G2H, and + * an xarray is used as the storage medium, with the seqno being the key. + * Fields in the fence hold success, failure, retry status and the + * response data. Safe to allocate on the stack as the xarray is the + * only reference and it cannot be present after this function exits.
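+ * Sketch of the round trip: guc_ct_send() embeds the fence seqno in the + * H2G CT header, the GuC echoes it in the G2H reply, and + * parse_g2h_response() uses it to look the fence back up in + * ct->fence_lookup and wake this waiter.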
+ */ +retry: + g2h_fence_init(&g2h_fence, response_buffer); +retry_same_fence: + ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); + if (unlikely(ret == -ENOMEM)) { + void *ptr; + + /* Retry allocation with GFP_KERNEL */ + ptr = xa_store(&ct->fence_lookup, + g2h_fence.seqno, + &g2h_fence, GFP_KERNEL); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + goto retry_same_fence; + } else if (unlikely(ret)) { + if (ret == -EDEADLK) + kick_reset(ct); + + if (no_fail && retry_failure(ct, ret)) + goto retry_same_fence; + + if (!g2h_fence_needs_alloc(&g2h_fence)) + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + + return ret; + } + + ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + if (!ret) { + drm_err(&xe->drm, "Timed out waiting for G2H, fence %u, action %04x", + g2h_fence.seqno, action[0]); + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + return -ETIME; + } + + if (g2h_fence.retry) { + drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d", + action[0], g2h_fence.reason); + goto retry; + } + if (g2h_fence.fail) { + drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d", + action[0], g2h_fence.error, g2h_fence.hint); + ret = -EIO; + } + + return ret > 0 ? 0 : ret; +} + +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, false); +} + +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, true); +} + +static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + + lockdep_assert_held(&ct->lock); + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + g2h_release_space(ct, len); + } + + return 0; +} + +static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 response_len = len - GUC_CTB_MSG_MIN_LEN; + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); + struct g2h_fence *g2h_fence; + + lockdep_assert_held(&ct->lock); + + g2h_fence = xa_erase(&ct->fence_lookup, fence); + if (unlikely(!g2h_fence)) { + /* Don't tear down channel, as send could've timed out */ + drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + return 0; + } + + xe_assert(xe, fence == g2h_fence->seqno); + + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { + g2h_fence->fail = true; + g2h_fence->error = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]); + g2h_fence->hint = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]); + } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + g2h_fence->retry = true; + g2h_fence->reason = + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]); + } else if (g2h_fence->response_buffer) { + g2h_fence->response_len = response_len; + memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, + response_len * sizeof(u32)); + } + + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + + g2h_fence->done = true; + smp_mb(); + + wake_up_all(&ct->g2h_fence_wq); + + return 0; +} + +static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 hxg, origin, type; + int ret; + + lockdep_assert_held(&ct->lock); + + hxg = msg[1]; + +
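+ /* msg[0] was the CTB header; hxg (msg[1]) carries origin and type */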
origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + drm_err(&xe->drm, + "G2H channel broken on read, origin=%d, reset required\n", + origin); + ct->ctbs.g2h.info.broken = true; + + return -EPROTO; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); + switch (type) { + case GUC_HXG_TYPE_EVENT: + ret = parse_g2h_event(ct, msg, len); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + ret = parse_g2h_response(ct, msg, len); + break; + default: + drm_err(&xe->drm, + "G2H channel broken on read, type=%d, reset required\n", + type); + ct->ctbs.g2h.info.broken = true; + + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = xe_guc_sched_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = xe_guc_deregister_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = xe_guc_exec_queue_reset_failure_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + /* Selftest only at the moment */ + break; + case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: + /* FIXME: Handle this */ + break; + case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR: + ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: + ret = xe_guc_access_counter_notify_handler(guc, payload, + adj_len); + break; + default: + drm_err(&xe->drm, "unexpected action 0x%04x\n", action); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); + + return 0; +} + +static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 tail, head, len; + s32 avail; + u32 action; + + lockdep_assert_held(&ct->fast_lock); + + if (!ct->enabled) + return -ENODEV; + + if (g2h->info.broken) + return -EPIPE; + + /* Calculate DW available to read */ + tail = desc_read(xe, g2h, tail); + avail = tail - g2h->info.head; + if (unlikely(avail == 0)) + return 0; + + if (avail < 0) + avail += g2h->info.size; + + /* Read header */ + xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head, + sizeof(u32)); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + if (len > avail) { + drm_err(&xe->drm, + "G2H channel broken on read, avail=%d, len=%d, reset required\n", + avail, len); + g2h->info.broken = true; + + return -EPROTO; + } + + head = (g2h->info.head + 1) % g2h->info.size; + avail = len - 1; + + /* Read G2H message */ + if (avail + head > 
g2h->info.size) { + u32 avail_til_wrap = g2h->info.size - head; + + xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail_til_wrap * sizeof(u32)); + xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap, + &g2h->cmds, 0, + (avail - avail_til_wrap) * sizeof(u32)); + } else { + xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail * sizeof(u32)); + } + + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + + if (fast_path) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (action) { + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + break; /* Process these in fast-path */ + default: + return 0; + } + } + + /* Update local / descriptor header */ + g2h->info.head = (head + avail) % g2h->info.size; + desc_write(xe, g2h, head, g2h->info.head); + + trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len, + g2h->info.head, tail); + + return len; +} + +static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + switch (action) { + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + __g2h_release_space(ct, len); + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + default: + drm_warn(&xe->drm, "NOT_POSSIBLE"); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); +} + +/** + * xe_guc_ct_fast_path - process critical G2H in the IRQ handler + * @ct: GuC CT object + * + * Anything related to page faults is critical for performance, so process + * these critical G2H messages directly in the IRQ handler. This is safe, as + * these handlers either just wake up waiters or queue another worker. + */ +void xe_guc_ct_fast_path(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + bool ongoing; + int len; + + ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) + return; + + spin_lock(&ct->fast_lock); + do { + len = g2h_read(ct, ct->fast_msg, true); + if (len > 0) + g2h_fast_path(ct, ct->fast_msg, len); + } while (len > 0); + spin_unlock(&ct->fast_lock); + + if (ongoing) + xe_device_mem_access_put(xe); +} + +/* Returns less than zero on error, 0 on done, 1 on more available */ +static int dequeue_one_g2h(struct xe_guc_ct *ct) +{ + int len; + int ret; + + lockdep_assert_held(&ct->lock); + + spin_lock_irq(&ct->fast_lock); + len = g2h_read(ct, ct->msg, false); + spin_unlock_irq(&ct->fast_lock); + if (len <= 0) + return len; + + ret = parse_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + ret = process_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + return 1; +} + +static void g2h_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + bool ongoing; + int ret; + + /* + * Normal users must always hold mem_access.ref around CT calls. However + * during the runtime pm callbacks we rely on CT to talk to the GuC, but + * at this stage we can't rely on mem_access.ref and even the + * callback_task will be different from current. 
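As an aside, the dwords-available computation at the top of g2h_read() is the standard circular-buffer idiom; a self-contained sketch with concrete numbers (illustrative only, all sizes in dwords):

	/* With size = 1024, head = 1000 and tail = 8 the writer has wrapped,
	 * so 32 dwords are pending: 24 up to the end of the buffer plus 8
	 * from its start.
	 */
	static s32 ctb_dw_available(u32 head, u32 tail, u32 size)
	{
		s32 avail = tail - head;

		if (avail < 0)
			avail += size;	/* tail wrapped past the buffer end */

		return avail;
	}
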
For such cases we just + * need to ensure we always process the responses from any blocking + * ct_send requests, or from wherever else we expect a response, when + * initiated from those callbacks (which will need to wait for the below + * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if + * the device has suspended to the point that the CT communication has + * been disabled. + * + * If we are inside the runtime pm callback, we can be the only task + * still issuing CT requests (since that requires having the + * mem_access.ref). It seems like it might in theory be possible to + * receive unsolicited events from the GuC just as we are + * suspending-resuming, but those would currently be lost anyway when + * we eventually exit suspend, hence no need to wake up the device + * here. If we ever need something stronger than get_if_ongoing() we + * must be careful not to block the pm callbacks from receiving CT + * responses: if this worker is blocked waiting on those callbacks to + * complete, we deadlock. + */ + ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) + return; + + do { + mutex_lock(&ct->lock); + ret = dequeue_one_g2h(ct); + mutex_unlock(&ct->lock); + + if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + xe_guc_ct_print(ct, &p, false); + kick_reset(ct); + } + } while (ret == 1); + + if (ongoing) + xe_device_mem_access_put(ct_to_xe(ct)); +} + +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, + struct guc_ctb_snapshot *snapshot, + bool atomic) +{ + u32 head, tail; + + xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0, + sizeof(struct guc_ct_buffer_desc)); + memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info)); + + snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), + atomic ? GFP_ATOMIC : GFP_KERNEL); + + if (!snapshot->cmds) { + drm_err(&xe->drm, "Skipping CTB commands snapshot. 
Only CTB info will be available.\n"); + return; + } + + head = snapshot->desc.head; + tail = snapshot->desc.tail; + + if (head != tail) { + struct iosys_map map = + IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); + + while (head != tail) { + snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32); + ++head; + if (head == ctb->info.size) { + head = 0; + map = ctb->cmds; + } else { + iosys_map_incr(&map, sizeof(u32)); + } + } + } +} + +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, + struct drm_printer *p) +{ + u32 head, tail; + + drm_printf(p, "\tsize: %d\n", snapshot->info.size); + drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space); + drm_printf(p, "\thead: %d\n", snapshot->info.head); + drm_printf(p, "\ttail: %d\n", snapshot->info.tail); + drm_printf(p, "\tspace: %d\n", snapshot->info.space); + drm_printf(p, "\tbroken: %d\n", snapshot->info.broken); + drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head); + drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail); + drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); + + if (!snapshot->cmds) + return; + + head = snapshot->desc.head; + tail = snapshot->desc.tail; + + while (head != tail) { + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, + snapshot->cmds[head]); + ++head; + if (head == snapshot->info.size) + head = 0; + } +} + +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot) +{ + kfree(snapshot->cmds); +} + +/** + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. + * @ct: GuC CT object. + * @atomic: Boolean to indicate if this is called from atomic context like + * reset or CTB handler or from some regular path like debugfs. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a GuC CT snapshot object that must be freed by the caller + * by using `xe_guc_ct_snapshot_free`. + */ +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, + bool atomic) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc_ct_snapshot *snapshot; + + snapshot = kzalloc(sizeof(*snapshot), + atomic ? GFP_ATOMIC : GFP_KERNEL); + + if (!snapshot) { + drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n"); + return NULL; + } + + if (ct->enabled) { + snapshot->ct_enabled = true; + snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding); + guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, + &snapshot->h2g, atomic); + guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, + &snapshot->g2h, atomic); + } + + return snapshot; +} + +/** + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. + * @snapshot: GuC CT snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given GuC CT snapshot object. + */ +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + if (snapshot->ct_enabled) { + drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); + guc_ctb_snapshot_print(&snapshot->h2g, p); + + drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); + guc_ctb_snapshot_print(&snapshot->g2h, p); + + drm_printf(p, "\tg2h outstanding: %d\n", + snapshot->g2h_outstanding); + } else { + drm_puts(p, "\nCT disabled\n"); + } +} + +/** + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot. + * @snapshot: GuC CT snapshot object. + * + * This function frees all the memory that was allocated at capture + * time. 
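A hedged sketch of the capture-now/render-later flow these three entry points are designed for (the surrounding error-path context is hypothetical; note both the print and free helpers tolerate a NULL snapshot, so a failed GFP_ATOMIC allocation needs no special handling):

	/* Illustrative only: snapshot from a (possibly atomic) error path,
	 * render later from process context, e.g. a devcoredump read.
	 */
	static void example_ct_dump(struct xe_guc_ct *ct, struct drm_printer *p)
	{
		struct xe_guc_ct_snapshot *snapshot;

		snapshot = xe_guc_ct_snapshot_capture(ct, true);	/* atomic-safe */

		/* ... later, from process context ... */
		xe_guc_ct_snapshot_print(snapshot, p);	/* NULL-tolerant */
		xe_guc_ct_snapshot_free(snapshot);	/* NULL-tolerant */
	}
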
+ */ +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) +{ + if (!snapshot) + return; + + guc_ctb_snapshot_free(&snapshot->h2g); + guc_ctb_snapshot_free(&snapshot->g2h); + kfree(snapshot); +} + +/** + * xe_guc_ct_print - GuC CT Print. + * @ct: GuC CT. + * @p: drm_printer where it will be printed out. + * @atomic: Boolean to indicate if this is called from atomic context like + * reset or CTB handler or from some regular path like debugfs. + * + * This function quickly captures a snapshot and immediately prints it out. + */ +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic) +{ + struct xe_guc_ct_snapshot *snapshot; + + snapshot = xe_guc_ct_snapshot_capture(ct, atomic); + xe_guc_ct_snapshot_print(snapshot, p); + xe_guc_ct_snapshot_free(snapshot); +} diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h new file mode 100644 index 000000000000..f15f8a4857e0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_H_ +#define _XE_GUC_CT_H_ + +#include "xe_guc_ct_types.h" + +struct drm_printer; + +int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_enable(struct xe_guc_ct *ct); +void xe_guc_ct_disable(struct xe_guc_ct *ct); +void xe_guc_ct_fast_path(struct xe_guc_ct *ct); + +struct xe_guc_ct_snapshot * +xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic); +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p); +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); + +static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) +{ + wake_up_all(&ct->wq); + if (ct->enabled) + queue_work(system_unbound_wq, &ct->g2h_worker); + xe_guc_ct_fast_path(ct); +} + +/* Basic CT send / receives */ +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer); +static inline int +xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv(ct, action, len, NULL); +} + +/* This is the only version of CT send that you can call from a G2H handler */ +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, + u32 len); + +/* Can't fail because a GT reset is in progress */ +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer); +static inline int +xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h new file mode 100644 index 000000000000..d814d4ee3fc6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_TYPES_H_ +#define _XE_GUC_CT_TYPES_H_ + +#include <linux/interrupt.h> +#include <linux/iosys-map.h> +#include <linux/spinlock_types.h> +#include <linux/wait.h> +#include <linux/xarray.h> + +#include "abi/guc_communication_ctb_abi.h" + +struct xe_bo; + +/** + * struct guc_ctb_info - GuC command transport buffer (CTB) info + */ +struct 
guc_ctb_info { + /** @size: size of CTB commands (DW) */ + u32 size; + /** @resv_space: reserved space of CTB commands (DW) */ + u32 resv_space; + /** @head: head of CTB commands (DW) */ + u32 head; + /** @tail: tail of CTB commands (DW) */ + u32 tail; + /** @space: space in CTB commands (DW) */ + u32 space; + /** @broken: channel broken */ + bool broken; +}; + +/** + * struct guc_ctb - GuC command transport buffer (CTB) + */ +struct guc_ctb { + /** @desc: dma buffer map for CTB descriptor */ + struct iosys_map desc; + /** @cmds: dma buffer map for CTB commands */ + struct iosys_map cmds; + /** @info: CTB info */ + struct guc_ctb_info info; +}; + +/** + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot + */ +struct guc_ctb_snapshot { + /** @desc: snapshot of the CTB descriptor */ + struct guc_ct_buffer_desc desc; + /** @cmds: snapshot of the CTB commands */ + u32 *cmds; + /** @info: snapshot of the CTB info */ + struct guc_ctb_info info; +}; + +/** + * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot + */ +struct xe_guc_ct_snapshot { + /** @ct_enabled: CT enabled info at capture time. */ + bool ct_enabled; + /** @g2h_outstanding: G2H outstanding info at the capture time */ + u32 g2h_outstanding; + /** @g2h: G2H CTB snapshot */ + struct guc_ctb_snapshot g2h; + /** @h2g: H2G CTB snapshot */ + struct guc_ctb_snapshot h2g; +}; + +/** + * struct xe_guc_ct - GuC command transport (CT) layer + * + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. + */ +struct xe_guc_ct { + /** @bo: XE BO for CT */ + struct xe_bo *bo; + /** @lock: protects everything in CT layer */ + struct mutex lock; + /** @fast_lock: protects G2H channel and credits */ + spinlock_t fast_lock; + /** @ctbs: buffers for sending and receiving commands */ + struct { + /** @send: Host to GuC (H2G, send) channel */ + struct guc_ctb h2g; + /** @recv: GuC to Host (G2H, receive) channel */ + struct guc_ctb g2h; + } ctbs; + /** @g2h_outstanding: number of outstanding G2H */ + u32 g2h_outstanding; + /** @g2h_worker: worker to process G2H messages */ + struct work_struct g2h_worker; + /** @enabled: CT enabled */ + bool enabled; + /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ + u32 fence_seqno; + /** @fence_lookup: G2H fence lookup */ + struct xarray fence_lookup; + /** @wq: wait queue used for reliable CT sends and freeing G2H credits */ + wait_queue_head_t wq; + /** @g2h_fence_wq: wait queue used for G2H fencing */ + wait_queue_head_t g2h_fence_wq; + /** @msg: Message buffer */ + u32 msg[GUC_CTB_MSG_MAX_LEN]; + /** @fast_msg: Message buffer */ + u32 fast_msg[GUC_CTB_MSG_MAX_LEN]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c new file mode 100644 index 000000000000..ffd7d53bcc42 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_debugfs.h" + +#include <drm/drm_debugfs.h> +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_log.h" +#include "xe_macros.h" + +static struct xe_guc *node_to_guc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int guc_info(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = 
drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_print_info(guc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static int guc_log(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_log_print(&guc->log, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"guc_info", guc_info, 0}, + {"guc_log", guc_log, 0}, +}; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) +{ + struct drm_minor *minor = guc_to_xe(guc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) + local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) + return; + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = guc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h new file mode 100644 index 000000000000..4756dff26fca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_DEBUGFS_H_ +#define _XE_GUC_DEBUGFS_H_ + +struct dentry; +struct xe_guc; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h new file mode 100644 index 000000000000..4c39f01e4f52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_TYPES_H_ +#define _XE_GUC_ENGINE_TYPES_H_ + +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include "xe_gpu_scheduler_types.h" + +struct dma_fence; +struct xe_exec_queue; + +/** + * struct xe_guc_exec_queue - GuC specific state for an xe_exec_queue + */ +struct xe_guc_exec_queue { + /** @q: Backpointer to parent xe_exec_queue */ + struct xe_exec_queue *q; + /** @sched: GPU scheduler for this xe_exec_queue */ + struct xe_gpu_scheduler sched; + /** @entity: Scheduler entity for this xe_exec_queue */ + struct xe_sched_entity entity; + /** + * @static_msgs: Static messages for this xe_exec_queue, used when + * a message needs to be sent through the GPU scheduler but memory + * allocations are not allowed. 
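A hedged sketch of how such a preallocated slot might be used from a no-allocation context (illustrative only: the slot index, opcode value and helper name are hypothetical; the xe_sched_msg fields and xe_sched_add_msg() call are assumed from xe_gpu_scheduler_types.h / xe_gpu_scheduler.h and may differ):

	/* Hypothetical: post a message without allocating on this path. */
	#define EXAMPLE_STATIC_MSG_CLEANUP 0
	static void example_post_cleanup(struct xe_guc_exec_queue *ge)
	{
		struct xe_sched_msg *msg = &ge->static_msgs[EXAMPLE_STATIC_MSG_CLEANUP];

		msg->opcode = 0;		/* hypothetical CLEANUP opcode */
		msg->private_data = ge->q;	/* payload for the msg handler */
		xe_sched_add_msg(&ge->sched, msg);
	}
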
+ */ +#define MAX_STATIC_MSG_TYPE 3 + struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; + /** @lr_tdr: long running TDR worker */ + struct work_struct lr_tdr; + /** @fini_async: do final fini async from this worker */ + struct work_struct fini_async; + /** @resume_time: time of last resume */ + u64 resume_time; + /** @state: GuC specific state for this xe_exec_queue */ + atomic_t state; + /** @wqi_head: work queue item head */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this exec_queue */ + u16 id; + /** @suspend_wait: wait queue used to wait on pending suspends */ + wait_queue_head_t suspend_wait; + /** @suspend_pending: a suspend of the exec_queue is pending */ + bool suspend_pending; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h new file mode 100644 index 000000000000..4dd5a88a7826 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_FWIF_H +#define _XE_GUC_FWIF_H + +#include <linux/bits.h> + +#include "abi/guc_klvs_abi.h" + +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 3 +#define G2H_LEN_DW_TLB_INVALIDATE 3 + +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 + +#define GUC_RENDER_ENGINE 0 +#define GUC_VIDEO_ENGINE 1 +#define GUC_BLITTER_ENGINE 2 +#define GUC_VIDEOENHANCE_ENGINE 3 +#define GUC_VIDEO_ENGINE2 4 +#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) + +#define GUC_RENDER_CLASS 0 +#define GUC_VIDEO_CLASS 1 +#define GUC_VIDEOENHANCE_CLASS 2 +#define GUC_BLITTER_CLASS 3 +#define GUC_COMPUTE_CLASS 4 +#define GUC_GSC_OTHER_CLASS 5 +#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS +#define GUC_MAX_ENGINE_CLASSES 16 +#define GUC_MAX_INSTANCES_PER_CLASS 32 + +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) + +/* 32-bit KLV structure as used by policy updates and others */ +struct guc_klv_generic_dw_t { + u32 kl; + u32 value; +} __packed; + +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ +struct guc_update_exec_queue_policy_header { + u32 action; + u32 guc_id; +} __packed; + +struct guc_update_exec_queue_policy { + struct guc_update_exec_queue_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; +} __packed; + +/* GUC_CTL_* - Parameters for loading the GuC */ +#define GUC_CTL_LOG_PARAMS 0 +#define GUC_LOG_VALID BIT(0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) +#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) +#define GUC_LOG_LOG_ALLOC_UNITS BIT(3) +#define GUC_LOG_CRASH_SHIFT 4 +#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) +#define GUC_LOG_BUF_ADDR_SHIFT 12 + +#define GUC_CTL_WA 1 +#define GUC_WA_GAM_CREDITS BIT(10) +#define GUC_WA_DUAL_QUEUE BIT(11) +#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) +#define 
GUC_WA_CONTEXT_ISOLATION BIT(15) +#define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) +#define GUC_WA_POLLCS BIT(18) +#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) + +#define GUC_CTL_FEATURE 2 +#define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_DISABLE_SCHEDULER BIT(14) + +#define GUC_CTL_DEBUG 3 +#define GUC_LOG_VERBOSITY_SHIFT 0 +#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY_MAX 3 +#define GUC_LOG_VERBOSITY_MASK 0x0000000f +#define GUC_LOG_DESTINATION_MASK (3 << 4) +#define GUC_LOG_DISABLED (1 << 6) +#define GUC_PROFILE_ENABLED (1 << 7) + +#define GUC_CTL_ADS 4 +#define GUC_ADS_ADDR_SHIFT 1 +#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) + +#define GUC_CTL_DEVID 5 + +#define GUC_CTL_MAX_DWORDS 14 + +/* Scheduling policy settings */ + +#define GLOBAL_POLICY_MAX_NUM_WI 15 + +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) + +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 + +struct guc_policies { + u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; + /* + * In micro seconds. How much time to allow before DPC processing is + * called back via interrupt (to prevent DPC queue drain starving). + * Typically 1000s of micro seconds (example only, not granularity). + */ + u32 dpc_promote_time; + + /* Must be set to take these new values. */ + u32 is_valid; + + /* + * Max number of WIs to process per call. A large value may keep CS + * idle. + */ + u32 max_num_work_items; + + u32 global_flags; + u32 reserved[4]; +} __packed; + +/* GuC MMIO reg state struct */ +struct guc_mmio_reg { + u32 offset; + u32 value; + u32 flags; + u32 mask; +#define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_MASKED_WITH_VALUE BIT(2) +#define GUC_REGSET_RESTORE_ONLY BIT(3) +} __packed; + +/* GuC register sets */ +struct guc_mmio_reg_set { + u32 address; + u16 count; + u16 reserved; +} __packed; + +/* Generic GT SysInfo data types */ +#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0 +#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1 +#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2 +#define GUC_GENERIC_GT_SYSINFO_MAX 16 + +/* HW info */ +struct guc_gt_system_info { + u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES]; + u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; +} __packed; + +enum { + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + +/* GuC Additional Data Struct */ +struct guc_ads { + struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 reserved0; + u32 scheduler_policies; + u32 gt_system_info; + u32 reserved1; + u32 control_data; + u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; + u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; + u32 private_data; + u32 um_init_data; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[14]; +} __packed; + +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 
reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + +/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ +enum xe_guc_recv_message { + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), + XE_GUC_RECV_MSG_EXCEPTION = BIT(30), +}; + +/* Page fault structures */ +struct access_counter_desc { + u32 dw0; +#define ACCESS_COUNTER_TYPE BIT(0) +#define ACCESS_COUNTER_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACCESS_COUNTER_SUBG_HI BIT(0) +#define ACCESS_COUNTER_RSVD0 GENMASK(2, 1) +#define ACCESS_COUNTER_ENG_INSTANCE GENMASK(8, 3) +#define ACCESS_COUNTER_ENG_CLASS GENMASK(11, 9) +#define ACCESS_COUNTER_ASID GENMASK(31, 12) + + u32 dw2; +#define ACCESS_COUNTER_VFID GENMASK(5, 0) +#define ACCESS_COUNTER_RSVD1 GENMASK(7, 6) +#define ACCESS_COUNTER_GRANULARITY GENMASK(10, 8) +#define ACCESS_COUNTER_RSVD2 GENMASK(16, 11) +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17) + + u32 dw3; +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +enum guc_um_queue_type { + GUC_UM_HW_QUEUE_PAGE_FAULT = 0, + GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE, + GUC_UM_HW_QUEUE_ACCESS_COUNTER, + GUC_UM_HW_QUEUE_MAX +}; + +struct guc_um_queue_params { + u64 base_dpa; + u32 base_ggtt_address; + u32 size_in_bytes; + u32 rsvd[4]; +} __packed; + +struct guc_um_init_params { + u64 page_response_timeout_in_us; + u32 rsvd[6]; + struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX]; +} __packed; + +enum xe_guc_fault_reply_type { + PFR_ACCESS = 0, + PFR_ENGINE, + PFR_VFID, + PFR_ALL, + PFR_INVALID +}; + +enum xe_guc_response_desc_type { + TLB_INVALIDATION_DESC = 0, + FAULT_RESPONSE_DESC +}; + +struct xe_guc_pagefault_desc { + u32 dw0; +#define PFD_FAULT_LEVEL GENMASK(2, 0) +#define PFD_SRC_ID GENMASK(10, 3) +#define PFD_RSVD_0 GENMASK(17, 11) +#define XE2_PFD_TRVA_FAULT BIT(18) +#define PFD_ENG_INSTANCE GENMASK(24, 19) +#define PFD_ENG_CLASS GENMASK(27, 25) +#define PFD_PDATA_LO GENMASK(31, 28) + + u32 dw1; +#define PFD_PDATA_HI GENMASK(11, 0) +#define PFD_PDATA_HI_SHIFT 4 +#define PFD_ASID GENMASK(31, 12) + + u32 dw2; +#define PFD_ACCESS_TYPE GENMASK(1, 0) +#define PFD_FAULT_TYPE GENMASK(3, 2) +#define PFD_VFID GENMASK(9, 4) +#define PFD_RSVD_1 GENMASK(11, 10) +#define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12) +#define PFD_VIRTUAL_ADDR_LO_SHIFT 12 + + u32 dw3; +#define PFD_VIRTUAL_ADDR_HI GENMASK(31, 0) +#define PFD_VIRTUAL_ADDR_HI_SHIFT 32 +} __packed; + +struct xe_guc_pagefault_reply { + u32 dw0; +#define PFR_VALID BIT(0) +#define PFR_SUCCESS BIT(1) +#define PFR_REPLY GENMASK(4, 2) +#define PFR_RSVD_0 GENMASK(9, 5) +#define PFR_DESC_TYPE GENMASK(11, 10) +#define PFR_ASID GENMASK(31, 12) + + u32 dw1; +#define PFR_VFID GENMASK(5, 0) +#define PFR_RSVD_1 BIT(6) +#define PFR_ENG_INSTANCE GENMASK(12, 7) +#define PFR_ENG_CLASS GENMASK(15, 13) +#define PFR_PDATA GENMASK(31, 16) + + u32 dw2; +#define PFR_RSVD_2 GENMASK(31, 0) +} __packed; + +struct xe_guc_acc_desc { + u32 dw0; +#define ACC_TYPE BIT(0) +#define ACC_TRIGGER 0 +#define ACC_NOTIFY 1 +#define ACC_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACC_SUBG_HI BIT(0) +#define ACC_RSVD0 GENMASK(2, 1) +#define ACC_ENG_INSTANCE GENMASK(8, 3) +#define ACC_ENG_CLASS GENMASK(11, 9) +#define ACC_ASID GENMASK(31, 12) + + u32 dw2; +#define ACC_VFID GENMASK(5, 0) +#define ACC_RSVD1 GENMASK(7, 6) +#define ACC_GRANULARITY GENMASK(10, 8) +#define ACC_RSVD2 GENMASK(16, 11) +#define ACC_VIRTUAL_ADDR_RANGE_LO 
GENMASK(31, 17) + + u32 dw3; +#define ACC_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c new file mode 100644 index 000000000000..2a13a00917f8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_hwconfig.h" + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_map.h" + +static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) +{ + u32 action[] = { + XE_GUC_ACTION_GET_HWCONFIG, + lower_32_bits(ggtt_addr), + upper_32_bits(ggtt_addr), + size, + }; + + return xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); +} + +static int guc_hwconfig_size(struct xe_guc *guc, u32 *size) +{ + int ret = send_get_hwconfig(guc, 0, 0); + + if (ret < 0) + return ret; + + *size = ret; + return 0; +} + +static int guc_hwconfig_copy(struct xe_guc *guc) +{ + int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo), + guc->hwconfig.size); + + if (ret < 0) + return ret; + + return 0; +} + +int xe_guc_hwconfig_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + u32 size; + int err; + + /* Initialization already done */ + if (guc->hwconfig.bo) + return 0; + + /* + * The hwconfig is the same across all GTs, so only GT0 needs to be + * configured + */ + if (gt->info.id != XE_GT0) + return 0; + + /* Only ADL_P and DG2+ support the hwconfig table */ + if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P) + return 0; + + err = guc_hwconfig_size(guc, &size); + if (err) + return err; + if (!size) + return -EINVAL; + + bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + guc->hwconfig.bo = bo; + guc->hwconfig.size = size; + + return guc_hwconfig_copy(guc); +} + +u32 xe_guc_hwconfig_size(struct xe_guc *guc) +{ + return !guc->hwconfig.bo ? 
0 : guc->hwconfig.size; +} + +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) +{ + struct xe_device *xe = guc_to_xe(guc); + + XE_WARN_ON(!guc->hwconfig.bo); + + xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, + guc->hwconfig.size); +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h new file mode 100644 index 000000000000..b5794d641900 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_HWCONFIG_H_ +#define _XE_GUC_HWCONFIG_H_ + +#include <linux/types.h> + +struct xe_guc; + +int xe_guc_hwconfig_init(struct xe_guc *guc); +u32 xe_guc_hwconfig_size(struct xe_guc *guc); +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c new file mode 100644 index 000000000000..bcd2f4d34081 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_log.h" + +#include <drm/drm_managed.h> + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_module.h" + +static struct xe_gt * +log_to_gt(struct xe_guc_log *log) +{ + return container_of(log, struct xe_gt, uc.guc.log); +} + +static struct xe_device * +log_to_xe(struct xe_guc_log *log) +{ + return gt_to_xe(log_to_gt(log)); +} + +static size_t guc_log_size(void) +{ + /* + * GuC Log buffer Layout + * + * +===============================+ 00B + * | Crash dump state header | + * +-------------------------------+ 32B + * | Debug state header | + * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ + return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; +} + +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) +{ + struct xe_device *xe = log_to_xe(log); + size_t size; + int i, j; + + xe_assert(xe, log->bo); + + size = log->bo->size; + +#define DW_PER_READ 128 + xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32)))); + for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) { + u32 read[DW_PER_READ]; + + xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32), + DW_PER_READ * sizeof(u32)); +#define DW_PER_PRINT 4 + for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) { + u32 *print = read + j * DW_PER_PRINT; + + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(print + 0), *(print + 1), + *(print + 2), *(print + 3)); + } + } +} + +int xe_guc_log_init(struct xe_guc_log *log) +{ + struct xe_device *xe = log_to_xe(log); + struct xe_tile *tile = gt_to_tile(log_to_gt(log)); + struct xe_bo *bo; + + bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size()); + log->bo = bo; + log->level = xe_modparam.guc_log_level; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h new file mode 100644 index 000000000000..2d25ab28b4b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -0,0 +1,48 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_H_ +#define _XE_GUC_LOG_H_ + +#include "xe_guc_log_types.h" + +struct drm_printer; + +#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#define CRASH_BUFFER_SIZE SZ_1M +#define DEBUG_BUFFER_SIZE SZ_8M +#define CAPTURE_BUFFER_SIZE SZ_2M +#else +#define CRASH_BUFFER_SIZE SZ_8K +#define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K +#endif +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + +int xe_guc_log_init(struct xe_guc_log *log); +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p); + +static inline u32 +xe_guc_log_get_level(struct xe_guc_log *log) +{ + return log->level; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h new file mode 100644 index 000000000000..125080d138a7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_TYPES_H_ +#define _XE_GUC_LOG_TYPES_H_ + +#include <linux/types.h> + +struct xe_bo; + +/** + * struct xe_guc_log - GuC log + */ +struct xe_guc_log { + /** @level: GuC log level */ + u32 level; + /** @bo: XE BO for GuC log */ + struct xe_bo *bo; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c new file mode 100644 index 000000000000..f71085228cb3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -0,0 +1,1000 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_guc_pc.h" + +#include <linux/delay.h> + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_idle.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_types.h" +#include "xe_guc_ct.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_pcode.h" + +#define MCHBAR_MIRROR_BASE_SNB 0x140000 + +#define RP_STATE_CAP XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_MASK REG_GENMASK(7, 0) +#define RP1_MASK REG_GENMASK(15, 8) +#define RPN_MASK REG_GENMASK(23, 16) + +#define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define RPE_MASK REG_GENMASK(15, 8) + +#define GT_PERF_STATUS XE_REG(0x1381b4) +#define CAGF_MASK REG_GENMASK(19, 11) + +#define GT_FREQUENCY_MULTIPLIER 50 +#define GT_FREQUENCY_SCALER 3 + +/** + * DOC: GuC Power Conservation (PC) + * + * GuC Power Conservation (PC) supports multiple features for the most + * efficient and performing use of the GT when GuC submission is enabled, + * including frequency management, Render-C states management, and various + * algorithms for power balancing. 
+ * + * Single Loop Power Conservation (SLPC) is the name given to the suite of + * connected power conservation features in the GuC firmware. The firmware + * exposes a programming interface to the host for the control of SLPC. + * + * Frequency management: + * ===================== + * + * Xe driver enables SLPC with all of its default features and frequency + * selection, which varies per platform. + * + * Render-C States: + * ================ + * + * Render-C states support is also a GuC PC feature that is now enabled in + * Xe for all platforms. + * + */ + +static struct xe_guc * +pc_to_guc(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_guc, pc); +} + +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = pc_to_guc(pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + +static struct xe_gt * +pc_to_gt(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_gt, uc.guc.pc); +} + +static struct iosys_map * +pc_to_maps(struct xe_guc_pc *pc) +{ + return &pc->bo->vmap; +} + +#define slpc_shared_data_read(pc_, field_) \ + xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_) + +#define slpc_shared_data_write(pc_, field_, val_) \ + xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_, val_) + +#define SLPC_EVENT(id, count) \ + (FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ + FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) + +static int wait_for_pc_state(struct xe_guc_pc *pc, + enum slpc_global_state state) +{ + int timeout_us = 5000; /* roughly 5 ms, but no need for precision */ + int slept, wait = 10; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + for (slept = 0; slept < timeout_us;) { + if (slpc_shared_data_read(pc, header.global_state) == state) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} + +static int pc_action_reset(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_shutdown(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_query_task_state(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + /* Blocking here to ensure the results are ready before reading them */ + ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); + if (ret) + drm_err(&pc_to_xe(pc)->drm, + "GuC PC query task state failed: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + 
int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + u32 action[] = { + XE_GUC_ACTION_SETUP_PC_GUCRC, + mode, + }; + int ret; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", + ERR_PTR(ret)); + return ret; +} + +static u32 decode_freq(u32 raw) +{ + return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, + GT_FREQUENCY_SCALER); +} + +static u32 encode_freq(u32 freq) +{ + return DIV_ROUND_CLOSEST(freq * GT_FREQUENCY_SCALER, + GT_FREQUENCY_MULTIPLIER); +} + +static u32 pc_get_min_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE; + + /* Allow/Disallow punit to process software freq requests */ + xe_mmio_write32(gt, RP_CONTROL, state); +} + +static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 rpnswreq; + + pc_set_manual_rp_ctrl(pc, true); + + /* Req freq is in units of 16.66 MHz */ + rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq)); + xe_mmio_write32(gt, RPNSWREQ, rpnswreq); + + /* Sleep for a small time to allow pcode to respond */ + usleep_range(100, 300); + + pc_set_manual_rp_ctrl(pc, false); +} + +static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + /* + * GuC policy is to elevate the minimum frequency to the efficient level. + * Our goal is to have the admin choices respected. + */ + pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + freq < pc->rpe_freq); + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static int pc_get_max_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + * Also, overclocking is not supported. 
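As a worked example of the 50/3 scaling implemented by decode_freq() and encode_freq() above (one hardware ratio step is 50/3 ≈ 16.67 MHz, matching the "units of 16.66 MHz" note at pc_set_cur_freq()):

	decode_freq(18)   = DIV_ROUND_CLOSEST(18 * 50, 3)   = 300 MHz
	decode_freq(90)   = DIV_ROUND_CLOSEST(90 * 50, 3)   = 1500 MHz
	encode_freq(1500) = DIV_ROUND_CLOSEST(1500 * 3, 50) = 90
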
+ */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static void mtl_update_rpe_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY); + else + reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY); + + pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); +} + +static void tgl_update_rpe_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe. + * For platforms other than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); + else + reg = xe_mmio_read32(gt, FREQ_INFO_REC); + + pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static void pc_update_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) >= 1270) + mtl_update_rpe_value(pc); + else + tgl_update_rpe_value(pc); + + /* + * RPe is decided at runtime by PCODE. In the rare case where that's + * smaller than the fused min, we will trust the PCODE and use that + * as our minimum one. + */ + pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); +} + +/** + * xe_guc_pc_get_act_freq - Get Actual running frequency + * @pc: The GuC PC + * + * Returns: The actual running frequency, which might be 0 if the GT is in the + * Render-C sleep state (RC6). + */ +u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 freq; + + xe_device_mem_access_get(gt_to_xe(gt)); + + /* When in RC6, actual frequency reported will be 0. */ + if (GRAPHICS_VERx100(xe) >= 1270) { + freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); + freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); + } else { + freq = xe_mmio_read32(gt, GT_PERF_STATUS); + freq = REG_FIELD_GET(CAGF_MASK, freq); + } + + freq = decode_freq(freq); + + xe_device_mem_access_put(gt_to_xe(gt)); + + return freq; +} + +/** + * xe_guc_pc_get_cur_freq - Get Current requested frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). + */ +int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret; + + xe_device_mem_access_get(gt_to_xe(gt)); + /* + * GuC SLPC plays with cur freq request when GuCRC is enabled. + * Block RC6 for a more reliable read. + */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out; + + *freq = xe_mmio_read32(gt, RPNSWREQ); + + *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); + *freq = decode_freq(*freq); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + xe_device_mem_access_put(gt_to_xe(gt)); + return ret; +} + +/** + * xe_guc_pc_get_rp0_freq - Get the RP0 freq + * @pc: The GuC PC + * + * Returns: RP0 freq. + */ +u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) +{ + return pc->rp0_freq; +} + +/** + * xe_guc_pc_get_rpe_freq - Get the RPe freq + * @pc: The GuC PC + * + * Returns: RPe freq. 
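xe_guc_pc_get_cur_freq() above shows a recurring pattern in this file: hold XE_FORCEWAKE_ALL so GuC RC cannot drop the GT into RC6 in the middle of a read. A condensed, hedged sketch of that pattern (illustrative only; it reuses the forcewake and MMIO helpers already used above, with a hypothetical function name):

	/* Illustrative only: "block RC6 around a read" for any register that
	 * GuC RC can change under us.
	 */
	static int example_read_stable(struct xe_gt *gt, struct xe_reg reg, u32 *out)
	{
		int ret;

		ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
		if (ret)
			return ret;

		*out = xe_mmio_read32(gt, reg);	/* stable while awake */

		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
		return 0;
	}
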
+ */ +u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + xe_device_mem_access_get(xe); + pc_update_rp_values(pc); + xe_device_mem_access_put(xe); + + return pc->rpe_freq; +} + +/** + * xe_guc_pc_get_rpn_freq - Get the RPn freq + * @pc: The GuC PC + * + * Returns: RPn freq. + */ +u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) +{ + return pc->rpn_freq; +} + +/** + * xe_guc_pc_get_min_freq - Get the min operational frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). + */ +int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + /* + * GuC SLPC plays with min freq request when GuCRC is enabled + * Block RC6 for a more reliable read. + */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out; + + ret = pc_action_query_task_state(pc); + if (ret) + goto fw; + + *freq = pc_get_min_freq(pc); + +fw: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +/** + * xe_guc_pc_set_min_freq - Set the minimal operational frequency + * @pc: The GuC PC + * @freq: The selected minimal frequency + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset), + * -EINVAL if value out of bounds. + */ +int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_min_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_min = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + + return ret; +} + +/** + * xe_guc_pc_get_max_freq - Get Maximum operational frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). + */ +int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_action_query_task_state(pc); + if (ret) + goto out; + + *freq = pc_get_max_freq(pc); + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +/** + * xe_guc_pc_set_max_freq - Set the maximum operational frequency + * @pc: The GuC PC + * @freq: The selected maximum frequency value + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset), + * -EINVAL if value out of bounds. 
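Since -EAGAIN from these getters and setters simply means a GT reset is republishing the frequencies (freq_ready is false), a hedged caller-side sketch (illustrative only; the retry bound and sleep are arbitrary choices, not driver policy):

	/* Illustrative only: retry a setter that raced with a GT reset. */
	static int example_set_min_freq_retry(struct xe_guc_pc *pc, u32 freq)
	{
		int i, ret;

		for (i = 0; i < 5; i++) {
			ret = xe_guc_pc_set_min_freq(pc, freq);
			if (ret != -EAGAIN)
				break;
			msleep(20);	/* wait for pc_init_freqs() to finish */
		}

		return ret;
	}
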
+ */ +int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) +{ + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_max_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_max = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +/** + * xe_guc_pc_c_status - get the current GT C state + * @pc: XE_GuC_PC instance + */ +enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg, gt_c_state; + + xe_device_mem_access_get(gt_to_xe(gt)); + + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); + gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); + } else { + reg = xe_mmio_read32(gt, GT_CORE_STATUS); + gt_c_state = REG_FIELD_GET(RCN_MASK, reg); + } + + xe_device_mem_access_put(gt_to_xe(gt)); + + switch (gt_c_state) { + case GT_C6: + return GT_IDLE_C6; + case GT_C0: + return GT_IDLE_C0; + default: + return GT_IDLE_UNKNOWN; + } +} + +/** + * xe_guc_pc_rc6_residency - rc6 residency counter + * @pc: Xe_GuC_PC instance + */ +u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + xe_device_mem_access_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, GT_GFX_RC6); + xe_device_mem_access_put(gt_to_xe(gt)); + + return reg; +} + +/** + * xe_guc_pc_mc6_residency - mc6 residency counter + * @pc: Xe_GuC_PC instance + */ +u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u64 reg; + + xe_device_mem_access_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); + xe_device_mem_access_put(gt_to_xe(gt)); + + return reg; +} + +static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP); + else + reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP); + + pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg)); + + pc->rpn_freq = decode_freq(REG_FIELD_GET(MTL_RPN_CAP_MASK, reg)); +} + +static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); + else + reg = xe_mmio_read32(gt, RP_STATE_CAP); + pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static void pc_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) >= 1270) + mtl_init_fused_rp_values(pc); + else + tgl_init_fused_rp_values(pc); +} + +/** + * xe_guc_pc_init_early - Initialize RPx values and request a higher GT + * frequency to allow faster GuC load times + * @pc: Xe_GuC_PC instance + */ +void xe_guc_pc_init_early(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + pc_init_fused_rp_values(pc); + pc_set_cur_freq(pc, pc->rp0_freq); +} + +static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + /* + * 
GuC defaults to some RPmax that is not actually achievable without + * overclocking. Let's adjust it to the Hardware RP0, which is the + * regular maximum. + */ + if (pc_get_max_freq(pc) > pc->rp0_freq) + pc_set_max_freq(pc, pc->rp0_freq); + + /* + * The same thing happens for server platforms, where min is listed as + * RPMax + */ + if (pc_get_min_freq(pc) > pc->rp0_freq) + pc_set_min_freq(pc, pc->rp0_freq); + + return 0; +} + +static int pc_adjust_requested_freq(struct xe_guc_pc *pc) +{ + int ret = 0; + + lockdep_assert_held(&pc->freq_lock); + + if (pc->user_requested_min != 0) { + ret = pc_set_min_freq(pc, pc->user_requested_min); + if (ret) + return ret; + } + + if (pc->user_requested_max != 0) { + ret = pc_set_max_freq(pc, pc->user_requested_max); + if (ret) + return ret; + } + + return ret; +} + +/** + * xe_guc_pc_gucrc_disable - Disable GuC RC + * @pc: Xe_GuC_PC instance + * + * Disables GuC RC by taking control of RC6 back from GuC. + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (xe->info.skip_guc_pc) + return 0; + + xe_device_mem_access_get(pc_to_xe(pc)); + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); + if (ret) + goto out; + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out; + + xe_gt_idle_disable_c6(gt); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static void pc_init_pcode_freq(struct xe_guc_pc *pc) +{ + u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); + u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); + + XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); +} + +static int pc_init_freqs(struct xe_guc_pc *pc) +{ + int ret; + + mutex_lock(&pc->freq_lock); + + ret = pc_adjust_freq_bounds(pc); + if (ret) + goto out; + + ret = pc_adjust_requested_freq(pc); + if (ret) + goto out; + + pc_update_rp_values(pc); + + pc_init_pcode_freq(pc); + + /* + * The frequencies are only really ready for use after the + * user-requested ones have been restored. 
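Note the unit change in pc_init_pcode_freq() above: PCODE takes the min/max table in units of GT_FREQUENCY_MULTIPLIER (50 MHz). As a worked example, rpn_freq = 300 MHz and rp0_freq = 1500 MHz seed the table with min = DIV_ROUND_CLOSEST(300, 50) = 6 and max = DIV_ROUND_CLOSEST(1500, 50) = 30.
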
+ */ + pc->freq_ready = true; + +out: + mutex_unlock(&pc->freq_lock); + return ret; +} + +/** + * xe_guc_pc_start - Start GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_start(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int ret; + + xe_gt_assert(gt, xe_device_uc_enabled(xe)); + + xe_device_mem_access_get(pc_to_xe(pc)); + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out_fail_force_wake; + + if (xe->info.skip_guc_pc) { + if (xe->info.platform != XE_PVC) + xe_gt_idle_enable_c6(gt); + + /* Request max possible since dynamic freq mgmt is not enabled */ + pc_set_cur_freq(pc, UINT_MAX); + + ret = 0; + goto out; + } + + memset(pc->bo->vmap.vaddr, 0, size); + slpc_shared_data_write(pc, header.size, size); + + ret = pc_action_reset(pc); + if (ret) + goto out; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); + ret = -EIO; + goto out; + } + + ret = pc_init_freqs(pc); + if (ret) + goto out; + + if (xe->info.platform == XE_PVC) { + xe_guc_pc_gucrc_disable(pc); + ret = 0; + goto out; + } + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); + +out: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out_fail_force_wake: + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +/** + * xe_guc_pc_stop - Stop GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_stop(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + + if (xe->info.skip_guc_pc) { + xe_gt_idle_disable_c6(pc_to_gt(pc)); + ret = 0; + goto out; + } + + mutex_lock(&pc->freq_lock); + pc->freq_ready = false; + mutex_unlock(&pc->freq_lock); + + ret = pc_action_shutdown(pc); + if (ret) + goto out; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); + ret = -EIO; + } + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +/** + * xe_guc_pc_fini - Finalize GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +void xe_guc_pc_fini(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + + if (xe->info.skip_guc_pc) { + xe_gt_idle_disable_c6(pc_to_gt(pc)); + return; + } + + XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + XE_WARN_ON(xe_guc_pc_stop(pc)); + mutex_destroy(&pc->freq_lock); +} + +/** + * xe_guc_pc_init - Initialize GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_init(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + + if (xe->info.skip_guc_pc) + return 0; + + mutex_init(&pc->freq_lock); + + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + pc->bo = bo; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h new file mode 100644 index 000000000000..cecad8e9300b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_PC_H_ +#define _XE_GUC_PC_H_ + +#include "xe_guc_pc_types.h" + +int xe_guc_pc_init(struct xe_guc_pc *pc); 
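+
+/*
+ * Illustrative call order (editorial sketch, not part of the patch; pc is
+ * assumed to be the xe_guc_pc embedded in the GT's GuC state):
+ *
+ *	ret = xe_guc_pc_init(pc);	- allocate the SLPC shared buffer
+ *	xe_guc_pc_init_early(pc);	- pre-GuC-load, forcewake held
+ *	ret = xe_guc_pc_start(pc);	- after GuC load, starts SLPC
+ *	...
+ *	ret = xe_guc_pc_stop(pc);	- before GuC reset / suspend
+ *	xe_guc_pc_fini(pc);		- driver teardown
+ */
+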
+void xe_guc_pc_fini(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
+
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
+void xe_guc_pc_init_early(struct xe_guc_pc *pc);
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 000000000000..2afd0dbc3542
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+	/** @bo: GGTT buffer object that is shared with GuC PC */
+	struct xe_bo *bo;
+	/** @rp0_freq: HW RP0 frequency - The Maximum one */
+	u32 rp0_freq;
+	/** @rpe_freq: HW RPe frequency - The Efficient one */
+	u32 rpe_freq;
+	/** @rpn_freq: HW RPN frequency - The Minimum one */
+	u32 rpn_freq;
+	/** @user_requested_min: Stash the minimum freq requested by the user */
+	u32 user_requested_min;
+	/** @user_requested_max: Stash the maximum freq requested by the user */
+	u32 user_requested_max;
+	/** @freq_lock: Mutex protecting the frequency state above */
+	struct mutex freq_lock;
+	/** @freq_ready: Only handle freq changes once they are really ready */
+	bool freq_ready;
+};
+
+#endif /* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 000000000000..21ac68e3246f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1990 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_submit.h"
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_submit_types.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct xe_guc *
+exec_queue_to_guc(struct xe_exec_queue *q)
+{
+	return &q->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * at the same time (e.g. a suspend can be happening at the same time as the
+ * schedule-engine-done message is being processed).
+ */
+#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
+#define ENGINE_STATE_ENABLED			(1 << 1)
+#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
+#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
+#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
+#define ENGINE_STATE_SUSPENDED			(1 << 5)
+#define EXEC_QUEUE_STATE_RESET			(1 << 6)
+#define ENGINE_STATE_KILLED			(1 << 7)
+
+static bool exec_queue_registered(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
+}
+
+static void set_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static void clear_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static bool exec_queue_enabled(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static void clear_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static bool exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
+}
+
+static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static bool exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
+}
+
+static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static bool exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
+}
+
+static void set_exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
+}
+
+static bool exec_queue_banned(struct xe_exec_queue *q)
+{
+	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
+}
+
+static void set_exec_queue_banned(struct xe_exec_queue *q)
+{
+	q->flags |= EXEC_QUEUE_FLAG_BANNED;
+}
+
+static bool exec_queue_suspended(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static void clear_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static bool exec_queue_reset(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
+}
+
+static void set_exec_queue_reset(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
+}
+
+static bool exec_queue_killed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_exec_queue_killed(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
+}
+
+static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
+{
+	return exec_queue_killed(q) || exec_queue_banned(q);
+}
+
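+/*
+ * Illustrative sketch (editorial, not part of the patch): because each helper
+ * above is a single atomic_read()/atomic_or()/atomic_and(), compound
+ * predicates can be built from one snapshot of the state word with no extra
+ * locking. A hypothetical helper could look like:
+ */
+#if 0	/* example only */
+static bool exec_queue_idle_candidate(struct xe_exec_queue *q)
+{
+	int state = atomic_read(&q->guc->state);
+
+	/* One atomic snapshot, then purely local bit tests */
+	return (state & EXEC_QUEUE_STATE_REGISTERED) &&
+	       !(state & (ENGINE_STATE_ENABLED | ENGINE_STATE_SUSPENDED));
+}
+#endif
+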
+#ifdef CONFIG_PROVE_LOCKING +static int alloc_submit_wq(struct xe_guc *guc) +{ + int i; + + for (i = 0; i < NUM_SUBMIT_WQ; ++i) { + guc->submission_state.submit_wq_pool[i] = + alloc_ordered_workqueue("submit_wq", 0); + if (!guc->submission_state.submit_wq_pool[i]) + goto err_free; + } + + return 0; + +err_free: + while (i) + destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); + + return -ENOMEM; +} + +static void free_submit_wq(struct xe_guc *guc) +{ + int i; + + for (i = 0; i < NUM_SUBMIT_WQ; ++i) + destroy_workqueue(guc->submission_state.submit_wq_pool[i]); +} + +static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +{ + int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; + + return guc->submission_state.submit_wq_pool[idx]; +} +#else +static int alloc_submit_wq(struct xe_guc *guc) +{ + return 0; +} + +static void free_submit_wq(struct xe_guc *guc) +{ + +} + +static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +{ + return NULL; +} +#endif + +static void guc_submit_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xa_destroy(&guc->submission_state.exec_queue_lookup); + ida_destroy(&guc->submission_state.guc_ids); + bitmap_free(guc->submission_state.guc_ids_bitmap); + free_submit_wq(guc); + mutex_destroy(&guc->submission_state.lock); +} + +#define GUC_ID_MAX 65535 +#define GUC_ID_NUMBER_MLRC 4096 +#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) +#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC + +static const struct xe_exec_queue_ops guc_exec_queue_ops; + +static void primelockdep(struct xe_guc *guc) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + + mutex_lock(&guc->submission_state.lock); + might_lock(&guc->submission_state.suspend.lock); + mutex_unlock(&guc->submission_state.lock); + + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_submit_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int err; + + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; + + err = alloc_submit_wq(guc); + if (err) { + bitmap_free(guc->submission_state.guc_ids_bitmap); + return err; + } + + gt->exec_queue_ops = &guc_exec_queue_ops; + + mutex_init(&guc->submission_state.lock); + xa_init(&guc->submission_state.exec_queue_lookup); + ida_init(&guc->submission_state.guc_ids); + + spin_lock_init(&guc->submission_state.suspend.lock); + guc->submission_state.suspend.context = dma_fence_context_alloc(1); + + primelockdep(guc); + + err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + if (err) + return err; + + return 0; +} + +static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) +{ + int i; + + lockdep_assert_held(&guc->submission_state.lock); + + for (i = 0; i < xa_count; ++i) + xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); + + if (xe_exec_queue_is_parallel(q)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + q->guc->id - GUC_ID_START_MLRC, + order_base_2(q->width)); + else + ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); +} + +static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) +{ + int ret; + void *ptr; + int i; + + /* + * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, + * worse case user gets -ENOMEM on engine create and has to try again. 
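+	 * (A blocking allocation could recurse into reclaim, which may itself
+	 * wait on dma-fences, deadlocking the signalling path.)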
+ * + * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent + * failure. + */ + lockdep_assert_held(&guc->submission_state.lock); + + if (xe_exec_queue_is_parallel(q)) { + void *bitmap = guc->submission_state.guc_ids_bitmap; + + ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, + order_base_2(q->width)); + } else { + ret = ida_simple_get(&guc->submission_state.guc_ids, 0, + GUC_ID_NUMBER_SLRC, GFP_NOWAIT); + } + if (ret < 0) + return ret; + + q->guc->id = ret; + if (xe_exec_queue_is_parallel(q)) + q->guc->id += GUC_ID_START_MLRC; + + for (i = 0; i < q->width; ++i) { + ptr = xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto err_release; + } + } + + return 0; + +err_release: + __release_guc_id(guc, q, i); + + return ret; +} + +static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) +{ + mutex_lock(&guc->submission_state.lock); + __release_guc_id(guc, q, q->width); + mutex_unlock(&guc->submission_state.lock); +} + +struct exec_queue_policy { + u32 count; + struct guc_update_exec_queue_policy h2g; +}; + +static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) +{ + size_t bytes = sizeof(policy->h2g.header) + + (sizeof(policy->h2g.klv[0]) * policy->count); + + return bytes / sizeof(u32); +} + +static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, + u16 guc_id) +{ + policy->h2g.header.action = + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; + policy->h2g.header.guc_id = guc_id; + policy->count = 0; +} + +#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ +static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ + u32 data) \ +{ \ + XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ +\ + policy->h2g.klv[policy->count].kl = \ + FIELD_PREP(GUC_KLV_0_KEY, \ + GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ + FIELD_PREP(GUC_KLV_0_LEN, 1); \ + policy->h2g.klv[policy->count].value = data; \ + policy->count++; \ +} + +MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) +MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) +MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +#undef MAKE_EXEC_QUEUE_POLICY_ADD + +static const int xe_exec_queue_prio_to_guc[] = { + [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, + [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, + [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, + [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, +}; + +static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct exec_queue_policy policy; + struct xe_device *xe = guc_to_xe(guc); + enum xe_exec_queue_priority prio = q->priority; + u32 timeslice_us = q->sched_props.timeslice_us; + u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; + + xe_assert(xe, exec_queue_registered(q)); + + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); + __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); + __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); + __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_exec_queue_policy_action_size(&policy), 0, 0); +} + +static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct exec_queue_policy policy; + + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); + 
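/*
+	 * A 1 us preemption timeout effectively means "preempt as soon as
+	 * possible"; it is applied on the teardown path so a hung context
+	 * cannot hold the hardware across the disable/deregister sequence.
+	 */
+	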
__guc_exec_queue_policy_add_preemption_timeout(&policy, 1); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_exec_queue_policy_action_size(&policy), 0, 0); +} + +#define parallel_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ + field_) +#define parallel_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ + field_, val_) + +static void __register_mlrc_engine(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) +{ +#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) + struct xe_device *xe = guc_to_xe(guc); + u32 action[MAX_MLRC_REG_SIZE]; + int len = 0; + int i; + + xe_assert(xe, xe_exec_queue_is_parallel(q)); + + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = info->wq_desc_lo; + action[len++] = info->wq_desc_hi; + action[len++] = info->wq_base_lo; + action[len++] = info->wq_base_hi; + action[len++] = info->wq_size; + action[len++] = q->width; + action[len++] = info->hwlrca_lo; + action[len++] = info->hwlrca_hi; + + for (i = 1; i < q->width; ++i) { + struct xe_lrc *lrc = q->lrc + i; + + action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); + action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); + } + + xe_assert(xe, len <= MAX_MLRC_REG_SIZE); +#undef MAX_MLRC_REG_SIZE + + xe_guc_ct_send(&guc->ct, action, len, 0, 0); +} + +static void __register_engine(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) +{ + u32 action[] = { + XE_GUC_ACTION_REGISTER_CONTEXT, + info->flags, + info->context_idx, + info->engine_class, + info->engine_submit_mask, + info->wq_desc_lo, + info->wq_desc_hi, + info->wq_base_lo, + info->wq_base_hi, + info->wq_size, + info->hwlrca_lo, + info->hwlrca_hi, + }; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static void register_engine(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_lrc *lrc = q->lrc; + struct guc_ctxt_registration_info info; + + xe_assert(xe, !exec_queue_registered(q)); + + memset(&info, 0, sizeof(info)); + info.context_idx = q->guc->id; + info.engine_class = xe_engine_class_to_guc_class(q->class); + info.engine_submit_mask = q->logical_mask; + info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); + info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); + info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + + if (xe_exec_queue_is_parallel(q)) { + u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); + struct iosys_map map = xe_lrc_parallel_map(lrc); + + info.wq_desc_lo = lower_32_bits(ggtt_addr + + offsetof(struct guc_submit_parallel_scratch, wq_desc)); + info.wq_desc_hi = upper_32_bits(ggtt_addr + + offsetof(struct guc_submit_parallel_scratch, wq_desc)); + info.wq_base_lo = lower_32_bits(ggtt_addr + + offsetof(struct guc_submit_parallel_scratch, wq[0])); + info.wq_base_hi = upper_32_bits(ggtt_addr + + offsetof(struct guc_submit_parallel_scratch, wq[0])); + info.wq_size = WQ_SIZE; + + q->guc->wqi_head = 0; + q->guc->wqi_tail = 0; + xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); + parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); + } + + /* + * We must keep a reference for LR engines if engine is registered with + * the GuC as jobs signal immediately and can't destroy an engine if the + * GuC has 
a reference to it. + */ + if (xe_exec_queue_is_lr(q)) + xe_exec_queue_get(q); + + set_exec_queue_registered(q); + trace_xe_exec_queue_register(q); + if (xe_exec_queue_is_parallel(q)) + __register_mlrc_engine(guc, q, &info); + else + __register_engine(guc, &info); + init_policies(guc, q); +} + +static u32 wq_space_until_wrap(struct xe_exec_queue *q) +{ + return (WQ_SIZE - q->guc->wqi_tail); +} + +static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); + unsigned int sleep_period_ms = 1; + +#define AVAILABLE_SPACE \ + CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) + if (wqi_size > AVAILABLE_SPACE) { +try_again: + q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); + if (wqi_size > AVAILABLE_SPACE) { + if (sleep_period_ms == 1024) { + xe_gt_reset_async(q->gt); + return -ENODEV; + } + + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + goto try_again; + } + } +#undef AVAILABLE_SPACE + + return 0; +} + +static int wq_noop_append(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); + u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; + + if (wq_wait_for_space(q, wq_space_until_wrap(q))) + return -ENODEV; + + xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); + + parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, len_dw)); + q->guc->wqi_tail = 0; + + return 0; +} + +static void wq_item_append(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); +#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ + u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; + u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); + u32 len_dw = (wqi_size / sizeof(u32)) - 1; + int i = 0, j; + + if (wqi_size > wq_space_until_wrap(q)) { + if (wq_noop_append(q)) + return; + } + if (wq_wait_for_space(q, wqi_size)) + return; + + wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + wqi[i++] = xe_lrc_descriptor(q->lrc); + wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | + FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); + wqi[i++] = 0; + for (j = 1; j < q->width; ++j) { + struct xe_lrc *lrc = q->lrc + j; + + wqi[i++] = lrc->ring.tail / sizeof(u64); + } + + xe_assert(xe, i == wqi_size / sizeof(u32)); + + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, + wq[q->guc->wqi_tail / sizeof(u32)])); + xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); + q->guc->wqi_tail += wqi_size; + xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); + + xe_device_wmb(xe); + + map = xe_lrc_parallel_map(q->lrc); + parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); +} + +#define RESUME_PENDING ~0x0ull +static void submit_exec_queue(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_lrc *lrc = q->lrc; + u32 action[3]; + u32 g2h_len = 0; + u32 num_g2h = 0; + int len = 0; + bool extra_submit = false; + + xe_assert(xe, exec_queue_registered(q)); + + if (xe_exec_queue_is_parallel(q)) + wq_item_append(q); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + + if (exec_queue_suspended(q) && 
!xe_exec_queue_is_parallel(q)) + return; + + if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = q->guc->id; + action[len++] = GUC_CONTEXT_ENABLE; + g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; + num_g2h = 1; + if (xe_exec_queue_is_parallel(q)) + extra_submit = true; + + q->guc->resume_time = RESUME_PENDING; + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); + } else { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = q->guc->id; + trace_xe_exec_queue_submit(q); + } + + xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); + + if (extra_submit) { + len = 0; + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = q->guc->id; + trace_xe_exec_queue_submit(q); + + xe_guc_ct_send(&guc->ct, action, len, 0, 0); + } +} + +static struct dma_fence * +guc_exec_queue_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_exec_queue *q = job->q; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + bool lr = xe_exec_queue_is_lr(q); + + xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); + + trace_xe_sched_job_run(job); + + if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!exec_queue_registered(q)) + register_engine(q); + if (!lr) /* LR jobs are emitted in the exec IOCTL */ + q->ring_ops->emit_job(job); + submit_exec_queue(q); + } + + if (lr) { + xe_sched_job_set_error(job, -EOPNOTSUPP); + return NULL; + } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { + return job->fence; + } else { + return dma_fence_get(job->fence); + } +} + +static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + trace_xe_sched_job_free(job); + xe_sched_job_put(job); +} + +static int guc_read_stopped(struct xe_guc *guc) +{ + return atomic_read(&guc->submission_state.stopped); +} + +#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ + u32 action[] = { \ + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ + q->guc->id, \ + GUC_CONTEXT_##enable_disable, \ + } + +static void disable_scheduling_deregister(struct xe_guc *guc, + struct xe_exec_queue *q) +{ + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + struct xe_device *xe = guc_to_xe(guc); + int ret; + + set_min_preemption_timeout(guc, q); + smp_rmb(); + ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + struct xe_gpu_scheduler *sched = &q->guc->sched; + + drm_warn(&xe->drm, "Pending enable failed to respond"); + xe_sched_submission_start(sched); + xe_gt_reset_async(q->gt); + xe_sched_tdr_queue_imm(sched); + return; + } + + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + set_exec_queue_destroyed(q); + trace_xe_exec_queue_scheduling_disable(q); + + /* + * Reserve space for both G2H here as the 2nd G2H is sent from a G2H + * handler and we are not allowed to reserved G2H space in handlers. 
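+	 * Hence the g2h_len argument below covers both the scheduling and the
+	 * deregister replies, with num_g2h == 2.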
+	 */
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct drm_printer p = drm_err_printer("");
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
+	int i;
+	bool cookie;
+
+	if (q->vm && !q->vm->error_capture.capture_once) {
+		q->vm->error_capture.capture_once = true;
+		cookie = dma_fence_begin_signalling();
+		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+			if (adj_logical_mask & BIT(i)) {
+				adj_logical_mask |= width_mask << i;
+				i += q->width;
+			} else {
+				++i;
+			}
+		}
+
+		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		guc_exec_queue_print(q, &p);
+		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+			if (hwe->class != q->hwe->class ||
+			    !(BIT(hwe->logical_instance) & adj_logical_mask))
+				continue;
+			xe_hw_engine_print(hwe, &p);
+		}
+		xe_analyze_vm(&p, q->vm, q->gt->info.id);
+		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		dma_fence_end_signalling(cookie);
+	}
+}
+#else
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+}
+#endif
+
+static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
+	wake_up_all(&xe->ufence_wq);
+
+	if (xe_exec_queue_is_lr(q))
+		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
+	else
+		xe_sched_tdr_queue_imm(&q->guc->sched);
+}
+
+static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, lr_tdr);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gpu_scheduler *sched = &ge->sched;
+
+	xe_assert(xe, xe_exec_queue_is_lr(q));
+	trace_xe_exec_queue_lr_cleanup(q);
+
+	/* Kill the run_job / process_msg entry points */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Engine state now mostly stable, disable scheduling / deregister if
+	 * needed. This cleanup routine might be called multiple times, where
+	 * the actual async engine deregister drops the final engine ref.
+	 * Calling disable_scheduling_deregister will mark the engine as
+	 * destroyed and fire off the CT requests to disable scheduling /
+	 * deregister, which we only want to do once. We also don't want to mark
+	 * the engine as pending_disable again as this may race with the
+	 * xe_guc_deregister_done_handler() which treats it as an unexpected
+	 * state.
+	 */
+	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
+		int ret;
+
+		set_exec_queue_banned(q);
+		disable_scheduling_deregister(guc, q);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences; if the GT is broken, the GT reset code should
+		 * signal us.
+		 */
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			return;
+		}
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+static enum drm_gpu_sched_stat
+guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_sched_job *tmp_job;
+	struct xe_exec_queue *q = job->q;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
+	int err = -ETIME;
+	int i = 0;
+
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
+
+		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+			   xe_sched_job_seqno(job), q->guc->id, q->flags);
+		simple_error_capture(q);
+		xe_devcoredump(q);
+	} else {
+		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+			 xe_sched_job_seqno(job), q->guc->id, q->flags);
+	}
+	trace_xe_sched_job_timedout(job);
+
+	/* Kill the run_job entry point */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Kernel jobs should never fail, nor should VM jobs; if they do,
+	 * something has gone wrong and the GT needs a reset
+	 */
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
+		if (!xe_sched_invalidate_job(job, 2)) {
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			goto out;
+		}
+	}
+
+	/* Engine state now stable, disable scheduling if needed */
+	if (exec_queue_registered(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
+		int ret;
+
+		if (exec_queue_reset(q))
+			err = -EIO;
+		set_exec_queue_banned(q);
+		if (!exec_queue_destroyed(q)) {
+			xe_exec_queue_get(q);
+			disable_scheduling_deregister(guc, q);
+		}
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences; if the GT is broken, the GT reset code should
+		 * signal us.
+		 *
+		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+		 * error) messages which can cause the schedule disable to get
+		 * lost. If this occurs, trigger a GT reset to recover.
+		 */
+		smp_rmb();
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret || guc_read_stopped(guc)) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			xe_sched_tdr_queue_imm(sched);
+			goto out;
+		}
+	}
+
+	/* Stop fence signaling */
+	xe_hw_fence_irq_stop(q->fence_irq);
+
+	/*
+	 * Fence state now stable, stop / start scheduler which cleans up any
+	 * fences that are complete
+	 */
+	xe_sched_add_pending_job(sched, job);
+	xe_sched_submission_start(sched);
+	xe_guc_exec_queue_trigger_cleanup(q);
+
+	/* Mark all outstanding jobs as bad, thus completing them */
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(tmp_job, !i++ ?
err : -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	/* Start fence signaling */
+	xe_hw_fence_irq_start(q->fence_irq);
+
+out:
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void __guc_exec_queue_fini_async(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, fini_async);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	trace_xe_exec_queue_destroy(q);
+
+	if (xe_exec_queue_is_lr(q))
+		cancel_work_sync(&ge->lr_tdr);
+	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
+		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
+	release_guc_id(guc, q);
+	xe_sched_entity_fini(&ge->entity);
+	xe_sched_fini(&ge->sched);
+
+	kfree(ge);
+	xe_exec_queue_fini(q);
+}
+
+static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+{
+	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+
+	/* We must block on kernel engines so slabs are empty on driver unload */
+	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
+		__guc_exec_queue_fini_async(&q->guc->fini_async);
+	else
+		queue_work(system_wq, &q->guc->fini_async);
+}
+
+static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	/*
+	 * Might be done from within the GPU scheduler, so it needs to be
+	 * async: we fini the scheduler when the engine is fini'd, and the
+	 * scheduler can't complete fini within itself (circular dependency).
+	 * Async resolves this, and we don't really care when everything is
+	 * fini'd, just that it is.
+	 */
+	guc_exec_queue_fini_async(q);
+}
+
+static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
+	trace_xe_exec_queue_cleanup_entity(q);
+
+	if (exec_queue_registered(q))
+		disable_scheduling_deregister(guc, q);
+	else
+		__guc_exec_queue_fini(guc, q);
+}
+
+static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
+{
+	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
+}
+
+static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q))
+		init_policies(guc, q);
+	kfree(msg);
+}
+
+static void suspend_fence_signal(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
+		  guc_read_stopped(guc));
+	xe_assert(xe, q->guc->suspend_pending);
+
+	q->guc->suspend_pending = false;
+	smp_wmb();
+	wake_up(&q->guc->suspend_wait);
+}
+
+static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
+	    exec_queue_enabled(q)) {
+		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
+			   guc_read_stopped(guc));
+
+		if (!guc_read_stopped(guc)) {
+			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+			s64 since_resume_ms =
+				ktime_ms_delta(ktime_get(),
+					       q->guc->resume_time);
+			s64 wait_ms = q->vm->preempt.min_run_period_ms -
+				since_resume_ms;
+
+			if (wait_ms > 0 && q->guc->resume_time)
+				msleep(wait_ms);
+
+			set_exec_queue_suspended(q);
+			clear_exec_queue_enabled(q);
+			set_exec_queue_pending_disable(q);
+			
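/*
+			 * The state flips above let the sched-done G2H handler
+			 * tell this suspend apart from a teardown disable
+			 * (which also sets the destroyed bit).
+			 */
+			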
trace_xe_exec_queue_scheduling_disable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } + } else if (q->guc->suspend_pending) { + set_exec_queue_suspended(q); + suspend_fence_signal(q); + } +} + +static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) +{ + struct xe_exec_queue *q = msg->private_data; + struct xe_guc *guc = exec_queue_to_guc(q); + + if (guc_exec_queue_allowed_to_change_state(q)) { + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); + + q->guc->resume_time = RESUME_PENDING; + clear_exec_queue_suspended(q); + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } else { + clear_exec_queue_suspended(q); + } +} + +#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ +#define SET_SCHED_PROPS 2 +#define SUSPEND 3 +#define RESUME 4 + +static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) +{ + trace_xe_sched_msg_recv(msg); + + switch (msg->opcode) { + case CLEANUP: + __guc_exec_queue_process_msg_cleanup(msg); + break; + case SET_SCHED_PROPS: + __guc_exec_queue_process_msg_set_sched_props(msg); + break; + case SUSPEND: + __guc_exec_queue_process_msg_suspend(msg); + break; + case RESUME: + __guc_exec_queue_process_msg_resume(msg); + break; + default: + XE_WARN_ON("Unknown message type"); + } +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = guc_exec_queue_run_job, + .free_job = guc_exec_queue_free_job, + .timedout_job = guc_exec_queue_timedout_job, +}; + +static const struct xe_sched_backend_ops xe_sched_ops = { + .process_msg = guc_exec_queue_process_msg, +}; + +static int guc_exec_queue_init(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_guc_exec_queue *ge; + long timeout; + int err; + + xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); + + ge = kzalloc(sizeof(*ge), GFP_KERNEL); + if (!ge) + return -ENOMEM; + + q->guc = ge; + ge->q = q; + init_waitqueue_head(&ge->suspend_wait); + + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : + q->hwe->eclass->sched_props.job_timeout_ms; + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, + get_submit_wq(guc), + q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, + timeout, guc_to_gt(guc)->ordered_wq, NULL, + q->name, gt_to_xe(q->gt)->drm.dev); + if (err) + goto err_free; + + sched = &ge->sched; + err = xe_sched_entity_init(&ge->entity, sched); + if (err) + goto err_sched; + q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + + if (xe_exec_queue_is_lr(q)) + INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); + + mutex_lock(&guc->submission_state.lock); + + err = alloc_guc_id(guc, q); + if (err) + goto err_entity; + + q->entity = &ge->entity; + + if (guc_read_stopped(guc)) + xe_sched_stop(sched); + + mutex_unlock(&guc->submission_state.lock); + + xe_exec_queue_assign_name(q, q->guc->id); + + trace_xe_exec_queue_create(q); + + return 0; + +err_entity: + xe_sched_entity_fini(&ge->entity); +err_sched: + xe_sched_fini(&ge->sched); +err_free: + kfree(ge); + + return err; +} + +static void guc_exec_queue_kill(struct xe_exec_queue *q) +{ + trace_xe_exec_queue_kill(q); + set_exec_queue_killed(q); + xe_guc_exec_queue_trigger_cleanup(q); +} + +static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, + u32 opcode) +{ + INIT_LIST_HEAD(&msg->link); + msg->opcode = opcode; + msg->private_data = q; + + trace_xe_sched_msg_add(msg); + xe_sched_add_msg(&q->guc->sched, msg); +} + +#define STATIC_MSG_CLEANUP 0 +#define STATIC_MSG_SUSPEND 1 +#define STATIC_MSG_RESUME 2 +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; + + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) + guc_exec_queue_add_msg(q, msg, CLEANUP); + else + __guc_exec_queue_fini(exec_queue_to_guc(q), q); +} + +static int guc_exec_queue_set_priority(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority) +{ + struct xe_sched_msg *msg; + + if (q->priority == priority || exec_queue_killed_or_banned(q)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); + q->priority = priority; + + return 0; +} + +static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) +{ + struct xe_sched_msg *msg; + + if (q->sched_props.timeslice_us == timeslice_us || + exec_queue_killed_or_banned(q)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + q->sched_props.timeslice_us = timeslice_us; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, + u32 preempt_timeout_us) +{ + struct xe_sched_msg *msg; + + if (q->sched_props.preempt_timeout_us == preempt_timeout_us || + exec_queue_killed_or_banned(q)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + q->sched_props.preempt_timeout_us = preempt_timeout_us; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + xe_assert(xe, !exec_queue_registered(q)); + xe_assert(xe, !exec_queue_banned(q)); + xe_assert(xe, !exec_queue_killed(q)); + + sched->base.timeout = job_timeout_ms; + + return 0; +} + +static int guc_exec_queue_suspend(struct xe_exec_queue *q) +{ + 
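/*
+	 * Suspend reuses a statically allocated xe_sched_msg, so this path
+	 * cannot fail on memory allocation; only one suspend may be pending
+	 * per queue at a time (see the suspend_pending check below).
+	 */
+	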
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; + + if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) + return -EINVAL; + + q->guc->suspend_pending = true; + guc_exec_queue_add_msg(q, msg, SUSPEND); + + return 0; +} + +static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + + wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || + guc_read_stopped(guc)); +} + +static void guc_exec_queue_resume(struct xe_exec_queue *q) +{ + struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + xe_assert(xe, !q->guc->suspend_pending); + + guc_exec_queue_add_msg(q, msg, RESUME); +} + +static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) +{ + return exec_queue_reset(q); +} + +/* + * All of these functions are an abstraction layer which other parts of XE can + * use to trap into the GuC backend. All of these functions, aside from init, + * really shouldn't do much other than trap into the DRM scheduler which + * synchronizes these operations. + */ +static const struct xe_exec_queue_ops guc_exec_queue_ops = { + .init = guc_exec_queue_init, + .kill = guc_exec_queue_kill, + .fini = guc_exec_queue_fini, + .set_priority = guc_exec_queue_set_priority, + .set_timeslice = guc_exec_queue_set_timeslice, + .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, + .set_job_timeout = guc_exec_queue_set_job_timeout, + .suspend = guc_exec_queue_suspend, + .suspend_wait = guc_exec_queue_suspend_wait, + .resume = guc_exec_queue_resume, + .reset_status = guc_exec_queue_reset_status, +}; + +static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + + /* Stop scheduling + flush any DRM scheduler operations */ + xe_sched_submission_stop(sched); + + /* Clean up lost G2H + reset engine state */ + if (exec_queue_registered(q)) { + if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || + xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); + else if (exec_queue_destroyed(q)) + __guc_exec_queue_fini(guc, q); + } + if (q->guc->suspend_pending) { + set_exec_queue_suspended(q); + suspend_fence_signal(q); + } + atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + &q->guc->state); + q->guc->resume_time = 0; + trace_xe_exec_queue_stop(q); + + /* + * Ban any engine (aside from kernel and engines used for VM ops) with a + * started but not complete job or if a job has gone through a GT reset + * more than twice. + */ + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { + struct xe_sched_job *job = xe_sched_first_pending_job(sched); + + if (job) { + if ((xe_sched_job_started(job) && + !xe_sched_job_completed(job)) || + xe_sched_invalidate_job(job, 2)) { + trace_xe_sched_job_ban(job); + xe_sched_tdr_queue_imm(&q->guc->sched); + set_exec_queue_banned(q); + } + } + } +} + +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + int ret; + + /* + * Using an atomic here rather than submission_state.lock as this + * function can be called while holding the CT lock (engine reset + * failure). submission_state.lock needs the CT lock to resubmit jobs. + * Atomic is not ideal, but it works to prevent against concurrent reset + * and releasing any TDRs waiting on guc->submission_state.stopped. 
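+	 * The paired atomic_dec() is in xe_guc_submit_start().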
+ */ + ret = atomic_fetch_or(1, &guc->submission_state.stopped); + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return ret; +} + +void xe_guc_submit_reset_wait(struct xe_guc *guc) +{ + wait_event(guc->ct.wq, !guc_read_stopped(guc)); +} + +int xe_guc_submit_stop(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + struct xe_device *xe = guc_to_xe(guc); + + xe_assert(xe, guc_read_stopped(guc) == 1); + + mutex_lock(&guc->submission_state.lock); + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_stop(guc, q); + + mutex_unlock(&guc->submission_state.lock); + + /* + * No one can enter the backend at this point, aside from new engine + * creation which is protected by guc->submission_state.lock. + */ + + return 0; +} + +static void guc_exec_queue_start(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + + if (!exec_queue_killed_or_banned(q)) { + int i; + + trace_xe_exec_queue_resubmit(q); + for (i = 0; i < q->width; ++i) + xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail); + xe_sched_resubmit_jobs(sched); + } + + xe_sched_submission_start(sched); +} + +int xe_guc_submit_start(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + struct xe_device *xe = guc_to_xe(guc); + + xe_assert(xe, guc_read_stopped(guc) == 1); + + mutex_lock(&guc->submission_state.lock); + atomic_dec(&guc->submission_state.stopped); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_start(q); + mutex_unlock(&guc->submission_state.lock); + + wake_up_all(&guc->ct.wq); + + return 0; +} + +static struct xe_exec_queue * +g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + + if (unlikely(guc_id >= GUC_ID_MAX)) { + drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + return NULL; + } + + q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); + if (unlikely(!q)) { + drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + return NULL; + } + + xe_assert(xe, guc_id >= q->guc->id); + xe_assert(xe, guc_id < (q->guc->id + q->width)); + + return q; +} + +static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + q->guc->id, + }; + + trace_xe_exec_queue_deregister(q); + + xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); +} + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + if (unlikely(!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q))) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&q->guc->state)); + return -EPROTO; + } + + trace_xe_exec_queue_scheduling_done(q); + + if (exec_queue_pending_enable(q)) { + q->guc->resume_time = ktime_get(); + clear_exec_queue_pending_enable(q); + smp_wmb(); + wake_up_all(&guc->ct.wq); + } else { + clear_exec_queue_pending_disable(q); + if (q->guc->suspend_pending) { + suspend_fence_signal(q); + } else { + if (exec_queue_banned(q)) { + smp_wmb(); + wake_up_all(&guc->ct.wq); + } + deregister_exec_queue(guc, q); + } + } + + return 0; +} + +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + 
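/* msg[0] carries the guc_id of the context being deregistered */
+	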
struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || + exec_queue_pending_enable(q) || exec_queue_enabled(q)) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&q->guc->state)); + return -EPROTO; + } + + trace_xe_exec_queue_deregister_done(q); + + clear_exec_queue_registered(q); + + if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); + else + __guc_exec_queue_fini(guc, q); + + return 0; +} + +int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); + + /* FIXME: Do error capture, most likely async */ + + trace_xe_exec_queue_reset(q); + + /* + * A banned engine is a NOP at this point (came from + * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel + * jobs by setting timeout of the job to the minimum value kicking + * guc_exec_queue_timedout_job. + */ + set_exec_queue_reset(q); + if (!exec_queue_banned(q)) + xe_guc_exec_queue_trigger_cleanup(q); + + return 0; +} + +int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); + trace_xe_exec_queue_memory_cat_error(q); + + /* Treat the same as engine reset */ + set_exec_queue_reset(q); + if (!exec_queue_banned(q)) + xe_guc_exec_queue_trigger_cleanup(q); + + return 0; +} + +int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + /* Unexpected failure of a hardware feature, log an actual error */ + drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); + + xe_gt_reset_async(guc_to_gt(guc)); + + return 0; +} + +static void +guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, + struct xe_guc_submit_exec_queue_snapshot *snapshot) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); + int i; + + snapshot->guc.wqi_head = q->guc->wqi_head; + snapshot->guc.wqi_tail = q->guc->wqi_tail; + snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); + snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); + snapshot->parallel.wq_desc.status = parallel_read(xe, map, + wq_desc.wq_status); + + if (snapshot->parallel.wq_desc.head != + snapshot->parallel.wq_desc.tail) { + for (i = snapshot->parallel.wq_desc.head; + i != snapshot->parallel.wq_desc.tail; + i = (i + sizeof(u32)) % WQ_SIZE) + 
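/* copy the live WQ contents, wrapping at WQ_SIZE */
+			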
snapshot->parallel.wq[i / sizeof(u32)] = + parallel_read(xe, map, wq[i / sizeof(u32)]); + } +} + +static void +guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, + struct drm_printer *p) +{ + int i; + + drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", + snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); + drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", + snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); + drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); + + if (snapshot->parallel.wq_desc.head != + snapshot->parallel.wq_desc.tail) { + for (i = snapshot->parallel.wq_desc.head; + i != snapshot->parallel.wq_desc.tail; + i = (i + sizeof(u32)) % WQ_SIZE) + drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), + snapshot->parallel.wq[i / sizeof(u32)]); + } +} + +/** + * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. + * @q: Xe exec queue. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a GuC Submit Engine snapshot object that must be freed by the + * caller, using `xe_guc_exec_queue_snapshot_free`. + */ +struct xe_guc_submit_exec_queue_snapshot * +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + struct xe_guc_submit_exec_queue_snapshot *snapshot; + int i; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + + if (!snapshot) { + drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); + return NULL; + } + + snapshot->guc.id = q->guc->id; + memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); + snapshot->class = q->class; + snapshot->logical_mask = q->logical_mask; + snapshot->width = q->width; + snapshot->refcount = kref_read(&q->refcount); + snapshot->sched_timeout = sched->base.timeout; + snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; + snapshot->sched_props.preempt_timeout_us = + q->sched_props.preempt_timeout_us; + + snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), + GFP_ATOMIC); + + if (!snapshot->lrc) { + drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); + } else { + for (i = 0; i < q->width; ++i) { + struct xe_lrc *lrc = q->lrc + i; + + snapshot->lrc[i].context_desc = + lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->lrc[i].head = xe_lrc_ring_head(lrc); + snapshot->lrc[i].tail.internal = lrc->ring.tail; + snapshot->lrc[i].tail.memory = + xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); + snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); + } + } + + snapshot->schedule_state = atomic_read(&q->guc->state); + snapshot->exec_queue_flags = q->flags; + + snapshot->parallel_execution = xe_exec_queue_is_parallel(q); + if (snapshot->parallel_execution) + guc_exec_queue_wq_snapshot_capture(q, snapshot); + + spin_lock(&sched->base.job_list_lock); + snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); + snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, + sizeof(struct pending_list_snapshot), + GFP_ATOMIC); + + if (!snapshot->pending_list) { + drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); + } else { + i = 0; + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + snapshot->pending_list[i].seqno = + xe_sched_job_seqno(job); + snapshot->pending_list[i].fence = + 
dma_fence_is_signaled(job->fence) ? 1 : 0;
+			snapshot->pending_list[i].finished =
+				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				? 1 : 0;
+			i++;
+		}
+	}
+
+	spin_unlock(&sched->base.job_list_lock);
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC Submit Engine snapshot object.
+ */
+void
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	int i;
+
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+	drm_printf(p, "\tName: %s\n", snapshot->name);
+	drm_printf(p, "\tClass: %d\n", snapshot->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", snapshot->width);
+	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
+	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n",
+		   snapshot->sched_props.timeslice_us);
+	drm_printf(p, "\tPreempt timeout: %u (us)\n",
+		   snapshot->sched_props.preempt_timeout_us);
+
+	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
+		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+			   snapshot->lrc[i].context_desc);
+		drm_printf(p, "\tLRC Head: (memory) %u\n",
+			   snapshot->lrc[i].head);
+		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+			   snapshot->lrc[i].tail.internal,
+			   snapshot->lrc[i].tail.memory);
+		drm_printf(p, "\tStart seqno: (memory) %d\n",
+			   snapshot->lrc[i].start_seqno);
+		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
+	}
+	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
+	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
+
+	if (snapshot->parallel_execution)
+		guc_exec_queue_wq_snapshot_print(snapshot, p);
+
+	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
+	     i++)
+		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+			   snapshot->pending_list[i].seqno,
+			   snapshot->pending_list[i].fence,
+			   snapshot->pending_list[i].finished);
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
+ * snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture time.
+ */
+void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->lrc);
+	kfree(snapshot->pending_list);
+	kfree(snapshot);
+}
+
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
+{
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+
+	snapshot = xe_guc_exec_queue_snapshot_capture(q);
+	xe_guc_exec_queue_snapshot_print(snapshot, p);
+	xe_guc_exec_queue_snapshot_free(snapshot);
+}
+
+/**
+ * xe_guc_submit_print - GuC Submit Print.
+ * @guc: GuC.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function captures and prints snapshots of **all** GuC Engines.
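+ *
+ * The submission state lock is held while walking the exec queue lookup
+ * xarray, so queues cannot be registered or removed while their snapshots
+ * are being captured and printed.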
+ */ +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_exec_queue *q; + unsigned long index; + + if (!xe_device_uc_enabled(guc_to_xe(guc))) + return; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_print(q, p); + mutex_unlock(&guc->submission_state.lock); +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h new file mode 100644 index 000000000000..fc97869c5b86 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_SUBMIT_H_ +#define _XE_GUC_SUBMIT_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_exec_queue; +struct xe_guc; + +int xe_guc_submit_init(struct xe_guc *guc); + +int xe_guc_submit_reset_prepare(struct xe_guc *guc); +void xe_guc_submit_reset_wait(struct xe_guc *guc); +int xe_guc_submit_stop(struct xe_guc *guc); +int xe_guc_submit_start(struct xe_guc *guc); + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len); +int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); + +struct xe_guc_submit_exec_queue_snapshot * +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); +void +xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, + struct drm_printer *p); +void +xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h new file mode 100644 index 000000000000..649b0a852692 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_SUBMIT_TYPES_H_ +#define _XE_GUC_SUBMIT_TYPES_H_ + +#include "xe_hw_engine_types.h" + +/* Work item for submitting workloads into work queue of GuC. */ +#define WQ_STATUS_ACTIVE 1 +#define WQ_STATUS_SUSPENDED 2 +#define WQ_STATUS_CMD_ERROR 3 +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) + +#define PARALLEL_SCRATCH_SIZE 2048 +#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) +#define CACHELINE_BYTES 64 + +struct guc_sched_wq_desc { + u32 head; + u32 tail; + u32 error_offset; + u32 wq_status; + u32 reserved[28]; +} __packed; + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +/** + * struct guc_submit_parallel_scratch - A scratch shared mapped buffer. 
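+ *
+ * The 2048-byte scratch page is split in half: the workqueue descriptor
+ * and the go/join semaphores sit at the start, padding fills the space up
+ * to WQ_OFFSET (1024 bytes), and the 1024-byte work queue occupies the
+ * second half.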
+ */ +struct guc_submit_parallel_scratch { + /** @wq_desc: Guc scheduler workqueue descriptor */ + struct guc_sched_wq_desc wq_desc; + + /** @go: Go Semaphore */ + struct sync_semaphore go; + /** @join: Joined semaphore for the relevant hw engine instances */ + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; + + /** @unused: Unused/Reserved memory space */ + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + sizeof(struct sync_semaphore) * + (XE_HW_ENGINE_MAX_INSTANCE + 1)]; + + /** @wq: Workqueue info */ + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +struct lrc_snapshot { + u32 context_desc; + u32 head; + struct { + u32 internal; + u32 memory; + } tail; + u32 start_seqno; + u32 seqno; +}; + +struct pending_list_snapshot { + u32 seqno; + bool fence; + bool finished; +}; + +/** + * struct xe_guc_submit_exec_queue_snapshot - Snapshot for devcoredump + */ +struct xe_guc_submit_exec_queue_snapshot { + /** @name: name of this exec queue */ + char name[MAX_FENCE_NAME_LEN]; + /** @class: class of this exec queue */ + enum xe_engine_class class; + /** + * @logical_mask: logical mask of where job submitted to exec queue can run + */ + u32 logical_mask; + /** @width: width (number BB submitted per exec) of this exec queue */ + u16 width; + /** @refcount: ref count of this exec queue */ + u32 refcount; + /** + * @sched_timeout: the time after which a job is removed from the + * scheduler. + */ + long sched_timeout; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @lrc: LRC Snapshot */ + struct lrc_snapshot *lrc; + + /** @schedule_state: Schedule State at the moment of Crash */ + u32 schedule_state; + /** @exec_queue_flags: Flags of the faulty exec_queue */ + unsigned long exec_queue_flags; + + /** @guc: GuC Engine Snapshot */ + struct { + /** @wqi_head: work queue item head */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this exec_queue */ + u16 id; + } guc; + + /** + * @parallel_execution: Indication if the failure was during parallel + * execution + */ + bool parallel_execution; + /** @parallel: snapshot of the useful parallel scratch */ + struct { + /** @wq_desc: Workqueue description */ + struct { + /** @head: Workqueue Head */ + u32 head; + /** @tail: Workqueue Tail */ + u32 tail; + /** @status: Workqueue Status */ + u32 status; + } wq_desc; + /** @wq: Workqueue Items */ + u32 wq[WQ_SIZE / sizeof(u32)]; + } parallel; + + /** @pending_list_size: Size of the pending list snapshot array */ + int pending_list_size; + /** @pending_list: snapshot of the pending list info */ + struct pending_list_snapshot *pending_list; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h new file mode 100644 index 000000000000..cd80802e8918 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_TYPES_H_ +#define _XE_GUC_TYPES_H_ + +#include <linux/idr.h> +#include <linux/xarray.h> + +#include "regs/xe_reg_defs.h" +#include "xe_guc_ads_types.h" +#include "xe_guc_ct_types.h" +#include "xe_guc_fwif.h" +#include "xe_guc_log_types.h" +#include "xe_guc_pc_types.h" +#include "xe_uc_fw_types.h" + +/** + * struct xe_guc - Graphic micro controller + */ +struct xe_guc { + /** @fw: Generic uC firmware management 
*/ + struct xe_uc_fw fw; + /** @log: GuC log */ + struct xe_guc_log log; + /** @ads: GuC ads */ + struct xe_guc_ads ads; + /** @ct: GuC ct */ + struct xe_guc_ct ct; + /** @pc: GuC Power Conservation */ + struct xe_guc_pc pc; + /** @submission_state: GuC submission state */ + struct { + /** @exec_queue_lookup: Lookup an xe_engine from guc_id */ + struct xarray exec_queue_lookup; + /** @guc_ids: used to allocate new guc_ids, single-lrc */ + struct ida guc_ids; + /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ + unsigned long *guc_ids_bitmap; + /** @stopped: submissions are stopped */ + atomic_t stopped; + /** @lock: protects submission state */ + struct mutex lock; + /** @suspend: suspend fence state */ + struct { + /** @lock: suspend fences lock */ + spinlock_t lock; + /** @context: suspend fences context */ + u64 context; + /** @seqno: suspend fences seqno */ + u32 seqno; + } suspend; +#ifdef CONFIG_PROVE_LOCKING +#define NUM_SUBMIT_WQ 256 + /** @submit_wq_pool: submission ordered workqueues pool */ + struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ]; + /** @submit_wq_idx: submission ordered workqueue index */ + int submit_wq_idx; +#endif + /** @enabled: submission is enabled */ + bool enabled; + } submission_state; + /** @hwconfig: Hardware config state */ + struct { + /** @bo: buffer object of the hardware config */ + struct xe_bo *bo; + /** @size: size of the hardware config */ + u32 size; + } hwconfig; + + /** + * @notify_reg: Register which is written to notify GuC of H2G messages + */ + struct xe_reg notify_reg; + /** @params: Control params for fw initialization */ + u32 params[GUC_CTL_MAX_DWORDS]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c new file mode 100644 index 000000000000..bfdd33b9b23b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2023, Intel Corporation. All rights reserved. 
+ */ + +#include <linux/irq.h> +#include <linux/mei_aux.h> +#include <linux/pci.h> +#include <linux/sizes.h> + +#include "xe_device_types.h" +#include "xe_drv.h" +#include "xe_heci_gsc.h" +#include "xe_platform_types.h" + +#define GSC_BAR_LENGTH 0x00000FFC + +#define DG1_GSC_HECI2_BASE 0x259000 +#define PVC_GSC_HECI2_BASE 0x285000 +#define DG2_GSC_HECI2_BASE 0x374000 + +static void heci_gsc_irq_mask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static void heci_gsc_irq_unmask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static struct irq_chip heci_gsc_irq_chip = { + .name = "gsc_irq_chip", + .irq_mask = heci_gsc_irq_mask, + .irq_unmask = heci_gsc_irq_unmask, +}; + +static int heci_gsc_irq_init(int irq) +{ + irq_set_chip_and_handler_name(irq, &heci_gsc_irq_chip, + handle_simple_irq, "heci_gsc_irq_handler"); + + return irq_set_chip_data(irq, NULL); +} + +/** + * struct heci_gsc_def - graphics security controller heci interface definitions + * + * @name: name of the heci device + * @bar: address of the mmio bar + * @bar_size: size of the mmio bar + * @use_polling: indication of using polling mode for the device + * @slow_firmware: indication of whether the device is slow (needs longer timeouts) + */ +struct heci_gsc_def { + const char *name; + unsigned long bar; + size_t bar_size; + bool use_polling; + bool slow_firmware; +}; + +/* gsc resources and definitions */ +static const struct heci_gsc_def heci_gsc_def_dg1 = { + .name = "mei-gscfi", + .bar = DG1_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, +}; + +static const struct heci_gsc_def heci_gsc_def_dg2 = { + .name = "mei-gscfi", + .bar = DG2_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, +}; + +static const struct heci_gsc_def heci_gsc_def_pvc = { + .name = "mei-gscfi", + .bar = PVC_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, + .slow_firmware = true, +}; + +static void heci_gsc_release_dev(struct device *dev) +{ + struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + + kfree(adev); +} + +void xe_heci_gsc_fini(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + + if (!HAS_HECI_GSCFI(xe)) + return; + + if (heci_gsc->adev) { + struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev; + + auxiliary_device_delete(aux_dev); + auxiliary_device_uninit(aux_dev); + heci_gsc->adev = NULL; + } + + if (heci_gsc->irq >= 0) + irq_free_desc(heci_gsc->irq); + heci_gsc->irq = -1; +} + +static int heci_gsc_irq_setup(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + int ret; + + heci_gsc->irq = irq_alloc_desc(0); + if (heci_gsc->irq < 0) { + drm_err(&xe->drm, "gsc irq error %d\n", heci_gsc->irq); + return heci_gsc->irq; + } + + ret = heci_gsc_irq_init(heci_gsc->irq); + if (ret < 0) + drm_err(&xe->drm, "gsc irq init failed %d\n", ret); + + return ret; +} + +static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *def) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct mei_aux_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + adev->irq = heci_gsc->irq; + adev->bar.parent = &pdev->resource[0]; + adev->bar.start = def->bar + pdev->resource[0].start; + adev->bar.end = adev->bar.start + def->bar_size - 1; + adev->bar.flags = IORESOURCE_MEM; + adev->bar.desc = IORES_DESC_NONE; + adev->slow_firmware = def->slow_firmware; + + aux_dev = 
&adev->aux_dev; + aux_dev->name = def->name; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | + PCI_DEVID(pdev->bus->number, pdev->devfn); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = heci_gsc_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret < 0) { + drm_err(&xe->drm, "gsc aux init failed %d\n", ret); + kfree(adev); + return ret; + } + + heci_gsc->adev = adev; /* needed by the notifier */ + ret = auxiliary_device_add(aux_dev); + if (ret < 0) { + drm_err(&xe->drm, "gsc aux add failed %d\n", ret); + heci_gsc->adev = NULL; + + /* adev will be freed with the put_device() and .release sequence */ + auxiliary_device_uninit(aux_dev); + } + return ret; +} + +void xe_heci_gsc_init(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + const struct heci_gsc_def *def; + int ret; + + if (!HAS_HECI_GSCFI(xe)) + return; + + heci_gsc->irq = -1; + + if (xe->info.platform == XE_PVC) { + def = &heci_gsc_def_pvc; + } else if (xe->info.platform == XE_DG2) { + def = &heci_gsc_def_dg2; + } else if (xe->info.platform == XE_DG1) { + def = &heci_gsc_def_dg1; + } else { + drm_warn_once(&xe->drm, "Unknown platform\n"); + return; + } + + if (!def->name) { + drm_warn_once(&xe->drm, "HECI is not implemented!\n"); + return; + } + + if (!def->use_polling) { + ret = heci_gsc_irq_setup(xe); + if (ret) + goto fail; + } + + ret = heci_gsc_add_device(xe, def); + if (ret) + goto fail; + + return; +fail: + xe_heci_gsc_fini(xe); +} + +void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) +{ + int ret; + + if ((iir & GSC_IRQ_INTF(1)) == 0) + return; + + if (!HAS_HECI_GSCFI(xe)) { + drm_warn_once(&xe->drm, "GSC irq: not supported"); + return; + } + + if (xe->heci_gsc.irq < 0) + return; + + ret = generic_handle_irq(xe->heci_gsc.irq); + if (ret) + drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); +} diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h new file mode 100644 index 000000000000..9db454478fae --- /dev/null +++ b/drivers/gpu/drm/xe/xe_heci_gsc.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef __XE_HECI_GSC_DEV_H__ +#define __XE_HECI_GSC_DEV_H__ + +#include <linux/types.h> + +struct xe_device; +struct mei_aux_device; + +/* + * The HECI1 bit corresponds to bit15 and HECI2 to bit14. + * The reason for this is to allow growth for more interfaces in the future. 
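+ *
+ * For example, GSC_IRQ_INTF(0) evaluates to BIT(15) (HECI1) and
+ * GSC_IRQ_INTF(1) to BIT(14) (HECI2).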
+ */ +#define GSC_IRQ_INTF(_x) BIT(15 - (_x)) + +/** + * struct xe_heci_gsc - graphics security controller for xe, HECI interface + * + * @adev : pointer to mei auxiliary device structure + * @irq : irq number + * + */ +struct xe_heci_gsc { + struct mei_aux_device *adev; + int irq; +}; + +void xe_heci_gsc_init(struct xe_device *xe); +void xe_heci_gsc_fini(struct xe_device *xe); +void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); + +#endif /* __XE_HECI_GSC_DEV_H__ */ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c new file mode 100644 index 000000000000..eca109791c6a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_huc.h" + +#include <drm/drm_managed.h> + +#include "abi/gsc_pxp_commands_abi.h" +#include "regs/xe_gsc_regs.h" +#include "regs/xe_guc_regs.h" +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_guc * +huc_to_guc(struct xe_huc *huc) +{ + return &container_of(huc, struct xe_uc, huc)->guc; +} + +static void free_gsc_pkt(struct drm_device *drm, void *arg) +{ + struct xe_huc *huc = arg; + + xe_bo_unpin_map_no_vm(huc->gsc_pkt); + huc->gsc_pkt = NULL; +} + +#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K +static int huc_alloc_gsc_pkt(struct xe_huc *huc) +{ + struct xe_gt *gt = huc_to_gt(huc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + int err; + + /* we use a single object for both input and output */ + bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, + PXP43_HUC_AUTH_INOUT_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + huc->gsc_pkt = bo; + + err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + if (err) { + free_gsc_pkt(&xe->drm, huc); + return err; + } + + return 0; +} + +int xe_huc_init(struct xe_huc *huc) +{ + struct xe_gt *gt = huc_to_gt(huc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + int ret; + + huc->fw.type = XE_UC_FW_TYPE_HUC; + + /* On platforms with a media GT the HuC is only available there */ + if (tile->media_gt && (gt != tile->media_gt)) { + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + + ret = xe_uc_fw_init(&huc->fw); + if (ret) + goto out; + + if (!xe_uc_fw_is_enabled(&huc->fw)) + return 0; + + if (huc->fw.has_gsc_headers) { + ret = huc_alloc_gsc_pkt(huc); + if (ret) + goto out; + } + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + drm_err(&xe->drm, "HuC init failed with %d", ret); + return ret; +} + +int xe_huc_upload(struct xe_huc *huc) +{ + if (!xe_uc_fw_is_loadable(&huc->fw)) + return 0; + return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); +} + +#define huc_auth_msg_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct pxp43_new_huc_auth_in, field_, val_) +#define huc_auth_msg_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct pxp43_huc_auth_out, field_) + +static u32 huc_emit_pxp_auth_msg(struct xe_device *xe, struct iosys_map *map, + u32 
wr_offset, u32 huc_offset, u32 huc_size) +{ + xe_map_memset(xe, map, wr_offset, 0, sizeof(struct pxp43_new_huc_auth_in)); + + huc_auth_msg_wr(xe, map, wr_offset, header.api_version, PXP_APIVER(4, 3)); + huc_auth_msg_wr(xe, map, wr_offset, header.command_id, PXP43_CMDID_NEW_HUC_AUTH); + huc_auth_msg_wr(xe, map, wr_offset, header.status, 0); + huc_auth_msg_wr(xe, map, wr_offset, header.buffer_len, + sizeof(struct pxp43_new_huc_auth_in) - sizeof(struct pxp_cmd_header)); + huc_auth_msg_wr(xe, map, wr_offset, huc_base_address, huc_offset); + huc_auth_msg_wr(xe, map, wr_offset, huc_size, huc_size); + + return wr_offset + sizeof(struct pxp43_new_huc_auth_in); +} + +static int huc_auth_via_gsccs(struct xe_huc *huc) +{ + struct xe_gt *gt = huc_to_gt(huc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *pkt = huc->gsc_pkt; + u32 wr_offset; + u32 rd_offset; + u64 ggtt_offset; + u32 out_status; + int retry = 5; + int err = 0; + + if (!pkt) + return -ENODEV; + + ggtt_offset = xe_bo_ggtt_addr(pkt); + + wr_offset = xe_gsc_emit_header(xe, &pkt->vmap, 0, HECI_MEADDRESS_PXP, 0, + sizeof(struct pxp43_new_huc_auth_in)); + wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, + xe_bo_ggtt_addr(huc->fw.bo), + huc->fw.bo->size); + do { + err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, + ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, + PXP43_HUC_AUTH_INOUT_SIZE); + if (err) + break; + + if (xe_gsc_check_and_update_pending(xe, &pkt->vmap, 0, &pkt->vmap, + PXP43_HUC_AUTH_INOUT_SIZE)) { + err = -EBUSY; + msleep(50); + } + } while (--retry && err == -EBUSY); + + if (err) { + drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err); + return err; + } + + err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE, + sizeof(struct pxp43_huc_auth_out), &rd_offset); + if (err) { + drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err); + return err; + } + + /* + * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already + * authenticated. If the same error is ever returned with HuC not loaded + * we'll still catch it when we check the authentication bit later. 
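+	 *
+	 * Both PXP_STATUS_SUCCESS and PXP_STATUS_OP_NOT_PERMITTED are
+	 * therefore treated as success below; any other status is a genuine
+	 * authentication failure.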
+ */ + out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status); + if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) { + drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status); + return -EIO; + } + + return 0; +} + +static const struct { + const char *name; + struct xe_reg reg; + u32 val; +} huc_auth_modes[XE_HUC_AUTH_TYPES_COUNT] = { + [XE_HUC_AUTH_VIA_GUC] = { "GuC", + HUC_KERNEL_LOAD_INFO, + HUC_LOAD_SUCCESSFUL }, + [XE_HUC_AUTH_VIA_GSC] = { "GSC", + HECI_FWSTS5(MTL_GSC_HECI1_BASE), + HECI1_FWSTS5_HUC_AUTH_DONE }, +}; + +bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type) +{ + struct xe_gt *gt = huc_to_gt(huc); + + return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val; +} + +int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) +{ + struct xe_device *xe = huc_to_xe(huc); + struct xe_gt *gt = huc_to_gt(huc); + struct xe_guc *guc = huc_to_guc(huc); + int ret; + + if (!xe_uc_fw_is_loadable(&huc->fw)) + return 0; + + /* On newer platforms the HuC survives reset, so no need to re-auth */ + if (xe_huc_is_authenticated(huc, type)) { + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); + return 0; + } + + if (!xe_uc_fw_is_loaded(&huc->fw)) + return -ENOEXEC; + + switch (type) { + case XE_HUC_AUTH_VIA_GUC: + ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + + xe_uc_fw_rsa_offset(&huc->fw)); + break; + case XE_HUC_AUTH_VIA_GSC: + ret = huc_auth_via_gsccs(huc); + break; + default: + XE_WARN_ON(type); + return -EINVAL; + } + if (ret) { + drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n", + huc_auth_modes[type].name, ret); + goto fail; + } + + ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, + huc_auth_modes[type].val, 100000, NULL, false); + if (ret) { + drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); + goto fail; + } + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); + drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name); + + return 0; + +fail: + drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n", + huc_auth_modes[type].name, ret); + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + + return ret; +} + +void xe_huc_sanitize(struct xe_huc *huc) +{ + if (!xe_uc_fw_is_loadable(&huc->fw)) + return; + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); +} + +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) +{ + struct xe_gt *gt = huc_to_gt(huc); + int err; + + xe_uc_fw_print(&huc->fw, p); + + if (!xe_uc_fw_is_enabled(&huc->fw)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + drm_printf(p, "\nHuC status: 0x%08x\n", + xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO)); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h new file mode 100644 index 000000000000..532017230287 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_H_ +#define _XE_HUC_H_ + +#include "xe_huc_types.h" + +struct drm_printer; + +enum xe_huc_auth_types { + XE_HUC_AUTH_VIA_GUC = 0, + XE_HUC_AUTH_VIA_GSC, + XE_HUC_AUTH_TYPES_COUNT +}; + +int xe_huc_init(struct xe_huc *huc); +int xe_huc_upload(struct xe_huc *huc); +int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type); +bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type); +void 
xe_huc_sanitize(struct xe_huc *huc); +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c new file mode 100644 index 000000000000..18585a7eeb9d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_huc_debugfs.h" + +#include <drm/drm_debugfs.h> +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_huc.h" +#include "xe_macros.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_huc *node_to_huc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int huc_info(struct seq_file *m, void *data) +{ + struct xe_huc *huc = node_to_huc(m->private); + struct xe_device *xe = huc_to_xe(huc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_huc_print_info(huc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"huc_info", huc_info, 0}, +}; + +void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent) +{ + struct drm_minor *minor = huc_to_xe(huc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) + local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) + return; + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = huc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h new file mode 100644 index 000000000000..ec58f1818804 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_DEBUGFS_H_ +#define _XE_HUC_DEBUGFS_H_ + +struct dentry; +struct xe_huc; + +void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h new file mode 100644 index 000000000000..cfbaa5e0dfca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_types.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_TYPES_H_ +#define _XE_HUC_TYPES_H_ + +#include "xe_uc_fw_types.h" + +struct xe_bo; + +/** + * struct xe_huc - HuC + */ +struct xe_huc { + /** @fw: Generic uC firmware management */ + struct xe_uc_fw fw; + + /** @gsc_pkt: bo to store the packet for auth via GSC */ + struct xe_bo *gsc_pkt; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c new file mode 100644 index 000000000000..1fa5cf5eea97 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -0,0 +1,883 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_engine.h" + +#include <drm/drm_managed.h> + +#include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include 
"xe_gt_ccs_mode.h" +#include "xe_gt_topology.h" +#include "xe_hw_fence.h" +#include "xe_irq.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_reg_sr.h" +#include "xe_rtp.h" +#include "xe_sched_job.h" +#include "xe_tuning.h" +#include "xe_uc_fw.h" +#include "xe_wa.h" + +#define MAX_MMIO_BASES 3 +struct engine_info { + const char *name; + unsigned int class : 8; + unsigned int instance : 8; + enum xe_force_wake_domains domain; + u32 mmio_base; +}; + +static const struct engine_info engine_infos[] = { + [XE_HW_ENGINE_RCS0] = { + .name = "rcs0", + .class = XE_ENGINE_CLASS_RENDER, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_base = RENDER_RING_BASE, + }, + [XE_HW_ENGINE_BCS0] = { + .name = "bcs0", + .class = XE_ENGINE_CLASS_COPY, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_base = BLT_RING_BASE, + }, + [XE_HW_ENGINE_BCS1] = { + .name = "bcs1", + .class = XE_ENGINE_CLASS_COPY, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS1_RING_BASE, + }, + [XE_HW_ENGINE_BCS2] = { + .name = "bcs2", + .class = XE_ENGINE_CLASS_COPY, + .instance = 2, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS2_RING_BASE, + }, + [XE_HW_ENGINE_BCS3] = { + .name = "bcs3", + .class = XE_ENGINE_CLASS_COPY, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS3_RING_BASE, + }, + [XE_HW_ENGINE_BCS4] = { + .name = "bcs4", + .class = XE_ENGINE_CLASS_COPY, + .instance = 4, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS4_RING_BASE, + }, + [XE_HW_ENGINE_BCS5] = { + .name = "bcs5", + .class = XE_ENGINE_CLASS_COPY, + .instance = 5, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS5_RING_BASE, + }, + [XE_HW_ENGINE_BCS6] = { + .name = "bcs6", + .class = XE_ENGINE_CLASS_COPY, + .instance = 6, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS6_RING_BASE, + }, + [XE_HW_ENGINE_BCS7] = { + .name = "bcs7", + .class = XE_ENGINE_CLASS_COPY, + .instance = 7, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS7_RING_BASE, + }, + [XE_HW_ENGINE_BCS8] = { + .name = "bcs8", + .class = XE_ENGINE_CLASS_COPY, + .instance = 8, + .domain = XE_FW_RENDER, + .mmio_base = XEHPC_BCS8_RING_BASE, + }, + + [XE_HW_ENGINE_VCS0] = { + .name = "vcs0", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 0, + .domain = XE_FW_MEDIA_VDBOX0, + .mmio_base = BSD_RING_BASE, + }, + [XE_HW_ENGINE_VCS1] = { + .name = "vcs1", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 1, + .domain = XE_FW_MEDIA_VDBOX1, + .mmio_base = BSD2_RING_BASE, + }, + [XE_HW_ENGINE_VCS2] = { + .name = "vcs2", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 2, + .domain = XE_FW_MEDIA_VDBOX2, + .mmio_base = BSD3_RING_BASE, + }, + [XE_HW_ENGINE_VCS3] = { + .name = "vcs3", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 3, + .domain = XE_FW_MEDIA_VDBOX3, + .mmio_base = BSD4_RING_BASE, + }, + [XE_HW_ENGINE_VCS4] = { + .name = "vcs4", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 4, + .domain = XE_FW_MEDIA_VDBOX4, + .mmio_base = XEHP_BSD5_RING_BASE, + }, + [XE_HW_ENGINE_VCS5] = { + .name = "vcs5", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 5, + .domain = XE_FW_MEDIA_VDBOX5, + .mmio_base = XEHP_BSD6_RING_BASE, + }, + [XE_HW_ENGINE_VCS6] = { + .name = "vcs6", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 6, + .domain = XE_FW_MEDIA_VDBOX6, + .mmio_base = XEHP_BSD7_RING_BASE, + }, + [XE_HW_ENGINE_VCS7] = { + .name = "vcs7", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 7, + .domain = XE_FW_MEDIA_VDBOX7, + .mmio_base = XEHP_BSD8_RING_BASE, + }, + [XE_HW_ENGINE_VECS0] = 
{ + .name = "vecs0", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 0, + .domain = XE_FW_MEDIA_VEBOX0, + .mmio_base = VEBOX_RING_BASE, + }, + [XE_HW_ENGINE_VECS1] = { + .name = "vecs1", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 1, + .domain = XE_FW_MEDIA_VEBOX1, + .mmio_base = VEBOX2_RING_BASE, + }, + [XE_HW_ENGINE_VECS2] = { + .name = "vecs2", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 2, + .domain = XE_FW_MEDIA_VEBOX2, + .mmio_base = XEHP_VEBOX3_RING_BASE, + }, + [XE_HW_ENGINE_VECS3] = { + .name = "vecs3", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 3, + .domain = XE_FW_MEDIA_VEBOX3, + .mmio_base = XEHP_VEBOX4_RING_BASE, + }, + [XE_HW_ENGINE_CCS0] = { + .name = "ccs0", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_base = COMPUTE0_RING_BASE, + }, + [XE_HW_ENGINE_CCS1] = { + .name = "ccs1", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_base = COMPUTE1_RING_BASE, + }, + [XE_HW_ENGINE_CCS2] = { + .name = "ccs2", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 2, + .domain = XE_FW_RENDER, + .mmio_base = COMPUTE2_RING_BASE, + }, + [XE_HW_ENGINE_CCS3] = { + .name = "ccs3", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_base = COMPUTE3_RING_BASE, + }, + [XE_HW_ENGINE_GSCCS0] = { + .name = "gsccs0", + .class = XE_ENGINE_CLASS_OTHER, + .instance = OTHER_GSC_INSTANCE, + .domain = XE_FW_GSC, + .mmio_base = GSCCS_RING_BASE, + }, +}; + +static void hw_engine_fini(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine *hwe = arg; + + if (hwe->exl_port) + xe_execlist_port_destroy(hwe->exl_port); + xe_lrc_finish(&hwe->kernel_lrc); + + hwe->gt = NULL; +} + +static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, + u32 val) +{ + xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + reg.addr += hwe->mmio_base; + + xe_mmio_write32(hwe->gt, reg, val); +} + +static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +{ + xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + reg.addr += hwe->mmio_base; + + return xe_mmio_read32(hwe->gt, reg); +} + +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) +{ + u32 ccs_mask = + xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) + xe_mmio_write32(hwe->gt, RCU_MODE, + _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); + + hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_bo_ggtt_addr(hwe->hwsp)); + hw_engine_mmio_write32(hwe, RING_MODE(0), + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + _MASKED_BIT_DISABLE(STOP_RING)); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); +} + +static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_ccs_mode_enabled(gt) && + xe_rtp_match_first_render_or_compute(gt, hwe); +} + +void +xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + const u8 mocs_write_idx = gt->mocs.uc_index; + const u8 mocs_read_idx = gt->mocs.uc_index; + u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); + struct xe_rtp_process_ctx ctx = 
XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + const struct xe_rtp_entry_sr lrc_was[] = { + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + { XE_RTP_NAME("BLIT_CCTL_default_MOCS"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(COPY)), + XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0), + BLIT_CCTL_DST_MOCS_MASK | + BLIT_CCTL_SRC_MOCS_MASK, + blit_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, + {} + }; + + xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); +} + +static void +hw_engine_setup_default_state(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + /* + * RING_CMD_CCTL specifies the default MOCS entry that will be + * used by the command streamer when executing commands that + * don't have a way to explicitly specify a MOCS setting. + * The default should usually reference whichever MOCS entry + * corresponds to uncached behavior, although use of a WB cached + * entry is recommended by the spec in certain circumstances on + * specific platforms. + * Bspec: 72161 + */ + const u8 mocs_write_idx = gt->mocs.uc_index; + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? + gt->mocs.wb_index : gt->mocs.uc_index; + u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + const struct xe_rtp_entry_sr engine_entries[] = { + { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), + CMD_CCTL_WRITE_OVERRIDE_MASK | + CMD_CCTL_READ_OVERRIDE_MASK, + ring_cmd_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* + * To allow the GSC engine to go idle on MTL we need to enable + * idle messaging and set the hysteresis value (we use 0xA=5us + * as recommended in spec). On platforms after MTL this is + * enabled by default. 
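+	 *
+	 * The entry below clears IDLE_MSG_DISABLE in RING_PSMI_CTL and
+	 * programs IDLE_WAIT_TIME in RING_PWRCTX_MAXCNT, both relative to
+	 * the engine MMIO base.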
+ */ + { XE_RTP_NAME("MTL GSCCS IDLE MSG enable"), + XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)), + XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0), + IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(ENGINE_BASE)), + FIELD_SET(RING_PWRCTX_MAXCNT(0), + IDLE_WAIT_TIME, + 0xA, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + {} + }; + + xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); +} + +static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + const struct engine_info *info; + + if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name)) + return; + + if (!(gt->info.engine_mask & BIT(id))) + return; + + info = &engine_infos[id]; + + xe_gt_assert(gt, !hwe->gt); + + hwe->gt = gt; + hwe->class = info->class; + hwe->instance = info->instance; + hwe->mmio_base = info->mmio_base; + hwe->domain = info->domain; + hwe->name = info->name; + hwe->fence_irq = >->fence_irq[info->class]; + hwe->engine_id = id; + + hwe->eclass = >->eclass[hwe->class]; + if (!hwe->eclass->sched_props.job_timeout_ms) { + hwe->eclass->sched_props.job_timeout_ms = 5 * 1000; + hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; + hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; + hwe->eclass->sched_props.timeslice_us = 1 * 1000; + hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; + hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; + hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; + hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; + hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + /* Record default props */ + hwe->eclass->defaults = hwe->eclass->sched_props; + } + + xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); + xe_tuning_process_engine(hwe); + xe_wa_process_engine(hwe); + hw_engine_setup_default_state(hwe); + + xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); + xe_reg_whitelist_process_engine(hwe); +} + +static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + int err; + + xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name); + xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); + + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(hwe); + + hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(hwe->hwsp)) { + err = PTR_ERR(hwe->hwsp); + goto err_name; + } + + err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K); + if (err) + goto err_hwsp; + + if (!xe_device_uc_enabled(xe)) { + hwe->exl_port = xe_execlist_port_create(xe, hwe); + if (IS_ERR(hwe->exl_port)) { + err = PTR_ERR(hwe->exl_port); + goto err_kernel_lrc; + } + } + + if (xe_device_uc_enabled(xe)) + xe_hw_engine_enable_ring(hwe); + + /* We reserve the highest BCS instance for USM */ + if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) + gt->usm.reserved_bcs_instance = hwe->instance; + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + if (err) + return err; + + return 0; + +err_kernel_lrc: + xe_lrc_finish(&hwe->kernel_lrc); +err_hwsp: + xe_bo_unpin_map_no_vm(hwe->hwsp); +err_name: + hwe->name = NULL; + + return err; +} + +static void hw_engine_setup_logical_mapping(struct xe_gt *gt) +{ + int class; + + /* FIXME: Doing a simple logical mapping that 
works for most hardware */ + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int logical_instance = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + hwe->logical_instance = logical_instance++; + } +} + +static void read_media_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 media_fuse; + u16 vdbox_mask; + u16 vebox_mask; + int i, j; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE); + + /* + * Pre-Xe_HP platforms had register bits representing absent engines, + * whereas Xe_HP and beyond have bits representing present engines. + * Invert the polarity on old platforms so that we can use common + * handling below. + */ + if (GRAPHICS_VERx100(xe) < 1250) + media_fuse = ~media_fuse; + + vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse); + vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse); + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vdbox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vcs%u fused off\n", j); + } + } + + for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vebox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vecs%u fused off\n", j); + } + } +} + +static void read_copy_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 bcs_mask; + + if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3); + bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); + + /* BCS0 is always present; only BCS1-BCS8 may be fused off */ + for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j / 2) & bcs_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "bcs%u fused off\n", j); + } + } +} + +static void read_compute_fuses_from_dss(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + /* + * CCS fusing based on DSS masks only applies to platforms that can + * have more than one CCS. + */ + if (hweight64(gt->info.engine_mask & + GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1) + return; + + /* + * CCS availability on Xe_HP is inferred from the presence of DSS in + * each quadrant. 
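+	 *
+	 * For example, if quadrant 2 contains no enabled DSS, ccs2 is
+	 * removed from the engine mask below.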
+ */ + for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "ccs%u fused off\n", j); + } + } +} + +static void read_compute_fuses_from_reg(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 ccs_mask; + + ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4); + ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask); + + for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if ((ccs_mask & BIT(j)) == 0) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "ccs%u fused off\n", j); + } + } +} + +static void read_compute_fuses(struct xe_gt *gt) +{ + if (GRAPHICS_VER(gt_to_xe(gt)) >= 20) + read_compute_fuses_from_reg(gt); + else + read_compute_fuses_from_dss(gt); +} + +static void check_gsc_availability(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) + return; + + /* + * The GSCCS is only used to communicate with the GSC FW, so if we don't + * have the FW there is nothing we need the engine for and can therefore + * skip its initialization. + */ + if (!xe_uc_fw_is_available(>->uc.gsc.fw)) { + gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); + drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + } +} + +int xe_hw_engines_init_early(struct xe_gt *gt) +{ + int i; + + read_media_fuses(gt); + read_copy_fuses(gt); + read_compute_fuses(gt); + check_gsc_availability(gt); + + BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); + BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); + + for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) + hw_engine_init_early(gt, >->hw_engines[i], i); + + return 0; +} + +int xe_hw_engines_init(struct xe_gt *gt) +{ + int err; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + err = hw_engine_init(gt, hwe, id); + if (err) + return err; + } + + hw_engine_setup_logical_mapping(gt); + + return 0; +} + +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) +{ + wake_up_all(>_to_xe(hwe->gt)->ufence_wq); + + if (hwe->irq_handler) + hwe->irq_handler(hwe, intr_vec); + + if (intr_vec & GT_RENDER_USER_INTERRUPT) + xe_hw_fence_irq_run(hwe->fence_irq); +} + +/** + * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. + * @hwe: Xe HW Engine. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a Xe HW Engine snapshot object that must be freed by the + * caller, using `xe_hw_engine_snapshot_free`. 
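+ *
+ * A typical sequence is to capture, print with
+ * xe_hw_engine_snapshot_print() and then release with
+ * xe_hw_engine_snapshot_free(), which is exactly what
+ * xe_hw_engine_print() does.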
+ */ +struct xe_hw_engine_snapshot * +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) +{ + struct xe_hw_engine_snapshot *snapshot; + int len; + + if (!xe_hw_engine_is_valid(hwe)) + return NULL; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + + if (!snapshot) + return NULL; + + len = strlen(hwe->name) + 1; + snapshot->name = kzalloc(len, GFP_ATOMIC); + if (snapshot->name) + strscpy(snapshot->name, hwe->name, len); + + snapshot->class = hwe->class; + snapshot->logical_instance = hwe->logical_instance; + snapshot->forcewake.domain = hwe->domain; + snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), + hwe->domain); + snapshot->mmio_base = hwe->mmio_base; + + snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, + RING_HWS_PGA(0)); + snapshot->reg.ring_execlist_status_lo = + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + snapshot->reg.ring_execlist_status_hi = + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + snapshot->reg.ring_execlist_sq_contents_lo = + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS_LO(0)); + snapshot->reg.ring_execlist_sq_contents_hi = + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS_HI(0)); + snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_head = + hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + snapshot->reg.ring_tail = + hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + snapshot->reg.ring_mi_mode = + hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ring_acthd_udw = + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + snapshot->reg.ring_bbaddr_udw = + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + snapshot->reg.ring_dma_fadd_udw = + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + snapshot->reg.ring_dma_fadd = + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + + if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); + + return snapshot; +} + +/** + * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. + * @snapshot: Xe HW Engine snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given Xe HW Engine snapshot object. + */ +void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + drm_printf(p, "%s (physical), logical instance=%d\n", + snapshot->name ? 
snapshot->name : "",
+		   snapshot->logical_instance);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		   snapshot->forcewake.domain, snapshot->forcewake.ref);
+	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
+	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_lo);
+	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_hi);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_lo);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_hi);
+	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
+	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
+	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
+	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
+	drm_printf(p, "\tRING_MODE: 0x%08x\n",
+		   snapshot->reg.ring_mode);
+	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
+	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
+	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
+	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
+	drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
+		   snapshot->reg.ring_acthd);
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
+		   snapshot->reg.ring_bbaddr);
+	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		   snapshot->reg.ring_dma_fadd_udw,
+		   snapshot->reg.ring_dma_fadd);
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
+			   snapshot->reg.rcu_mode);
+}
+
+/**
+ * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ *
+ * This function frees all the memory that was allocated at capture time.
+ */
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->name);
+	kfree(snapshot);
+}
+
+/**
+ * xe_hw_engine_print - Xe HW Engine Print.
+ * @hwe: Hardware Engine.
+ * @p: drm_printer.
+ *
+ * This function quickly captures a snapshot and immediately prints it out.
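+ *
+ * The capture path only uses GFP_ATOMIC allocations, so a snapshot can be
+ * taken from contexts that cannot sleep.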
+ */ +void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p) +{ + struct xe_hw_engine_snapshot *snapshot; + + snapshot = xe_hw_engine_snapshot_capture(hwe); + xe_hw_engine_snapshot_print(snapshot, p); + xe_hw_engine_snapshot_free(snapshot); +} + +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class) +{ + u32 mask = 0; + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (engine_infos[id].class == engine_class && + gt->info.engine_mask & BIT(id)) + mask |= BIT(engine_infos[id].instance); + } + return mask; +} + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + + if (hwe->class == XE_ENGINE_CLASS_OTHER) + return true; + + /* Check for engines disabled by ccs_mode setting */ + if (xe_gt_ccs_mode_enabled(gt) && + hwe->class == XE_ENGINE_CLASS_COMPUTE && + hwe->logical_instance >= gt->ccs_mode) + return true; + + return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h new file mode 100644 index 000000000000..71968ee2f600 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_H_ +#define _XE_HW_ENGINE_H_ + +#include "xe_hw_engine_types.h" + +struct drm_printer; + +#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN +#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN +#else +#define XE_HW_ENGINE_JOB_TIMEOUT_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MAX +#define XE_HW_ENGINE_JOB_TIMEOUT_MAX CONFIG_DRM_XE_JOB_TIMEOUT_MAX +#else +#define XE_HW_ENGINE_JOB_TIMEOUT_MAX (10 * 1000) +#endif +#ifdef CONFIG_DRM_XE_TIMESLICE_MIN +#define XE_HW_ENGINE_TIMESLICE_MIN CONFIG_DRM_XE_TIMESLICE_MIN +#else +#define XE_HW_ENGINE_TIMESLICE_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_TIMESLICE_MAX +#define XE_HW_ENGINE_TIMESLICE_MAX CONFIG_DRM_XE_TIMESLICE_MAX +#else +#define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000) +#endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT +#define XE_HW_ENGINE_PREEMPT_TIMEOUT CONFIG_DRM_XE_PREEMPT_TIMEOUT +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT (640 * 1000) +#endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX (10 * 1000 * 1000) +#endif + +int xe_hw_engines_init_early(struct xe_gt *gt); +int xe_hw_engines_init(struct xe_gt *gt); +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class); + +struct xe_hw_engine_snapshot * +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe); +void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); +void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p); +void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); +void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); +static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) 
+{ + return hwe->name; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c new file mode 100644 index 000000000000..e49bc14f0ecf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -0,0 +1,675 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include "xe_gt.h" +#include "xe_hw_engine_class_sysfs.h" + +#define MAX_ENGINE_CLASS_NAME_LEN 16 +static int xe_add_hw_engine_class_defaults(struct xe_device *xe, + struct kobject *parent); + +/** + * xe_hw_engine_timeout_in_range - Helper to check if timeout is in range + * @timeout: timeout to validate + * @min: min value of valid range + * @max: max value of valid range + * + * This helper helps to validate if timeout is in min-max range of HW engine + * scheduler. + * + * Returns: Returns false value for failure and true for success. + */ +bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) +{ + return timeout >= min && timeout <= max; +} + +static void kobj_xe_hw_engine_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_type kobj_xe_hw_engine_type = { + .release = kobj_xe_hw_engine_release, + .sysfs_ops = &kobj_sysfs_ops +}; + +static ssize_t job_timeout_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout < eclass->sched_props.job_timeout_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_JOB_TIMEOUT_MIN, + XE_HW_ENGINE_JOB_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.job_timeout_max, timeout); + + return count; +} + +static ssize_t job_timeout_max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); +} + +static struct kobj_attribute job_timeout_max_attr = +__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store); + +static ssize_t job_timeout_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout > eclass->sched_props.job_timeout_max) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_JOB_TIMEOUT_MIN, + XE_HW_ENGINE_JOB_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.job_timeout_min, timeout); + + return count; +} + +static ssize_t job_timeout_min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); +} + +static struct kobj_attribute job_timeout_min_attr = +__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store); + +static ssize_t job_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = eclass->sched_props.job_timeout_min; + u32 max = eclass->sched_props.job_timeout_max; + u32 timeout; + int err; + + err = 
kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (!xe_hw_engine_timeout_in_range(timeout, min, max)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout); + + return count; +} + +static ssize_t job_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); +} + +static struct kobj_attribute job_timeout_attr = +__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store); + +static ssize_t job_timeout_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); +} + +static struct kobj_attribute job_timeout_def = +__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); + +static ssize_t job_timeout_min_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); +} + +static struct kobj_attribute job_timeout_min_def = +__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL); + +static ssize_t job_timeout_max_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); +} + +static struct kobj_attribute job_timeout_max_def = +__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL); + +static ssize_t timeslice_duration_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = eclass->sched_props.timeslice_min; + u32 max = eclass->sched_props.timeslice_max; + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + if (!xe_hw_engine_timeout_in_range(duration, min, max)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.timeslice_us, duration); + + return count; +} + +static ssize_t timeslice_duration_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + if (duration < eclass->sched_props.timeslice_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(duration, + XE_HW_ENGINE_TIMESLICE_MIN, + XE_HW_ENGINE_TIMESLICE_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.timeslice_max, duration); + + return count; +} + +static ssize_t timeslice_duration_max_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); +} + +static struct kobj_attribute timeslice_duration_max_attr = + __ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show, + timeslice_duration_max_store); + +static ssize_t timeslice_duration_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + if (duration > eclass->sched_props.timeslice_max) + return 
-EINVAL; + + if (!xe_hw_engine_timeout_in_range(duration, + XE_HW_ENGINE_TIMESLICE_MIN, + XE_HW_ENGINE_TIMESLICE_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.timeslice_min, duration); + + return count; +} + +static ssize_t timeslice_duration_min_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); +} + +static struct kobj_attribute timeslice_duration_min_attr = + __ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show, + timeslice_duration_min_store); + +static ssize_t timeslice_duration_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); +} + +static struct kobj_attribute timeslice_duration_attr = + __ATTR(timeslice_duration_us, 0644, timeslice_duration_show, + timeslice_duration_store); + +static ssize_t timeslice_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); +} + +static struct kobj_attribute timeslice_duration_def = +__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); + +static ssize_t timeslice_min_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); +} + +static struct kobj_attribute timeslice_duration_min_def = +__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL); + +static ssize_t timeslice_max_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); +} + +static struct kobj_attribute timeslice_duration_max_def = +__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL); + +static ssize_t preempt_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = eclass->sched_props.preempt_timeout_min; + u32 max = eclass->sched_props.preempt_timeout_max; + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (!xe_hw_engine_timeout_in_range(timeout, min, max)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout); + + return count; +} + +static ssize_t preempt_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); +} + +static struct kobj_attribute preempt_timeout_attr = +__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store); + +static ssize_t preempt_timeout_default(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); +} + +static struct kobj_attribute preempt_timeout_def = +__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL); + +static ssize_t preempt_timeout_min_default(struct kobject *kobj, + struct kobj_attribute *attr, 
+ char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); +} + +static struct kobj_attribute preempt_timeout_min_def = +__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL); + +static ssize_t preempt_timeout_max_default(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); +} + +static struct kobj_attribute preempt_timeout_max_def = +__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL); + +static ssize_t preempt_timeout_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout < eclass->sched_props.preempt_timeout_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_max, timeout); + + return count; +} + +static ssize_t preempt_timeout_max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max); +} + +static struct kobj_attribute preempt_timeout_max_attr = + __ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show, + preempt_timeout_max_store); + +static ssize_t preempt_timeout_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout > eclass->sched_props.preempt_timeout_max) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_min, timeout); + + return count; +} + +static ssize_t preempt_timeout_min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); +} + +static struct kobj_attribute preempt_timeout_min_attr = + __ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show, + preempt_timeout_min_store); + +static const struct attribute *defaults[] = { + &job_timeout_def.attr, + &job_timeout_min_def.attr, + &job_timeout_max_def.attr, + ×lice_duration_def.attr, + ×lice_duration_min_def.attr, + ×lice_duration_max_def.attr, + &preempt_timeout_def.attr, + &preempt_timeout_min_def.attr, + &preempt_timeout_max_def.attr, + NULL +}; + +static const struct attribute *files[] = { + &job_timeout_attr.attr, + &job_timeout_min_attr.attr, + &job_timeout_max_attr.attr, + ×lice_duration_attr.attr, + ×lice_duration_min_attr.attr, + ×lice_duration_max_attr.attr, + &preempt_timeout_attr.attr, + &preempt_timeout_min_attr.attr, + &preempt_timeout_max_attr.attr, + NULL +}; + +static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, files); + kobject_put(kobj); +} + + static struct kobj_eclass * 
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) +{ + struct kobj_eclass *keclass; + int err = 0; + + keclass = kzalloc(sizeof(*keclass), GFP_KERNEL); + if (!keclass) + return NULL; + + kobject_init(&keclass->base, &kobj_xe_hw_engine_type); + if (kobject_add(&keclass->base, parent, "%s", name)) { + kobject_put(&keclass->base); + return NULL; + } + + err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, + &keclass->base); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return keclass; +} + +static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, defaults); + kobject_put(kobj); +} + +static int xe_add_hw_engine_class_defaults(struct xe_device *xe, + struct kobject *parent) +{ + struct kobject *kobj; + int err = 0; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &kobj_xe_hw_engine_type); + err = kobject_add(kobj, parent, "%s", ".defaults"); + if (err) + goto err_object; + + err = sysfs_create_files(kobj, defaults); + if (err) + goto err_object; + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, + kobj); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return err; +err_object: + kobject_put(kobj); + return err; +} + +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + +/** + * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. + * @gt: Xe GT. + * + * This routine creates sysfs for HW engine classes and adds methods + * to get/set different scheduling properties for HW engines class. + * + * Returns: Returns error value for failure and 0 for success. 
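+ *
+ * Illustrative layout created below, relative to the GT's sysfs directory
+ * (gt->sysfs); the exact parent path depends on where that kobject was
+ * registered:
+ *
+ *   engines/<class>/job_timeout_ms, timeslice_duration_us, preempt_timeout_us, ...
+ *   engines/<class>/.defaults/      (read-only copies of the default values)
+ *
+ * where <class> is one of rcs, bcs, vcs, vecs or ccs.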
+ */ +int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct kobject *kobj; + u16 class_mask = 0; + int err = 0; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + + err = kobject_add(kobj, gt->sysfs, "engines"); + if (err) + goto err_object; + + for_each_hw_engine(hwe, gt, id) { + char name[MAX_ENGINE_CLASS_NAME_LEN]; + struct kobj_eclass *keclass; + + if (hwe->class == XE_ENGINE_CLASS_OTHER || + hwe->class == XE_ENGINE_CLASS_MAX) + continue; + + if ((class_mask >> hwe->class) & 1) + continue; + + class_mask |= 1 << hwe->class; + + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + strcpy(name, "rcs"); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + strcpy(name, "vcs"); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + strcpy(name, "vecs"); + break; + case XE_ENGINE_CLASS_COPY: + strcpy(name, "bcs"); + break; + case XE_ENGINE_CLASS_COMPUTE: + strcpy(name, "ccs"); + break; + default: + err = -EINVAL; + goto err_object; + } + + keclass = kobj_xe_hw_engine_class(xe, kobj, name); + if (!keclass) { + err = -EINVAL; + goto err_object; + } + + keclass->eclass = hwe->eclass; + err = xe_add_hw_engine_class_defaults(xe, &keclass->base); + if (err) { + drm_warn(&xe->drm, + "Add .defaults to engines failed!, err: %d\n", + err); + goto err_object; + } + + err = sysfs_create_files(&keclass->base, files); + if (err) + goto err_object; + } + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, + kobj); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + + return err; +err_object: + kobject_put(kobj); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h new file mode 100644 index 000000000000..ec5ba673b314 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ENGINE_CLASS_SYSFS_H_ +#define _XE_ENGINE_CLASS_SYSFS_H_ + +#include <linux/kobject.h> + +struct xe_gt; +struct xe_hw_engine_class_intf; + +int xe_hw_engine_class_sysfs_init(struct xe_gt *gt); +bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max); + +/** + * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the + * eclass. + * + * When dealing with multiple eclass, this struct helps to understand which eclass + * needs to be addressed on a given sysfs call. 
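+ *
+ * The sysfs show/store callbacks recover the class interface from the
+ * embedded kobject with container_of(), see kobj_to_eclass() below.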
+ */ +struct kobj_eclass { + /** @base: The actual kobject */ + struct kobject base; + /** @eclass: A pointer to the hw engine class interface */ + struct xe_hw_engine_class_intf *eclass; +}; + +static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_eclass, base)->eclass; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h new file mode 100644 index 000000000000..39908dec042a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_TYPES_H_ +#define _XE_HW_ENGINE_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/* See "Engine ID Definition" struct in the Icelake PRM */ +enum xe_engine_class { + XE_ENGINE_CLASS_RENDER = 0, + XE_ENGINE_CLASS_VIDEO_DECODE = 1, + XE_ENGINE_CLASS_VIDEO_ENHANCE = 2, + XE_ENGINE_CLASS_COPY = 3, + XE_ENGINE_CLASS_OTHER = 4, + XE_ENGINE_CLASS_COMPUTE = 5, + XE_ENGINE_CLASS_MAX = 6, +}; + +enum xe_hw_engine_id { + XE_HW_ENGINE_RCS0, +#define XE_HW_ENGINE_RCS_MASK GENMASK_ULL(XE_HW_ENGINE_RCS0, XE_HW_ENGINE_RCS0) + XE_HW_ENGINE_BCS0, + XE_HW_ENGINE_BCS1, + XE_HW_ENGINE_BCS2, + XE_HW_ENGINE_BCS3, + XE_HW_ENGINE_BCS4, + XE_HW_ENGINE_BCS5, + XE_HW_ENGINE_BCS6, + XE_HW_ENGINE_BCS7, + XE_HW_ENGINE_BCS8, +#define XE_HW_ENGINE_BCS_MASK GENMASK_ULL(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS0) + XE_HW_ENGINE_VCS0, + XE_HW_ENGINE_VCS1, + XE_HW_ENGINE_VCS2, + XE_HW_ENGINE_VCS3, + XE_HW_ENGINE_VCS4, + XE_HW_ENGINE_VCS5, + XE_HW_ENGINE_VCS6, + XE_HW_ENGINE_VCS7, +#define XE_HW_ENGINE_VCS_MASK GENMASK_ULL(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) + XE_HW_ENGINE_VECS0, + XE_HW_ENGINE_VECS1, + XE_HW_ENGINE_VECS2, + XE_HW_ENGINE_VECS3, +#define XE_HW_ENGINE_VECS_MASK GENMASK_ULL(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) + XE_HW_ENGINE_CCS0, + XE_HW_ENGINE_CCS1, + XE_HW_ENGINE_CCS2, + XE_HW_ENGINE_CCS3, +#define XE_HW_ENGINE_CCS_MASK GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) + XE_HW_ENGINE_GSCCS0, +#define XE_HW_ENGINE_GSCCS_MASK GENMASK_ULL(XE_HW_ENGINE_GSCCS0, XE_HW_ENGINE_GSCCS0) + XE_NUM_HW_ENGINES, +}; + +/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */ +#define XE_HW_ENGINE_MAX_INSTANCE 9 + +struct xe_bo; +struct xe_execlist_port; +struct xe_gt; + +/** + * struct xe_hw_engine_class_intf - per hw engine class struct interface + * + * Contains all the hw engine properties per engine class. 
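+ *
+ * @sched_props holds the values currently in effect (tunable through the
+ * per-class sysfs files created in xe_hw_engine_class_sysfs.c), while
+ * @defaults keeps the values exposed read-only under the .defaults directory.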
+ * + * @sched_props: scheduling properties + * @defaults: default scheduling properties + */ +struct xe_hw_engine_class_intf { + /** + * @sched_props: scheduling properties + * @defaults: default scheduling properties + */ + struct { + /** @set_job_timeout: Set job timeout in ms for engine */ + u32 job_timeout_ms; + /** @job_timeout_min: Min job timeout in ms for engine */ + u32 job_timeout_min; + /** @job_timeout_max: Max job timeout in ms for engine */ + u32 job_timeout_max; + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @timeslice_min: min timeslice period in micro-seconds */ + u32 timeslice_min; + /** @timeslice_max: max timeslice period in micro-seconds */ + u32 timeslice_max; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + /** @preempt_timeout_min: min preemption timeout in micro-seconds */ + u32 preempt_timeout_min; + /** @preempt_timeout_max: max preemption timeout in micro-seconds */ + u32 preempt_timeout_max; + } sched_props, defaults; +}; + +/** + * struct xe_hw_engine - Hardware engine + * + * Contains all the hardware engine state for physical instances. + */ +struct xe_hw_engine { + /** @gt: graphics tile this hw engine belongs to */ + struct xe_gt *gt; + /** @name: name of this hw engine */ + const char *name; + /** @class: class of this hw engine */ + enum xe_engine_class class; + /** @instance: physical instance of this hw engine */ + u16 instance; + /** @logical_instance: logical instance of this hw engine */ + u16 logical_instance; + /** @mmio_base: MMIO base address of this hw engine*/ + u32 mmio_base; + /** + * @reg_sr: table with registers to be restored on GT init/resume/reset + */ + struct xe_reg_sr reg_sr; + /** + * @reg_whitelist: table with registers to be whitelisted + */ + struct xe_reg_sr reg_whitelist; + /** + * @reg_lrc: LRC workaround registers + */ + struct xe_reg_sr reg_lrc; + /** @domain: force wake domain of this hw engine */ + enum xe_force_wake_domains domain; + /** @hwsp: hardware status page buffer object */ + struct xe_bo *hwsp; + /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */ + struct xe_lrc kernel_lrc; + /** @exl_port: execlists port */ + struct xe_execlist_port *exl_port; + /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */ + struct xe_hw_fence_irq *fence_irq; + /** @irq_handler: IRQ handler to run when hw engine IRQ is received */ + void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec); + /** @engine_id: id for this hw engine */ + enum xe_hw_engine_id engine_id; + /** @eclass: pointer to per hw engine class interface */ + struct xe_hw_engine_class_intf *eclass; +}; + +/** + * struct xe_hw_engine_snapshot - Hardware engine snapshot + * + * Contains the snapshot of useful hardware engine info and registers. 
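+ *
+ * Instances are allocated by xe_hw_engine_snapshot_capture() and released
+ * with xe_hw_engine_snapshot_free() after use.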
+ */ +struct xe_hw_engine_snapshot { + /** @name: name of the hw engine */ + char *name; + /** @class: class of this hw engine */ + enum xe_engine_class class; + /** @logical_instance: logical instance of this hw engine */ + u16 logical_instance; + /** @forcewake: Force Wake information snapshot */ + struct { + /** @domain: force wake domain of this hw engine */ + enum xe_force_wake_domains domain; + /** @ref: Forcewake ref for the above domain */ + int ref; + } forcewake; + /** @mmio_base: MMIO base address of this hw engine*/ + u32 mmio_base; + /** @reg: Useful MMIO register snapshot */ + struct { + /** @ring_hwstam: RING_HWSTAM */ + u32 ring_hwstam; + /** @ring_hws_pga: RING_HWS_PGA */ + u32 ring_hws_pga; + /** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */ + u32 ring_execlist_status_lo; + /** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */ + u32 ring_execlist_status_hi; + /** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */ + u32 ring_execlist_sq_contents_lo; + /** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */ + u32 ring_execlist_sq_contents_hi; + /** @ring_start: RING_START */ + u32 ring_start; + /** @ring_head: RING_HEAD */ + u32 ring_head; + /** @ring_tail: RING_TAIL */ + u32 ring_tail; + /** @ring_ctl: RING_CTL */ + u32 ring_ctl; + /** @ring_mi_mode: RING_MI_MODE */ + u32 ring_mi_mode; + /** @ring_mode: RING_MODE */ + u32 ring_mode; + /** @ring_imr: RING_IMR */ + u32 ring_imr; + /** @ring_esr: RING_ESR */ + u32 ring_esr; + /** @ring_emr: RING_EMR */ + u32 ring_emr; + /** @ring_eir: RING_EIR */ + u32 ring_eir; + /** @ring_acthd_udw: RING_ACTHD_UDW */ + u32 ring_acthd_udw; + /** @ring_acthd: RING_ACTHD */ + u32 ring_acthd; + /** @ring_bbaddr_udw: RING_BBADDR_UDW */ + u32 ring_bbaddr_udw; + /** @ring_bbaddr: RING_BBADDR */ + u32 ring_bbaddr; + /** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */ + u32 ring_dma_fadd_udw; + /** @ring_dma_fadd: RING_DMA_FADD */ + u32 ring_dma_fadd; + /** @ipehr: IPEHR */ + u32 ipehr; + /** @rcu_mode: RCU_MODE */ + u32 rcu_mode; + } reg; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c new file mode 100644 index 000000000000..a6094c81f2ad --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_fence.h" + +#include <linux/device.h> +#include <linux/slab.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_trace.h" + +static struct kmem_cache *xe_hw_fence_slab; + +int __init xe_hw_fence_module_init(void) +{ + xe_hw_fence_slab = kmem_cache_create("xe_hw_fence", + sizeof(struct xe_hw_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_hw_fence_slab) + return -ENOMEM; + + return 0; +} + +void xe_hw_fence_module_exit(void) +{ + rcu_barrier(); + kmem_cache_destroy(xe_hw_fence_slab); +} + +static struct xe_hw_fence *fence_alloc(void) +{ + return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); +} + +static void fence_free(struct rcu_head *rcu) +{ + struct xe_hw_fence *fence = + container_of(rcu, struct xe_hw_fence, dma.rcu); + + if (!WARN_ON_ONCE(!fence)) + kmem_cache_free(xe_hw_fence_slab, fence); +} + +static void hw_fence_irq_run_cb(struct irq_work *work) +{ + struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work); + struct xe_hw_fence *fence, *next; + bool tmp; + + tmp = dma_fence_begin_signalling(); + spin_lock(&irq->lock); + if (irq->enabled) { + 
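+ /*
+  * Walk the pending list under the irq lock: completed fences are
+  * signalled, unlinked, and the reference taken when signalling was
+  * enabled is dropped.
+  */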
list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + struct dma_fence *dma_fence = &fence->dma; + + trace_xe_hw_fence_try_signal(fence); + if (dma_fence_is_signaled_locked(dma_fence)) { + trace_xe_hw_fence_signal(fence); + list_del_init(&fence->irq_link); + dma_fence_put(dma_fence); + } + } + } + spin_unlock(&irq->lock); + dma_fence_end_signalling(tmp); +} + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq) +{ + spin_lock_init(&irq->lock); + init_irq_work(&irq->work, hw_fence_irq_run_cb); + INIT_LIST_HEAD(&irq->pending); + irq->enabled = true; +} + +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) +{ + struct xe_hw_fence *fence, *next; + unsigned long flags; + int err; + bool tmp; + + if (XE_WARN_ON(!list_empty(&irq->pending))) { + tmp = dma_fence_begin_signalling(); + spin_lock_irqsave(&irq->lock, flags); + list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + list_del_init(&fence->irq_link); + err = dma_fence_signal_locked(&fence->dma); + dma_fence_put(&fence->dma); + XE_WARN_ON(err); + } + spin_unlock_irqrestore(&irq->lock, flags); + dma_fence_end_signalling(tmp); + } +} + +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) +{ + irq_work_queue(&irq->work); +} + +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = false; + spin_unlock_irq(&irq->lock); +} + +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = true; + spin_unlock_irq(&irq->lock); + + irq_work_queue(&irq->work); +} + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name) +{ + ctx->gt = gt; + ctx->irq = irq; + ctx->dma_fence_ctx = dma_fence_context_alloc(1); + ctx->next_seqno = XE_FENCE_INITIAL_SEQNO; + sprintf(ctx->name, "%s", name); +} + +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx) +{ +} + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence); + +static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence) +{ + return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); +} + +static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); +} + +static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return fence->ctx->name; +} + +static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_device *xe = gt_to_xe(fence->ctx->gt); + u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); + + return dma_fence->error || + !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops); +} + +static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence); + + dma_fence_get(dma_fence); + list_add_tail(&fence->irq_link, &irq->pending); + + /* SW completed (no HW IRQ) so kick handler to signal fence */ + if (xe_hw_fence_signaled(dma_fence)) + xe_hw_fence_irq_run(irq); + + return true; +} + +static void xe_hw_fence_release(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + trace_xe_hw_fence_free(fence); + XE_WARN_ON(!list_empty(&fence->irq_link)); + call_rcu(&dma_fence->rcu, fence_free); +} + +static const struct dma_fence_ops 
xe_hw_fence_ops = { + .get_driver_name = xe_hw_fence_get_driver_name, + .get_timeline_name = xe_hw_fence_get_timeline_name, + .enable_signaling = xe_hw_fence_enable_signaling, + .signaled = xe_hw_fence_signaled, + .release = xe_hw_fence_release, +}; + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence) +{ + if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops)) + return NULL; + + return container_of(fence, struct xe_hw_fence, dma); +} + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map) +{ + struct xe_hw_fence *fence; + + fence = fence_alloc(); + if (!fence) + return ERR_PTR(-ENOMEM); + + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + + fence->ctx = ctx; + fence->seqno_map = seqno_map; + INIT_LIST_HEAD(&fence->irq_link); + + trace_xe_hw_fence_create(fence); + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h new file mode 100644 index 000000000000..cfe5fd603787 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_H_ +#define _XE_HW_FENCE_H_ + +#include "xe_hw_fence_types.h" + +/* Cause an early wrap to catch wrapping errors */ +#define XE_FENCE_INITIAL_SEQNO (-127) + +int xe_hw_fence_module_init(void); +void xe_hw_fence_module_exit(void); + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq); + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name); +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx); + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h new file mode 100644 index 000000000000..b33c4956e8ea --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_TYPES_H_ +#define _XE_HW_FENCE_TYPES_H_ + +#include <linux/dma-fence.h> +#include <linux/iosys-map.h> +#include <linux/irq_work.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +struct xe_gt; + +/** + * struct xe_hw_fence_irq - hardware fence IRQ handler + * + * One per engine class, signals completed xe_hw_fences, triggered via hw engine + * interrupt. On each trigger, search list of pending fences and signal. + */ +struct xe_hw_fence_irq { + /** @lock: protects all xe_hw_fences + pending list */ + spinlock_t lock; + /** @work: IRQ worker run to signal the fences */ + struct irq_work work; + /** @pending: list of pending xe_hw_fences */ + struct list_head pending; + /** @enabled: fence signaling enabled */ + bool enabled; +}; + +#define MAX_FENCE_NAME_LEN 16 + +/** + * struct xe_hw_fence_ctx - hardware fence context + * + * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points + * to a xe_hw_fence_irq, maintains serial seqno. 
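+ *
+ * Seqnos are handed out serially from @next_seqno, which starts at
+ * XE_FENCE_INITIAL_SEQNO (-127) so the counter wraps early and wrapping
+ * bugs are caught quickly.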
+ */ +struct xe_hw_fence_ctx { + /** @gt: graphics tile of hardware fence context */ + struct xe_gt *gt; + /** @irq: fence irq handler */ + struct xe_hw_fence_irq *irq; + /** @dma_fence_ctx: dma fence context for hardware fence */ + u64 dma_fence_ctx; + /** @next_seqno: next seqno for hardware fence */ + u32 next_seqno; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; +}; + +/** + * struct xe_hw_fence - hardware fence + * + * Used to indicate a xe_sched_job is complete via a seqno written to memory. + * Signals on error or seqno past. + */ +struct xe_hw_fence { + /** @dma: base dma fence for hardware fence context */ + struct dma_fence dma; + /** @ctx: hardware fence context */ + struct xe_hw_fence_ctx *ctx; + /** @seqno_map: I/O map for seqno */ + struct iosys_map seqno_map; + /** @irq_link: Link in struct xe_hw_fence_irq.pending */ + struct list_head irq_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c new file mode 100644 index 000000000000..6ef2aa1eae8b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/hwmon-sysfs.h> +#include <linux/hwmon.h> +#include <linux/types.h> + +#include <drm/drm_managed.h> +#include "regs/xe_gt_regs.h" +#include "regs/xe_mchbar_regs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hwmon.h" +#include "xe_mmio.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" + +enum xe_hwmon_reg { + REG_PKG_RAPL_LIMIT, + REG_PKG_POWER_SKU, + REG_PKG_POWER_SKU_UNIT, + REG_GT_PERF_STATUS, + REG_PKG_ENERGY_STATUS, +}; + +enum xe_hwmon_reg_operation { + REG_READ32, + REG_RMW32, + REG_READ64, +}; + +/* + * SF_* - scale factors for particular quantities according to hwmon spec. 
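+ *
+ * For example, power is scaled by SF_POWER so that sysfs reports microwatts;
+ * likewise SF_CURR yields milliamperes, SF_VOLTAGE millivolts, SF_ENERGY
+ * microjoules and SF_TIME milliseconds.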
+ */ +#define SF_POWER 1000000 /* microwatts */ +#define SF_CURR 1000 /* milliamperes */ +#define SF_VOLTAGE 1000 /* millivolts */ +#define SF_ENERGY 1000000 /* microjoules */ +#define SF_TIME 1000 /* milliseconds */ + +/** + * struct xe_hwmon_energy_info - to accumulate energy + */ +struct xe_hwmon_energy_info { + /** @reg_val_prev: previous energy reg val */ + u32 reg_val_prev; + /** @accum_energy: accumulated energy */ + long accum_energy; +}; + +/** + * struct xe_hwmon - xe hwmon data structure + */ +struct xe_hwmon { + /** @hwmon_dev: hwmon device for xe */ + struct device *hwmon_dev; + /** @gt: primary gt */ + struct xe_gt *gt; + /** @hwmon_lock: lock for rw attributes*/ + struct mutex hwmon_lock; + /** @scl_shift_power: pkg power unit */ + int scl_shift_power; + /** @scl_shift_energy: pkg energy unit */ + int scl_shift_energy; + /** @scl_shift_time: pkg time unit */ + int scl_shift_time; + /** @ei: Energy info for energy1_input */ + struct xe_hwmon_energy_info ei; +}; + +static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) +{ + struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_reg reg = XE_REG(0); + + switch (hwmon_reg) { + case REG_PKG_RAPL_LIMIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_RAPL_LIMIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_RAPL_LIMIT; + break; + case REG_PKG_POWER_SKU: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU; + break; + case REG_PKG_POWER_SKU_UNIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + break; + case REG_GT_PERF_STATUS: + if (xe->info.platform == XE_DG2) + reg = GT_PERF_STATUS; + break; + case REG_PKG_ENERGY_STATUS: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_ENERGY_STATUS; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PLATFORM_ENERGY_STATUS; + break; + default: + drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); + break; + } + + return reg.raw; +} + +static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, + enum xe_hwmon_reg_operation operation, u64 *value, + u32 clr, u32 set) +{ + struct xe_reg reg; + + reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); + + if (!reg.raw) + return; + + switch (operation) { + case REG_READ32: + *value = xe_mmio_read32(hwmon->gt, reg); + break; + case REG_RMW32: + *value = xe_mmio_rmw32(hwmon->gt, reg, clr, set); + break; + case REG_READ64: + *value = xe_mmio_read64_2x32(hwmon->gt, reg); + break; + default: + drm_warn(>_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n", + operation); + break; + } +} + +#define PL1_DISABLE 0 + +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. 
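+ *
+ * Concretely, the read path below reports PL1_DISABLE (0) when
+ * PKG_PWR_LIM_1_EN is clear, otherwise converts PKG_PWR_LIM_1 to microwatts
+ * and, when PKG_POWER_SKU advertises non-zero PKG_MIN_PWR/PKG_MAX_PWR,
+ * clamps the reported value to that range.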
+ */ +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) +{ + u64 reg_val, min, max; + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0); + /* Check if PL1 limit is disabled */ + if (!(reg_val & PKG_PWR_LIM_1_EN)) { + *value = PL1_DISABLE; + goto unlock; + } + + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, ®_val, 0, 0); + min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *value = clamp_t(u64, *value, min, max); +unlock: + mutex_unlock(&hwmon->hwmon_lock); +} + +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) +{ + int ret = 0; + u64 reg_val; + + mutex_lock(&hwmon->hwmon_lock); + + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ + if (value == PL1_DISABLE) { + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, + PKG_PWR_LIM_1_EN, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, + PKG_PWR_LIM_1_EN, 0); + + if (reg_val & PKG_PWR_LIM_1_EN) { + ret = -EOPNOTSUPP; + goto unlock; + } + } + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); + reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) +{ + u64 reg_val; + + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, ®_val, 0, 0); + reg_val = REG_FIELD_GET(PKG_TDP, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); +} + +/* + * xe_hwmon_energy_get - Obtain energy value + * + * The underlying energy hardware register is 32-bits and is subject to + * overflow. How long before overflow? For example, with an example + * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and + * a power draw of 1000 watts, the 32-bit counter will overflow in + * approximately 4.36 minutes. + * + * Examples: + * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days + * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes + * + * The function significantly increases overflow duration (from 4.36 + * minutes) by accumulating the energy register into a 'long' as allowed by + * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), + * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and + * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before + * energy1_input overflows. This at 1000 W is an overflow duration of 278 years. 
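+ *
+ * The wrap-safe accumulation done below is, in sketch form:
+ *
+ *   delta = reg_val >= prev ? reg_val - prev : UINT_MAX - prev + reg_val;
+ *   accum_energy += delta;
+ *   reg_val_prev = reg_val;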
+ */ +static void +xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) +{ + struct xe_hwmon_energy_info *ei = &hwmon->ei; + u64 reg_val; + + xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, + ®_val, 0, 0); + + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + + ei->reg_val_prev = reg_val; + + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); +} + +static ssize_t +xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + u32 x, y, x_w = 2; /* 2 bits */ + u64 r, tau4, out; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, + REG_READ32, &r, 0, 0); + + mutex_unlock(&hwmon->hwmon_lock); + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); + y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + + /* + * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * = (4 | x) << (y - 2) + * + * Here (y - 2) ensures a 1.x fixed point representation of 1.x + * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75 + * + * As y can be < 2, we compute tau4 = (4 | x) << y + * and then add 2 when doing the final right shift to account for units + */ + tau4 = ((1 << x_w) | x) << y; + + /* val in hwmon interface units (millisec) */ + out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + return sysfs_emit(buf, "%llu\n", out); +} + +static ssize_t +xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + u32 x, y, rxy, x_w = 2; /* 2 bits */ + u64 tau4, r, max_win; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + /* + * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. + * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. + * + * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. + * However, it is observed that existing discrete GPUs does not provide correct + * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs + * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU. + */ +#define PKG_MAX_WIN_DEFAULT 0x12ull + + /* + * val must be < max in hwmon interface units. 
The steps below are + * explained in xe_hwmon_power1_max_interval_show() + */ + r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); + x = REG_FIELD_GET(PKG_MAX_WIN_X, r); + y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); + tau4 = ((1 << x_w) | x) << y; + max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + if (val > max_win) + return -EINVAL; + + /* val in hw units */ + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + + /* + * Convert val to 1.x * power(2,y) + * y = ilog2(val) + * x = (val - (1 << y)) >> (y - 2) + */ + if (!val) { + y = 0; + x = 0; + } else { + y = ilog2(val); + x = (val - (1ul << y)) << x_w >> y; + } + + rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r, + PKG_PWR_LIM_1_TIME, rxy); + + mutex_unlock(&hwmon->hwmon_lock); + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return count; +} + +static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, + xe_hwmon_power1_max_interval_show, + xe_hwmon_power1_max_interval_store, 0); + +static struct attribute *hwmon_attributes[] = { + &sensor_dev_attr_power1_max_interval.dev_attr.attr, + NULL +}; + +static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret = 0; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) + ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0; + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static const struct attribute_group hwmon_attrgroup = { + .attrs = hwmon_attributes, + .is_visible = xe_hwmon_attributes_visible, +}; + +static const struct attribute_group *hwmon_groups[] = { + &hwmon_attrgroup, + NULL +}; + +static const struct hwmon_channel_info *hwmon_info[] = { + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), + HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), + NULL +}; + +/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ +static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) +{ + /* Avoid Illegal Subcommand error */ + if (gt_to_xe(gt)->info.platform == XE_DG2) + return -ENXIO; + + return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_READ_I1, 0), + uval, 0); +} + +static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) +{ + return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), + uval); +} + +static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor) +{ + int ret; + u32 uval; + + mutex_lock(&hwmon->hwmon_lock); + + ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + if (ret) + goto unlock; + + *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + scale_factor, POWER_SETUP_I1_SHIFT); +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor) +{ + int ret; + u32 uval; + + mutex_lock(&hwmon->hwmon_lock); + + uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); + ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval); + + mutex_unlock(&hwmon->hwmon_lock); 
+ return ret; +} + +static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +{ + u64 reg_val; + + xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, + REG_READ32, ®_val, 0, 0); + /* HW register value in units of 2.5 millivolt */ + *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); +} + +static umode_t +xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) +{ + u32 uval; + + switch (attr) { + case hwmon_power_max: + return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0; + case hwmon_power_rated_max: + return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0; + case hwmon_power_crit: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + default: + return 0; + } +} + +static int +xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) +{ + switch (attr) { + case hwmon_power_max: + xe_hwmon_power_max_read(hwmon, val); + return 0; + case hwmon_power_rated_max: + xe_hwmon_power_rated_max_read(hwmon, val); + return 0; + case hwmon_power_crit: + return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER); + default: + return -EOPNOTSUPP; + } +} + +static int +xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) +{ + switch (attr) { + case hwmon_power_max: + return xe_hwmon_power_max_write(hwmon, val); + case hwmon_power_crit: + return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr) +{ + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + default: + return 0; + } +} + +static int +xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + switch (attr) { + case hwmon_curr_crit: + return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR); + default: + return -EOPNOTSUPP; + } +} + +static int +xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) +{ + switch (attr) { + case hwmon_curr_crit: + return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr) +{ + switch (attr) { + case hwmon_in_input: + return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + switch (attr) { + case hwmon_in_input: + xe_hwmon_get_voltage(hwmon, val); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr) +{ + switch (attr) { + case hwmon_energy_input: + return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 
0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + switch (attr) { + case hwmon_energy_input: + xe_hwmon_energy_get(hwmon, val); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_is_visible(hwmon, attr, channel); + break; + case hwmon_curr: + ret = xe_hwmon_curr_is_visible(hwmon, attr); + break; + case hwmon_in: + ret = xe_hwmon_in_is_visible(hwmon, attr); + break; + case hwmon_energy: + ret = xe_hwmon_energy_is_visible(hwmon, attr); + break; + default: + ret = 0; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static int +xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_read(hwmon, attr, channel, val); + break; + case hwmon_curr: + ret = xe_hwmon_curr_read(hwmon, attr, val); + break; + case hwmon_in: + ret = xe_hwmon_in_read(hwmon, attr, val); + break; + case hwmon_energy: + ret = xe_hwmon_energy_read(hwmon, attr, val); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static int +xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_write(hwmon, attr, channel, val); + break; + case hwmon_curr: + ret = xe_hwmon_curr_write(hwmon, attr, val); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static const struct hwmon_ops hwmon_ops = { + .is_visible = xe_hwmon_is_visible, + .read = xe_hwmon_read, + .write = xe_hwmon_write, +}; + +static const struct hwmon_chip_info hwmon_chip_info = { + .ops = &hwmon_ops, + .info = hwmon_info, +}; + +static void +xe_hwmon_get_preregistration_info(struct xe_device *xe) +{ + struct xe_hwmon *hwmon = xe->hwmon; + long energy; + u64 val_sku_unit = 0; + + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. + */ + if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) { + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, + REG_READ32, &val_sku_unit, 0, 0); + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + } + + /* + * Initialize 'struct xe_hwmon_energy_info', i.e. 
set fields to the + * first value of the energy register read + */ + if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0)) + xe_hwmon_energy_get(hwmon, &energy); +} + +static void xe_hwmon_mutex_destroy(void *arg) +{ + struct xe_hwmon *hwmon = arg; + + mutex_destroy(&hwmon->hwmon_lock); +} + +void xe_hwmon_register(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct xe_hwmon *hwmon; + + /* hwmon is available only for dGfx */ + if (!IS_DGFX(xe)) + return; + + hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + if (!hwmon) + return; + + xe->hwmon = hwmon; + + mutex_init(&hwmon->hwmon_lock); + if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) + return; + + /* primary GT to access device level properties */ + hwmon->gt = xe->tiles[0].primary_gt; + + xe_hwmon_get_preregistration_info(xe); + + drm_dbg(&xe->drm, "Register xe hwmon interface\n"); + + /* hwmon_dev points to device hwmon<i> */ + hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, + &hwmon_chip_info, + hwmon_groups); + + if (IS_ERR(hwmon->hwmon_dev)) { + drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); + xe->hwmon = NULL; + return; + } +} + diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h new file mode 100644 index 000000000000..c42a1de2cd7a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hwmon.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_HWMON_H_ +#define _XE_HWMON_H_ + +#include <linux/types.h> + +struct xe_device; + +#if IS_REACHABLE(CONFIG_HWMON) +void xe_hwmon_register(struct xe_device *xe); +#else +static inline void xe_hwmon_register(struct xe_device *xe) { }; +#endif + +#endif /* _XE_HWMON_H_ */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c new file mode 100644 index 000000000000..d1f5ba4bb745 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -0,0 +1,666 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_irq.h" + +#include <linux/sched/clock.h> + +#include <drm/drm_managed.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_device.h" +#include "xe_display.h" +#include "xe_drv.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_hw_engine.h" +#include "xe_mmio.h" + +/* + * Interrupt registers for a unit are always consecutive and ordered + * ISR, IMR, IIR, IER. + */ +#define IMR(offset) XE_REG(offset + 0x4) +#define IIR(offset) XE_REG(offset + 0x8) +#define IER(offset) XE_REG(offset + 0xc) + +static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) +{ + u32 val = xe_mmio_read32(mmio, reg); + + if (val == 0) + return; + + drm_WARN(>_to_xe(mmio)->drm, 1, + "Interrupt register 0x%x is not zero: 0x%08x\n", + reg.addr, val); + xe_mmio_write32(mmio, reg, 0xffffffff); + xe_mmio_read32(mmio, reg); + xe_mmio_write32(mmio, reg, 0xffffffff); + xe_mmio_read32(mmio, reg); +} + +/* + * Unmask and enable the specified interrupts. Does not check current state, + * so any bits not specified here will become masked and disabled. + */ +static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits) +{ + struct xe_gt *mmio = tile->primary_gt; + + /* + * If we're just enabling an interrupt now, it shouldn't already + * be raised in the IIR. 
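+	 *
+	 * The sequence is therefore: check that IIR is clear, program the
+	 * enable bits into IER, unmask them in IMR (~bits), then do a posting
+	 * read of IMR to flush the writes.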
+ */ + assert_iir_is_zero(mmio, IIR(irqregs)); + + xe_mmio_write32(mmio, IER(irqregs), bits); + xe_mmio_write32(mmio, IMR(irqregs), ~bits); + + /* Posting read */ + xe_mmio_read32(mmio, IMR(irqregs)); +} + +/* Mask and disable all interrupts. */ +static void mask_and_disable(struct xe_tile *tile, u32 irqregs) +{ + struct xe_gt *mmio = tile->primary_gt; + + xe_mmio_write32(mmio, IMR(irqregs), ~0); + /* Posting read */ + xe_mmio_read32(mmio, IMR(irqregs)); + + xe_mmio_write32(mmio, IER(irqregs), 0); + + /* IIR can theoretically queue up two events. Be paranoid. */ + xe_mmio_write32(mmio, IIR(irqregs), ~0); + xe_mmio_read32(mmio, IIR(irqregs)); + xe_mmio_write32(mmio, IIR(irqregs), ~0); + xe_mmio_read32(mmio, IIR(irqregs)); +} + +static u32 xelp_intr_disable(struct xe_device *xe) +{ + struct xe_gt *mmio = xe_root_mmio_gt(xe); + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0); + + /* + * Now with master disabled, get a sample of level indications + * for this interrupt. Indications will be cleared on related acks. + * New indications can and will light up during processing, + * and will generate new interrupt after enabling master. + */ + return xe_mmio_read32(mmio, GFX_MSTR_IRQ); +} + +static u32 +gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl) +{ + struct xe_gt *mmio = xe_root_mmio_gt(xe); + u32 iir; + + if (!(master_ctl & GU_MISC_IRQ)) + return 0; + + iir = xe_mmio_read32(mmio, IIR(GU_MISC_IRQ_OFFSET)); + if (likely(iir)) + xe_mmio_write32(mmio, IIR(GU_MISC_IRQ_OFFSET), iir); + + return iir; +} + +static inline void xelp_intr_enable(struct xe_device *xe, bool stall) +{ + struct xe_gt *mmio = xe_root_mmio_gt(xe); + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ); + if (stall) + xe_mmio_read32(mmio, GFX_MSTR_IRQ); +} + +/* Enable/unmask the HWE interrupts for a specific GT's engines. 
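Note the polarity split in the helpers above: IER takes the enable bits directly, while IMR takes their complement, because a set IMR bit masks (blocks) a source. A userspace sketch of that convention, with plain variables standing in for the registers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bits = 0x00000101;	/* sources we want delivered */
	uint32_t ier = bits;		/* enable exactly these */
	uint32_t imr = ~bits;		/* mask everything else */

	/* A source is delivered only if it is enabled and not masked. */
	uint32_t source = 0x00000100;
	int delivered = (source & ier) && !(source & imr);

	printf("ier=%#010x imr=%#010x delivered=%d\n", ier, imr, delivered);
	return 0;
}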
*/ +void xe_irq_enable_hwe(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 ccs_mask, bcs_mask; + u32 irqs, dmask, smask; + u32 gsc_mask = 0; + + if (xe_device_uc_enabled(xe)) { + irqs = GT_RENDER_USER_INTERRUPT | + GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + } else { + irqs = GT_RENDER_USER_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; + } + + ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + + dmask = irqs << 16 | irqs; + smask = irqs << 16; + + if (!xe_gt_is_media_type(gt)) { + /* Enable interrupts for each engine class */ + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask); + if (ccs_mask) + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask); + + /* Unmask interrupts for each engine instance */ + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + } + + if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { + /* Enable interrupts for each engine class */ + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask); + + /* Unmask interrupts for each engine instance */ + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); + + if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) + gsc_mask = irqs; + else if (HAS_HECI_GSCFI(xe)) + gsc_mask = GSC_IRQ_INTF(1); + if (gsc_mask) { + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask); + } + } +} + +static u32 +gt_engine_identity(struct xe_device *xe, + struct xe_gt *mmio, + const unsigned int bank, + const unsigned int bit) +{ + u32 timeout_ts; + u32 ident; + + lockdep_assert_held(&xe->irq.lock); + + xe_mmio_write32(mmio, IIR_REG_SELECTOR(bank), BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. 
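The shift by 10 turns local_clock()'s nanoseconds into roughly-microsecond units (a cheap divide by 1024 instead of 1000), so the "+ 100" above really is an ~100us budget. A standalone rendition of the same bounded-poll pattern, with clock_gettime() standing in for local_clock():

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
	/* ns >> 10 is ~1us units; budget ~100us, just like the driver. */
	uint64_t timeout_ts = (now_ns() >> 10) + 100;
	int valid = 0;

	do {
		/* poll the (stand-in) identity register here */
	} while (!valid && (now_ns() >> 10) <= timeout_ts);

	printf("gave up after ~100us, valid=%d\n", valid);
	return 0;
}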
+ */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank)); + } while (!(ident & INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & INTR_DATA_VALID))) { + drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), ident); + + return ident; +} + +#define OTHER_MEDIA_GUC_INSTANCE 16 + +static void +gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) +{ + if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(&gt->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(&gt->uc.guc, iir); + + if (instance != OTHER_GUC_INSTANCE && + instance != OTHER_MEDIA_GUC_INSTANCE) { + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); + } +} + +static struct xe_gt *pick_engine_gt(struct xe_tile *tile, + enum xe_engine_class class, + unsigned int instance) +{ + struct xe_device *xe = tile_to_xe(tile); + + if (MEDIA_VER(xe) < 13) + return tile->primary_gt; + + if (class == XE_ENGINE_CLASS_VIDEO_DECODE || + class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return tile->media_gt; + + if (class == XE_ENGINE_CLASS_OTHER && + (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE)) + return tile->media_gt; + + return tile->primary_gt; +} + +static void gt_irq_handler(struct xe_tile *tile, + u32 master_ctl, unsigned long *intr_dw, + u32 *identity) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *mmio = tile->primary_gt; + unsigned int bank, bit; + u16 instance, intr_vec; + enum xe_engine_class class; + struct xe_hw_engine *hwe; + + spin_lock(&xe->irq.lock); + + for (bank = 0; bank < 2; bank++) { + if (!(master_ctl & GT_DW_IRQ(bank))) + continue; + + intr_dw[bank] = xe_mmio_read32(mmio, GT_INTR_DW(bank)); + for_each_set_bit(bit, intr_dw + bank, 32) + identity[bit] = gt_engine_identity(xe, mmio, bank, bit); + xe_mmio_write32(mmio, GT_INTR_DW(bank), intr_dw[bank]); + + for_each_set_bit(bit, intr_dw + bank, 32) { + struct xe_gt *engine_gt; + + class = INTR_ENGINE_CLASS(identity[bit]); + instance = INTR_ENGINE_INSTANCE(identity[bit]); + intr_vec = INTR_ENGINE_INTR(identity[bit]); + + engine_gt = pick_engine_gt(tile, class, instance); + + hwe = xe_gt_hw_engine(engine_gt, class, instance, false); + if (hwe) { + xe_hw_engine_handle_irq(hwe, intr_vec); + continue; + } + + if (class == XE_ENGINE_CLASS_OTHER) { + /* HECI GSCFI interrupts come from outside of GT */ + if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE) + xe_heci_gsc_irq_handler(xe, intr_vec); + else + gt_other_irq_handler(engine_gt, instance, intr_vec); + continue; + } + } + } + + spin_unlock(&xe->irq.lock); +} + +/* + * Top-level interrupt handler for Xe_LP platforms (which did not have + * a "master tile" interrupt register).
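gt_irq_handler() above resolves every event from a single packed identity dword via the INTR_ENGINE_CLASS/INSTANCE/INTR field helpers, which live in the regs headers rather than this hunk. A toy decoder with invented field positions, purely to illustrate the shape of that dispatch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical packing for illustration; the real layout differs. */
#define TOY_CLASS(x)	(((x) >> 16) & 0xff)
#define TOY_INSTANCE(x)	(((x) >> 8) & 0xff)
#define TOY_VECTOR(x)	((x) & 0xff)

int main(void)
{
	uint32_t ident = (1u << 16) | (2u << 8) | 0x01;

	printf("class=%u instance=%u vector=%#x\n",
	       TOY_CLASS(ident), TOY_INSTANCE(ident), TOY_VECTOR(ident));
	return 0;
}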
+ */ +static irqreturn_t xelp_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile = xe_device_get_root_tile(xe); + u32 master_ctl, gu_misc_iir; + unsigned long intr_dw[2]; + u32 identity[32]; + + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + + master_ctl = xelp_intr_disable(xe); + if (!master_ctl) { + xelp_intr_enable(xe, false); + return IRQ_NONE; + } + + gt_irq_handler(tile, master_ctl, intr_dw, identity); + + xe_display_irq_handler(xe, master_ctl); + + gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); + + xelp_intr_enable(xe, false); + + xe_display_irq_enable(xe, gu_misc_iir); + + return IRQ_HANDLED; +} + +static u32 dg1_intr_disable(struct xe_device *xe) +{ + struct xe_gt *mmio = xe_root_mmio_gt(xe); + u32 val; + + /* First disable interrupts */ + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, 0); + + /* Get the indication levels and ack the master unit */ + val = xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR); + if (unlikely(!val)) + return 0; + + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, val); + + return val; +} + +static void dg1_intr_enable(struct xe_device *xe, bool stall) +{ + struct xe_gt *mmio = xe_root_mmio_gt(xe); + + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); + if (stall) + xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR); +} + +/* + * Top-level interrupt handler for Xe_LP+ and beyond. These platforms have + * a "master tile" interrupt register which must be consulted before the + * "graphics master" interrupt register. + */ +static irqreturn_t dg1_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0; + unsigned long intr_dw[2]; + u32 identity[32]; + u8 id; + + /* TODO: This really shouldn't be copied+pasted */ + + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + + master_tile_ctl = dg1_intr_disable(xe); + if (!master_tile_ctl) { + dg1_intr_enable(xe, false); + return IRQ_NONE; + } + + for_each_tile(tile, xe, id) { + struct xe_gt *mmio = tile->primary_gt; + + if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0) + continue; + + master_ctl = xe_mmio_read32(mmio, GFX_MSTR_IRQ); + + /* + * We might be in irq handler just when PCIe DPC is initiated + * and all MMIO reads will be returned with all 1's. Ignore this + * irq as device is inaccessible. + */ + if (master_ctl == REG_GENMASK(31, 0)) { + dev_dbg(tile_to_xe(tile)->drm.dev, + "Ignore this IRQ as device might be in DPC containment.\n"); + return IRQ_HANDLED; + } + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl); + + gt_irq_handler(tile, master_ctl, intr_dw, identity); + + /* + * Display interrupts (including display backlight operations + * that get reported as Gunit GSE) would only be hooked up to + * the primary tile. + */ + if (id == 0) { + xe_display_irq_handler(xe, master_ctl); + gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); + } + } + + dg1_intr_enable(xe, false); + xe_display_irq_enable(xe, gu_misc_iir); + + return IRQ_HANDLED; +} + +static void gt_irq_reset(struct xe_tile *tile) +{ + struct xe_gt *mmio = tile->primary_gt; + + u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COPY); + + /* Disable RCS, BCS, VCS and VECS class engines. 
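The REG_GENMASK(31, 0) comparison in dg1_irq_handler() is the usual liveness test for PCIe devices: a device that has been surprise-removed or put into DPC containment returns all 1's for every MMIO read. The idiom in isolation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* A dead or DPC-contained PCI device reads back as 0xffffffff everywhere. */
static bool mmio_looks_dead(uint32_t val)
{
	return val == UINT32_MAX;
}

int main(void)
{
	printf("%d %d\n", mmio_looks_dead(0xffffffff), mmio_looks_dead(0x80000000));
	return 0;
}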
*/ + xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, 0); + if (ccs_mask) + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~0); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~0); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~0); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~0); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~0); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~0); + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~0); + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~0); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); + + if ((tile->media_gt && + xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || + HAS_HECI_GSCFI(tile_to_xe(tile))) { + xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); + } + + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK, ~0); + xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GUC_SG_INTR_MASK, ~0); +} + +static void xelp_irq_reset(struct xe_tile *tile) +{ + xelp_intr_disable(tile_to_xe(tile)); + + gt_irq_reset(tile); + + mask_and_disable(tile, PCU_IRQ_OFFSET); +} + +static void dg1_irq_reset(struct xe_tile *tile) +{ + if (tile->id == 0) + dg1_intr_disable(tile_to_xe(tile)); + + gt_irq_reset(tile); + + mask_and_disable(tile, PCU_IRQ_OFFSET); +} + +static void dg1_irq_reset_mstr(struct xe_tile *tile) +{ + struct xe_gt *mmio = tile->primary_gt; + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0); +} + +static void xe_irq_reset(struct xe_device *xe) +{ + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, xe, id) { + if (GRAPHICS_VERx100(xe) >= 1210) + dg1_irq_reset(tile); + else + xelp_irq_reset(tile); + } + + tile = xe_device_get_root_tile(xe); + mask_and_disable(tile, GU_MISC_IRQ_OFFSET); + xe_display_irq_reset(xe); + + /* + * The tile's top-level status register should be the last one + * to be reset to avoid possible bit re-latching from lower + * level interrupts. + */ + if (GRAPHICS_VERx100(xe) >= 1210) { + for_each_tile(tile, xe, id) + dg1_irq_reset_mstr(tile); + } +} + +static void xe_irq_postinstall(struct xe_device *xe) +{ + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); + + /* + * ASLE backlight operations are reported via GUnit GSE interrupts + * on the root tile. 
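The GRAPHICS_VERx100(xe) >= 1210 checks used throughout this file compare graphics IP versions packed as major * 100 + minor, so 1210 reads as "12.10 or newer" — the platforms that take the DG1-style per-tile path here. A tiny sketch of that encoding:

#include <stdio.h>

/* 12.10 -> 1210, 12.55 -> 1255, 20.0 -> 2000: comparisons stay integer. */
static int verx100(int major, int minor)
{
	return major * 100 + minor;
}

int main(void)
{
	printf("12.70 >= 12.10? %d\n", verx100(12, 70) >= verx100(12, 10));
	return 0;
}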
+ */ + unmask_and_enable(xe_device_get_root_tile(xe), + GU_MISC_IRQ_OFFSET, GU_MISC_GSE); + + /* Enable top-level interrupts */ + if (GRAPHICS_VERx100(xe) >= 1210) + dg1_intr_enable(xe, true); + else + xelp_intr_enable(xe, true); +} + +static irq_handler_t xe_irq_handler(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 1210) + return dg1_irq_handler; + else + return xelp_irq_handler; +} + +static void irq_uninstall(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq; + + if (!xe->irq.enabled) + return; + + xe->irq.enabled = false; + xe_irq_reset(xe); + + irq = pci_irq_vector(pdev, 0); + free_irq(irq, xe); +} + +int xe_irq_install(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + irq_handler_t irq_handler; + int err, irq; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + xe_irq_reset(xe); + + err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (err < 0) { + drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); + return err; + } + + irq = pci_irq_vector(pdev, 0); + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); + return err; + } + + xe->irq.enabled = true; + + xe_irq_postinstall(xe); + + err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); + if (err) + goto free_irq_handler; + + return 0; + +free_irq_handler: + free_irq(irq, xe); + + return err; +} + +void xe_irq_shutdown(struct xe_device *xe) +{ + irq_uninstall(&xe->drm, xe); +} + +void xe_irq_suspend(struct xe_device *xe) +{ + int irq = to_pci_dev(xe->drm.dev)->irq; + + spin_lock_irq(&xe->irq.lock); + xe->irq.enabled = false; /* no new irqs */ + spin_unlock_irq(&xe->irq.lock); + + synchronize_irq(irq); /* flush irqs */ + xe_irq_reset(xe); /* turn irqs off */ +} + +void xe_irq_resume(struct xe_device *xe) +{ + struct xe_gt *gt; + int id; + + /* + * lock not needed: + * 1. no irq will arrive before the postinstall + * 2. 
display is not yet resumed + */ + xe->irq.enabled = true; + xe_irq_reset(xe); + xe_irq_postinstall(xe); /* turn irqs on */ + + for_each_gt(gt, xe, id) + xe_irq_enable_hwe(gt); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h new file mode 100644 index 000000000000..bc42bc90d967 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_IRQ_H_ +#define _XE_IRQ_H_ + +struct xe_device; +struct xe_tile; +struct xe_gt; + +int xe_irq_install(struct xe_device *xe); +void xe_irq_shutdown(struct xe_device *xe); +void xe_irq_suspend(struct xe_device *xe); +void xe_irq_resume(struct xe_device *xe); +void xe_irq_enable_hwe(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c new file mode 100644 index 000000000000..0d7c5514e092 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/align.h> + +#include <drm/drm_managed.h> + +#include "regs/xe_sriov_regs.h" + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_lmtt.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_res_cursor.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" + +/** + * DOC: Local Memory Translation Table + * + * The Local Memory Translation Table (LMTT) provides additional abstraction + * when Virtual Function (VF) is accessing device Local Memory (VRAM). + * + * The Root LMTT Page Directory contains one entry for each VF. Entries are + * indexed by the function number (1-based, index 0 is unused). + * + * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_. + */ + +#define lmtt_assert(lmtt, condition) xe_tile_assert(lmtt_to_tile(lmtt), condition) +#define lmtt_debug(lmtt, msg...) xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg) + +static bool xe_has_multi_level_lmtt(struct xe_device *xe) +{ + return xe->info.platform == XE_PVC; +} + +static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt) +{ + return container_of(lmtt, struct xe_tile, sriov.pf.lmtt); +} + +static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt) +{ + return tile_to_xe(lmtt_to_tile(lmtt)); +} + +static u64 lmtt_page_size(struct xe_lmtt *lmtt) +{ + return BIT_ULL(lmtt->ops->lmtt_pte_shift(0)); +} + +static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level) +{ + unsigned int num_entries = level ? 
lmtt->ops->lmtt_pte_num(level) : 0; + struct xe_lmtt_pt *pt; + struct xe_bo *bo; + int err; + + pt = kzalloc(struct_size(pt, entries, num_entries), GFP_KERNEL); + if (!pt) { + err = -ENOMEM; + goto out; + } + + bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL, + PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out_free_pt; + } + + lmtt_assert(lmtt, xe_bo_is_vram(bo)); + + pt->level = level; + pt->bo = bo; + return pt; + +out_free_pt: + kfree(pt); +out: + return ERR_PTR(err); +} + +static void lmtt_pt_free(struct xe_lmtt_pt *pt) +{ + xe_bo_unpin_map_no_vm(pt->bo); + kfree(pt); +} + +static int lmtt_init_pd(struct xe_lmtt *lmtt) +{ + struct xe_lmtt_pt *pd; + + lmtt_assert(lmtt, !lmtt->pd); + lmtt_assert(lmtt, lmtt->ops->lmtt_root_pd_level()); + + pd = lmtt_pt_alloc(lmtt, lmtt->ops->lmtt_root_pd_level()); + if (IS_ERR(pd)) + return PTR_ERR(pd); + + lmtt->pd = pd; + return 0; +} + +static void lmtt_fini_pd(struct xe_lmtt *lmtt) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + unsigned int num_entries = lmtt->ops->lmtt_pte_num(pd->level); + unsigned int n = 0; + + /* make sure we don't leak */ + for (n = 0; n < num_entries; n++) + lmtt_assert(lmtt, !pd->entries[n]); + + lmtt->pd = NULL; + lmtt_pt_free(pd); +} + +static void fini_lmtt(struct drm_device *drm, void *arg) +{ + struct xe_lmtt *lmtt = arg; + + lmtt_assert(lmtt, !(!!lmtt->ops ^ !!lmtt->pd)); + + if (!lmtt->pd) + return; + + lmtt_fini_pd(lmtt); + lmtt->ops = NULL; +} + +/** + * xe_lmtt_init - LMTT software initialization. + * @lmtt: the &xe_lmtt to initialize + * + * The LMTT initialization requires two steps. + * + * xe_lmtt_init() checks if the LMTT is required on the current device and selects + * and initializes the proper variant of the LMTT Root Directory. Currently supported + * variants are `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_. + * + * In the next step, xe_lmtt_init_hw() will register this directory on the hardware. + * + * Notes: + * The LMTT allocations are managed and will be implicitly released on driver unload. + * This function shall be called only once and only when running as a PF driver. + * Any LMTT initialization failure should block enabling of VFs. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_lmtt_init(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + lmtt_assert(lmtt, !lmtt->ops); + + if (!IS_DGFX(xe)) + return 0; + + if (xe_has_multi_level_lmtt(xe)) + lmtt->ops = &lmtt_ml_ops; + else + lmtt->ops = &lmtt_2l_ops; + + err = lmtt_init_pd(lmtt); + if (unlikely(err)) + goto fail; + + return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt); + +fail: + lmtt->ops = NULL; + return err; +} + +static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) +{ + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_device *xe = tile_to_xe(tile); + dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); + + lmtt_debug(lmtt, "DIR offset %pad\n", &offset); + lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); + lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); + + xe_mmio_write32(tile->primary_gt, + GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, + LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); +} + +/** + * xe_lmtt_init_hw - Perform LMTT hardware initialization.
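lmtt_setup_dir_ptr() above publishes the root directory by writing its 64K-aligned VRAM offset, divided by 64K, into the LMTT_DIR_PTR field of LMEM_CFG. A standalone sketch of that encoding; the LMEM_EN bit and DIR_PTR field positions are assumed here for illustration (the driver uses REG_FIELD_PREP with the real field definitions):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_64K	0x10000u
#define LMEM_EN	(1u << 31)	/* assumed bit position, illustration only */

int main(void)
{
	uint64_t dir_offset = 0x180000;	/* placeholder root-PD VRAM offset */
	uint32_t lmem_cfg;

	assert(dir_offset % SZ_64K == 0);	/* hardware requires 64K alignment */
	lmem_cfg = LMEM_EN | (uint32_t)(dir_offset / SZ_64K); /* DIR_PTR assumed at bit 0 */

	printf("LMEM_CFG=%#010x\n", lmem_cfg);
	return 0;
}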
+ * @lmtt: the &xe_lmtt to initialize + * + * This function is a second step of the LMTT initialization. + * This function registers LMTT Root Directory prepared in xe_lmtt_init(). + * + * This function shall be called after every hardware reset. + * This function shall be called only when running as a PF driver. + */ +void xe_lmtt_init_hw(struct xe_lmtt *lmtt) +{ + if (!lmtt->pd) + return; + + lmtt_setup_dir_ptr(lmtt); +} + +static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, + u64 pte, unsigned int idx) +{ + unsigned int level = pt->level; + + lmtt_assert(lmtt, idx <= lmtt->ops->lmtt_pte_num(level)); + lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte); + + switch (lmtt->ops->lmtt_pte_size(level)) { + case sizeof(u32): + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); + break; + case sizeof(u64): + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); + break; + default: + lmtt_assert(lmtt, !!!"invalid pte size"); + } +} + +static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd) +{ + unsigned int num_entries = pd->level ? lmtt->ops->lmtt_pte_num(pd->level) : 0; + struct xe_lmtt_pt *pt; + unsigned int i; + + for (i = 0; i < num_entries; i++) { + pt = pd->entries[i]; + pd->entries[i] = NULL; + if (!pt) + continue; + + lmtt_destroy_pt(lmtt, pt); + } + + lmtt_pt_free(pd); +} + +static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + + pt = pd->entries[vfid]; + pd->entries[vfid] = NULL; + if (!pt) + return; + + lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + + lmtt_assert(lmtt, pd->level > 0); + lmtt_assert(lmtt, pt->level == pd->level - 1); + lmtt_destroy_pt(lmtt, pt); +} + +static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd, + u64 start, u64 end) +{ + u64 pte_addr_shift = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level)); + u64 offset; + int err; + + lmtt_assert(lmtt, pd->level > 0); + + offset = start; + while (offset < end) { + struct xe_lmtt_pt *pt; + u64 next, pde, pt_addr; + unsigned int idx; + + pt = lmtt_pt_alloc(lmtt, pd->level - 1); + if (IS_ERR(pt)) + return PTR_ERR(pt); + + pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE); + + idx = lmtt->ops->lmtt_pte_index(offset, pd->level); + pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level); + + lmtt_write_pte(lmtt, pd, pde, idx); + + pd->entries[idx] = pt; + + next = min(end, round_up(offset + 1, pte_addr_shift)); + + if (pt->level != 0) { + err = __lmtt_alloc_range(lmtt, pt, offset, next); + if (err) + return err; + } + + offset = next; + } + + return 0; +} + +static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + u64 pt_addr; + u64 pde; + int err; + + lmtt_assert(lmtt, pd->level > 0); + lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level)); + lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt))); + lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt))); + + if (pd->entries[vfid]) + return -ENOTEMPTY; + + pt = lmtt_pt_alloc(lmtt, pd->level - 1); + if (IS_ERR(pt)) + return PTR_ERR(pt); + + pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE); + + pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level); + + lmtt_write_pte(lmtt, pd, pde, vfid); + + pd->entries[vfid] = pt; + + if (pt->level != 0) { + err = __lmtt_alloc_range(lmtt, pt, start, end); + if (err) + goto out_free_pt; + } + + return 0; + +out_free_pt: + lmtt_pt_free(pt); + 
return err; +} + +static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + + lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level)); + pt = pd->entries[vfid]; + + while (pt->level) { + lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <= + lmtt->ops->lmtt_pte_num(pt->level)); + + pt = pt->entries[lmtt->ops->lmtt_pte_index(addr, pt->level)]; + + addr >>= lmtt->ops->lmtt_pte_shift(pt->level); + } + + lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <= + lmtt->ops->lmtt_pte_num(pt->level)); + lmtt_assert(lmtt, pt->level != pd->level); + lmtt_assert(lmtt, pt->level == 0); + return pt; +} + +static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start) +{ + u64 page_size = lmtt_page_size(lmtt); + struct xe_res_cursor cur; + struct xe_lmtt_pt *pt; + u64 addr, vram_offset; + + lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, xe_bo_is_vram(bo)); + + vram_offset = vram_region_gpu_offset(bo->ttm.resource); + xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + while (cur.remaining) { + addr = xe_res_dma(&cur); + addr += vram_offset; /* XXX */ + + pt = lmtt_leaf_pt(lmtt, vfid, start); + + lmtt_write_pte(lmtt, pt, lmtt->ops->lmtt_pte_encode(addr, 0), + lmtt->ops->lmtt_pte_index(start, 0)); + + xe_res_next(&cur, page_size); + start += page_size; + } +} + +/** + * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * @range: top range of LMEM offset to be supported + * + * This function creates empty LMTT page tables for the given VF to support + * LMEM offsets up to the maximum #range. The LMTT page tables created by this + * function must be released using xe_lmtt_drop_pages(). + * + * Notes: + * This function shall be called only after successful LMTT initialization. + * See xe_lmtt_init(). + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + return lmtt_alloc_range(lmtt, vfid, 0, range); +} + +/** + * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * @bo: the buffer object with LMEM allocation to be mapped + * @offset: the offset at which #bo should be mapped + * + * This function updates the VF's LMTT entries to use the given buffer object as a backstore. + * + * Notes: + * This function shall be called only after successful preparation of the + * VF's LMTT Page Tables. See xe_lmtt_prepare_pages(). + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + lmtt_insert_bo(lmtt, vfid, bo, offset); + return 0; +} + +/** + * xe_lmtt_drop_pages - Remove VF's LMTT Pages. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * + * This function removes all LMTT Page Tables prepared by xe_lmtt_prepare_pages(). + * + * This function shall be called only after successful LMTT initialization. + * See xe_lmtt_init().
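Putting the three exported calls together, the PF-side provisioning flow for one VF is prepare -> populate -> (on teardown) drop. A hedged kernel-context sketch using only functions declared in this patch; the helper name and the single-mapping flow are illustrative, not part of the series:

/* Illustrative only: maps one LMEM object at VF offset 0. */
static int toy_provision_vf(struct xe_lmtt *lmtt, unsigned int vfid,
			    struct xe_bo *lmem_obj, u64 lmem_size)
{
	int err;

	/* Build empty page tables covering [0, lmem_size) for this VF. */
	err = xe_lmtt_prepare_pages(lmtt, vfid, lmem_size);
	if (err)
		return err;

	/* Back VF offset 0 with the allocated LMEM object. */
	err = xe_lmtt_populate_pages(lmtt, vfid, lmem_obj, 0);
	if (err)
		xe_lmtt_drop_pages(lmtt, vfid);	/* unwind on failure */

	return err;
}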
+ */ +void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + lmtt_drop_pages(lmtt, vfid); +} + +/** + * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations. + * @lmtt: the &xe_lmtt + * @size: the size of the LMEM to be mapped over LMTT (including any offset) + * + * This function shall be called only by PF. + * + * Return: size of the PT allocation(s) needed to support given LMEM size. + */ +u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size) +{ + unsigned int level = 0; + u64 pt_size; + + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, lmtt->ops); + + pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)); + + while (++level < lmtt->ops->lmtt_root_pd_level()) { + pt_size *= lmtt->ops->lmtt_pte_index(size, level) + 1; + pt_size += PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)); + } + + return pt_size; +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_lmtt_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h new file mode 100644 index 000000000000..cb10ef994db6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LMTT_H_ +#define _XE_LMTT_H_ + +#include <linux/types.h> + +struct xe_bo; +struct xe_lmtt; +struct xe_lmtt_ops; + +#ifdef CONFIG_PCI_IOV +int xe_lmtt_init(struct xe_lmtt *lmtt); +void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); +int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); +void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); +u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size); +#else +static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; } +static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c new file mode 100644 index 000000000000..84bc5c4212b5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/align.h> +#include <linux/bitfield.h> +#include <linux/log2.h> +#include <linux/sizes.h> + +#include "xe_lmtt_types.h" +#include "xe_macros.h" + +/** + * DOC: Two-Level LMTT Structure + * + * LMHAW (Local Memory Host Address Width) is 37 bit (128GB) + * + * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB) + * + * The following figure illustrates the structure and function of the 2L LMTT:: + * + * LMTT Directory + * (1 Entry per VF) + * +-----------+ LMTT (per VF) + * | | +-----------+ + * | | | | + * | | index: | | + * | | LMEM VF +===========+ + * | | offset --> | PTE | ==> LMEM PF offset + * | | +===========+ + * index: +===========+ | | + * VFID --> | PDE | -----------------> +-----------+ + * +===========+ / \. + * | | / \. + * | | / \. + * | | / \. + * +-----------+ <== [LMTT Directory Ptr] \. + * / \ / \. + * / \ +-----------+-----------------+------+---+ + * / \ | 31:HAW-16 | HAW-17:5 | 4:1 | 0 | + * / \ +===========+=================+======+===+ + * / \ | Reserved | LMEM Page (2MB) | Rsvd | V | + * / \ +-----------+-----------------+------+---+ + * / \. 
+ * +-----------+-----------------+------+---+ + * | 31:HAW-12 | HAW-13:4 | 3:1 | 0 | + * +===========+=================+======+===+ + * | Reserved | LMTT Ptr (64KB) | Rsvd | V | + * +-----------+-----------------+------+---+ + * + */ + +typedef u32 lmtt_2l_pde_t; +typedef u32 lmtt_2l_pte_t; + +#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB) +#define LMTT_2L_HAW 37 /* 128 GiB */ +#else +#define LMTT_2L_HAW 35 /* 32 GiB */ +#endif + +#define LMTT_2L_PDE_MAX_NUM 64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */ +#define LMTT_2L_PDE_LMTT_PTR GENMASK(LMTT_2L_HAW - 13, 4) +#define LMTT_2L_PDE_VALID BIT(0) + +#define LMTT_2L_PTE_MAX_NUM BIT(LMTT_2L_HAW - ilog2(SZ_2M)) +#define LMTT_2L_PTE_LMEM_PAGE GENMASK(LMTT_2L_HAW - 17, 5) +#define LMTT_2L_PTE_VALID BIT(0) + +static unsigned int lmtt_2l_root_pd_level(void) +{ + return 1; /* implementation is 0-based */ +} + +static unsigned int lmtt_2l_pte_num(unsigned int level) +{ + switch (level) { + case 1: + return LMTT_2L_PDE_MAX_NUM; + case 0: + BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K); + BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K); + return LMTT_2L_PTE_MAX_NUM; + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_size(unsigned int level) +{ + switch (level) { + case 1: + return sizeof(lmtt_2l_pde_t); + case 0: + return sizeof(lmtt_2l_pte_t); + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_shift(unsigned int level) +{ + switch (level) { + case 0: + return ilog2(SZ_2M); + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level) +{ + addr >>= lmtt_2l_pte_shift(level); + + switch (level) { + case 0: + /* SZ_2M increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM); + return addr & (LMTT_2L_PTE_MAX_NUM - 1); + default: + return 0; + } +} + +static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level) +{ + switch (level) { + case 0: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M)); + XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M)); + return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID; + case 1: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K)); + XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K)); + return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID; + default: + XE_WARN_ON(true); + return 0; + } +} + +const struct xe_lmtt_ops lmtt_2l_ops = { + .lmtt_root_pd_level = lmtt_2l_root_pd_level, + .lmtt_pte_num = lmtt_2l_pte_num, + .lmtt_pte_size = lmtt_2l_pte_size, + .lmtt_pte_shift = lmtt_2l_pte_shift, + .lmtt_pte_index = lmtt_2l_pte_index, + .lmtt_pte_encode = lmtt_2l_pte_encode, +}; diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c new file mode 100644 index 000000000000..b21215a2edd6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/align.h> +#include <linux/bitfield.h> +#include <linux/log2.h> +#include <linux/sizes.h> + +#include "xe_lmtt_types.h" +#include "xe_macros.h" + +/** + * DOC: Multi-Level LMTT Structure + * + * LMHAW (Local Memory Host Address Width) is 48 bit (256TB) + * + * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB) + * + * The following figure illustrates the structure and function of the ML LMTT:: + * + * LMTT L3 Directory + * (1 Entry per VF) LMTT L1 Leaf + * +-----------+ +-----------+ + * | | LMTT L2 (per VF) | | + * | | +-----------+ | | + * | | | | index: +===========+ + * | | 
| | GDPA --> | PTE | => LMEM PF offset + * | | | | 34:21 +===========+ + * | | index: | | | | + * | | LMEM VF +===========+ | | + * | | offset -> | PTE | ----------> +-----------+ + * | | GAW-1:35 +===========+ / \. + * index: +===========+ | | / \. + * VFID --> | PDE | ---------> +-----------+ / \. + * +===========+ / / / \. + * | | / / / \. + * +-----------+ <== [LMTT Directory Ptr] / \. + * / \ / / / \. + * / \ / / +-----------+-----------------+------+---+ + * / /\ / | 31:HAW-16 | HAW-17:5 | 4:1 | 0 | + * / / \ / +===========+=================+======+===+ + * / / \ / | Reserved | LMEM Page (2MB) | Rsvd | V | + * / / +-----------+-----------------+------+---+ + * / / + * +-----------+-----------------+------+---+ + * | 63:HAW-12 | HAW-13:4 | 3:1 | 0 | + * +===========+=================+======+===+ + * | Reserved | LMTT Ptr (64KB) | Rsvd | V | + * +-----------+-----------------+------+---+ + * + */ + +typedef u64 lmtt_ml_pde_t; +typedef u32 lmtt_ml_pte_t; + +#define LMTT_ML_HAW 48 /* 256 TiB */ + +#define LMTT_ML_PDE_MAX_NUM 64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */ +#define LMTT_ML_PDE_LMTT_PTR GENMASK_ULL(LMTT_ML_HAW - 13, 4) +#define LMTT_ML_PDE_VALID BIT(0) + +#define LMTT_ML_PDE_L2_SHIFT 35 +#define LMTT_ML_PDE_L2_MAX_NUM BIT_ULL(LMTT_ML_HAW - 35) + +#define LMTT_ML_PTE_MAX_NUM BIT(35 - ilog2(SZ_2M)) +#define LMTT_ML_PTE_LMEM_PAGE GENMASK(LMTT_ML_HAW - 17, 5) +#define LMTT_ML_PTE_VALID BIT(0) + +static unsigned int lmtt_ml_root_pd_level(void) +{ + return 2; /* implementation is 0-based */ +} + +static unsigned int lmtt_ml_pte_num(unsigned int level) +{ + switch (level) { + case 2: + return LMTT_ML_PDE_MAX_NUM; + case 1: + BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K); + return LMTT_ML_PDE_L2_MAX_NUM; + case 0: + BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K); + return LMTT_ML_PTE_MAX_NUM; + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_size(unsigned int level) +{ + switch (level) { + case 2: + case 1: + return sizeof(lmtt_ml_pde_t); + case 0: + return sizeof(lmtt_ml_pte_t); + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_shift(unsigned int level) +{ + switch (level) { + case 1: + BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G); + return ilog2(SZ_32G); + case 0: + return ilog2(SZ_2M); + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level) +{ + addr >>= lmtt_ml_pte_shift(level); + + switch (level) { + case 1: + /* SZ_32G increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM); + return addr & (LMTT_ML_PDE_L2_MAX_NUM - 1); + case 0: + /* SZ_2M increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM); + return addr & (LMTT_ML_PTE_MAX_NUM - 1); + default: + return 0; + } +} + +static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level) +{ + switch (level) { + case 0: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M)); + XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M)); + return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID; + case 1: + case 2: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K)); + XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K)); + return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID; + default: + XE_WARN_ON(true); + return 0; + } +} + +const struct xe_lmtt_ops lmtt_ml_ops = { + .lmtt_root_pd_level = lmtt_ml_root_pd_level, + .lmtt_pte_num = lmtt_ml_pte_num, + .lmtt_pte_size = lmtt_ml_pte_size, + .lmtt_pte_shift = lmtt_ml_pte_shift, + .lmtt_pte_index = 
lmtt_ml_pte_index, + .lmtt_pte_encode = lmtt_ml_pte_encode, +}; diff --git a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h new file mode 100644 index 000000000000..b37abad23416 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_types.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LMTT_TYPES_H_ +#define _XE_LMTT_TYPES_H_ + +#include <linux/types.h> + +struct xe_bo; +struct xe_lmtt; +struct xe_lmtt_pt; +struct xe_lmtt_ops; + +#define LMTT_PTE_INVALID ULL(0) + +/** + * struct xe_lmtt - Local Memory Translation Table Manager + */ +struct xe_lmtt { + /** @pd: root LMTT Directory */ + struct xe_lmtt_pt *pd; + + /** @ops: LMTT functions */ + const struct xe_lmtt_ops *ops; +}; + +/** + * struct xe_lmtt_pt - Local Memory Translation Table Page Table + * + * Represents single level of the LMTT. + */ +struct xe_lmtt_pt { + /** @level: page table level, 0 is leaf */ + unsigned int level; + + /** @bo: buffer object with actual LMTT PTE values */ + struct xe_bo *bo; + + /** @entries: leaf page tables, exist only for root/non-leaf */ + struct xe_lmtt_pt *entries[]; +}; + +/** + * struct xe_lmtt_ops - Local Memory Translation Table Operations + * + * Provides abstraction of the LMTT variants. + */ +struct xe_lmtt_ops { + /* private: */ + unsigned int (*lmtt_root_pd_level)(void); + unsigned int (*lmtt_pte_num)(unsigned int level); + unsigned int (*lmtt_pte_size)(unsigned int level); + unsigned int (*lmtt_pte_shift)(unsigned int level); + unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level); + u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level); +}; + +extern const struct xe_lmtt_ops lmtt_2l_ops; +extern const struct xe_lmtt_ops lmtt_ml_ops; + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c new file mode 100644 index 000000000000..b7fa3831b684 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -0,0 +1,1272 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_lrc.h" + +#include "instructions/xe_mi_commands.h" +#include "instructions/xe_gfxpipe_commands.h" +#include "regs/xe_engine_regs.h" +#include "regs/xe_gpu_commands.h" +#include "regs/xe_lrc_layout.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_drm_client.h" +#include "xe_exec_queue_types.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_hw_fence.h" +#include "xe_map.h" +#include "xe_vm.h" + +#define CTX_VALID (1 << 0) +#define CTX_PRIVILEGE (1 << 8) +#define CTX_ADDRESSING_MODE_SHIFT 3 +#define LEGACY_64B_CONTEXT 3 + +#define ENGINE_CLASS_SHIFT 61 +#define ENGINE_INSTANCE_SHIFT 48 + +static struct xe_device * +lrc_to_xe(struct xe_lrc *lrc) +{ + return gt_to_xe(lrc->fence_ctx.gt); +} + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + if (GRAPHICS_VER(xe) >= 20) + return 4 * SZ_4K; + else + return 14 * SZ_4K; + case XE_ENGINE_CLASS_COMPUTE: + /* 14 pages since graphics_ver == 11 */ + if (GRAPHICS_VER(xe) >= 20) + return 3 * SZ_4K; + else + return 14 * SZ_4K; + default: + WARN(1, "Unknown engine class: %d", class); + fallthrough; + case XE_ENGINE_CLASS_COPY: + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: + return 2 * SZ_4K; + } +} + +/* + * The per-platform tables are u8-encoded in @data. Decode @data and set the + * addresses' offset and commands in @regs. 
The following encoding is used + * for each byte. There are 2 steps: decoding commands and decoding addresses. + * + * Commands: + * [7]: create NOPs - number of NOPs are set in lower bits + * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set + * MI_LRI_FORCE_POSTED + * [5:0]: Number of NOPs or registers to set values to in case of + * MI_LOAD_REGISTER_IMM + * + * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" + * number of registers. They are set by using the REG/REG16 macros: the former + * is used for offsets smaller than 0x200 while the latter is for values bigger + * than that. Those macros already set all the bits documented below correctly: + * + * [7]: When a register offset needs more than 6 bits, use additional bytes, to + * follow, for the lower bits + * [6:0]: Register offset, without considering the engine base. + * + * This function only tweaks the commands and register offsets. Values are not + * filled out. + */ +static void set_offsets(u32 *regs, + const u8 *data, + const struct xe_hw_engine *hwe) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count) | \ + BUILD_BUG_ON_ZERO(count >= BIT(6))) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END 0 +{ + const u32 base = hwe->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + count = *data++ & ~BIT(7); + regs += count; + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + *regs |= MI_LRI_LRM_CS_MMIO; + regs++; + + xe_gt_assert(hwe->gt, count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + regs[0] = base + (offset << 2); + regs += 2; + } while (--count); + } + + *regs = MI_BATCH_BUFFER_END | BIT(0); +} + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + 
REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), + + END +}; + +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +#define XE2_CTX_COMMON \ + NOP(1), /* [0x00] */ \ + LRI(15, POSTED), /* [0x01] */ \ + REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ + REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ + REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ + REG(0x038), /* [0x08] RING_BUFFER_START */ \ + REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ + REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ + REG(0x140), /* [0x0e] BB_ADDR */ \ + REG(0x110), /* [0x10] BB_STATE */ \ + REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ + REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ + REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ + REG(0x180), /* [0x18] CCID */ \ + REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ + REG(0x120), /* [0x1c] PRT_BB_STATE */ \ + REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ + \ + NOP(1), /* [0x20] */ \ + LRI(9, POSTED), /* [0x21] */ \ + REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ + REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ + REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \ + REG16(0x284), /* [0x28] dummy reg */ \ + REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ + 
REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ + REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ + REG16(0x274), /* [0x30] PTBP_UDW */ \ + REG16(0x270) /* [0x32] PTBP_LDW */ + +static const u8 xe2_rcs_offsets[] = { + XE2_CTX_COMMON, + + NOP(2), /* [0x34] */ + LRI(2, POSTED), /* [0x36] */ + REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ + REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ + + NOP(6), /* [0x41] */ + LRI(1, 0), /* [0x47] */ + REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ + + END +}; + +static const u8 xe2_bcs_offsets[] = { + XE2_CTX_COMMON, + + NOP(4 + 8 + 1), /* [0x34] */ + LRI(2, POSTED), /* [0x41] */ + REG16(0x200), /* [0x42] BCS_SWCTRL */ + REG16(0x204), /* [0x44] BLIT_CCTL */ + + END +}; + +static const u8 xe2_xcs_offsets[] = { + XE2_CTX_COMMON, + + END +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) +{ + if (class == XE_ENGINE_CLASS_RENDER) { + if (GRAPHICS_VER(xe) >= 20) + return xe2_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1250) + return xehp_rcs_offsets; + else + return gen12_rcs_offsets; + } else if (class == XE_ENGINE_CLASS_COPY) { + if (GRAPHICS_VER(xe) >= 20) + return xe2_bcs_offsets; + else + return gen12_xcs_offsets; + } else { + if (GRAPHICS_VER(xe) >= 20) + return xe2_xcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_xcs_offsets; + else + return gen12_xcs_offsets; + } +} + +static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) +{ + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + + /* TODO: Timestamp */ +} + +static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + + if (GRAPHICS_VERx100(xe) >= 1250) + return 0x70; + else + return 0x60; +} + +static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) +{ + int x; + + x = lrc_ring_mi_mode(hwe); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; +} + +static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) +{ + return 0; +} + +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) +{ + return lrc->ring.size; +} + +/* Make the magic macros work */ +#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset + +#define LRC_SEQNO_PPHWSP_OFFSET 512 +#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_PPHWSP_SIZE SZ_4K + +static size_t lrc_reg_size(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 1250) + return 96 * sizeof(u32); + else + return 80 * sizeof(u32); +} + +size_t xe_lrc_skip_size(struct xe_device *xe) +{ + return LRC_PPHWSP_SIZE + lrc_reg_size(xe); +} + +static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) +{ + /* The seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) +{ + /* The start seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) +{ + /* The parallel is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; +} + +static inline u32 
__xe_lrc_regs_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; +} + +#define DECL_MAP_ADDR_HELPERS(elem) \ +static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ +{ \ + struct iosys_map map = lrc->bo->vmap; \ +\ + xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ + iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ + return map; \ +} \ +static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ +{ \ + return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ +} \ + +DECL_MAP_ADDR_HELPERS(ring) +DECL_MAP_ADDR_HELPERS(pphwsp) +DECL_MAP_ADDR_HELPERS(seqno) +DECL_MAP_ADDR_HELPERS(regs) +DECL_MAP_ADDR_HELPERS(start_seqno) +DECL_MAP_ADDR_HELPERS(parallel) + +#undef DECL_MAP_ADDR_HELPERS + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_pphwsp_ggtt_addr(lrc); +} + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + +static void *empty_lrc_data(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + void *data; + u32 *regs; + + data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + if (!data) + return NULL; + + /* 1st page: Per-Process of HW status Page */ + regs = data + LRC_PPHWSP_SIZE; + set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_context_control(regs, hwe); + reset_stop_ring(regs, hwe); + + return data; +} + +static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) +{ + u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); +} + +#define PVC_CTX_ASID (0x2e + 1) +#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +#define ACC_GRANULARITY_S 20 +#define ACC_NOTIFY_S 16 + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) +{ + struct xe_gt *gt = hwe->gt; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct iosys_map map; + void *init_data = NULL; + u32 arb_enable; + int err; + + lrc->flags = 0; + + /* + * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address + * via VM bind calls. 
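For reference, DECL_MAP_ADDR_HELPERS(seqno) above expands to roughly the following pair, giving each LRC sub-object a CPU-side iosys_map accessor and a GGTT-address accessor derived from a single offset function (hand-expanded here for readability, not additional patch code):

static inline struct iosys_map __xe_lrc_seqno_map(struct xe_lrc *lrc)
{
	struct iosys_map map = lrc->bo->vmap;	/* start from the BO's mapping */

	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));
	iosys_map_incr(&map, __xe_lrc_seqno_offset(lrc));	/* add sub-object offset */
	return map;
}

static inline u32 __xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_seqno_offset(lrc);
}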
+ */ + lrc->bo = xe_bo_create_pin_map(xe, tile, vm, + ring_size + xe_lrc_size(xe, hwe->class), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(lrc->bo)) + return PTR_ERR(lrc->bo); + + lrc->tile = gt_to_tile(hwe->gt); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, + hwe->fence_irq, hwe->name); + + if (!gt->default_lrc[hwe->class]) { + init_data = empty_lrc_data(hwe); + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + } + + /* + * Init Per-Process of HW status Page, LRC / context state to known + * values + */ + map = __xe_lrc_pphwsp_map(lrc); + if (!init_data) { + xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ + xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, + gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, + xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + } else { + xe_map_memcpy_to(xe, &map, 0, init_data, + xe_lrc_size(xe, hwe->class)); + kfree(init_data); + } + + if (vm) { + xe_lrc_set_ppgtt(lrc, vm); + + if (vm->xef) + xe_drm_client_add_bo(vm->xef->client, lrc->bo); + } + + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe->info.has_asid && vm) + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, + (q->usm.acc_granularity << + ACC_GRANULARITY_S) | vm->usm.asid); + if (xe->info.has_usm && vm) + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, + (q->usm.acc_notify << ACC_NOTIFY_S) | + q->usm.acc_trigger); + + lrc->desc = CTX_VALID; + lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT; + /* TODO: Priority */ + + /* While this appears to have something about privileged batches or + * some such, it really just means PPGTT mode. 
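The descriptor assembled above is plain bit composition over the LRC's GGTT address (OR'd in later by xe_lrc_descriptor()). A standalone sketch using the bit definitions from the top of this file; the GGTT address is a placeholder:

#include <stdint.h>
#include <stdio.h>

#define CTX_VALID			(1ull << 0)
#define CTX_PRIVILEGE			(1ull << 8)	/* effectively "PPGTT mode" */
#define CTX_ADDRESSING_MODE_SHIFT	3
#define LEGACY_64B_CONTEXT		3ull

int main(void)
{
	uint64_t ggtt_addr = 0x1000000;	/* placeholder PPHWSP GGTT address */
	uint64_t desc = CTX_VALID;

	desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
	desc |= CTX_PRIVILEGE;		/* set when the queue has a VM */

	printf("descriptor=%#llx\n", (unsigned long long)(desc | ggtt_addr));
	return 0;
}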
+ */ + if (vm) + lrc->desc |= CTX_PRIVILEGE; + + if (GRAPHICS_VERx100(xe) < 1250) { + lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; + lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; + } + + arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; + xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); + + map = __xe_lrc_seqno_map(lrc); + xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + + map = __xe_lrc_start_seqno_map(lrc); + xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + + return 0; + +err_lrc_finish: + xe_lrc_finish(lrc); + return err; +} + +void xe_lrc_finish(struct xe_lrc *lrc) +{ + xe_hw_fence_ctx_finish(&lrc->fence_ctx); + xe_bo_lock(lrc->bo, false); + xe_bo_unpin(lrc->bo); + xe_bo_unlock(lrc->bo); + xe_bo_put(lrc->bo); +} + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) +{ + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); +} + +u32 xe_lrc_ring_head(struct xe_lrc *lrc) +{ + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; +} + +u32 xe_lrc_ring_space(struct xe_lrc *lrc) +{ + const u32 head = xe_lrc_ring_head(lrc); + const u32 tail = lrc->ring.tail; + const u32 size = lrc->ring.size; + + return ((head - tail - 1) & (size - 1)) + 1; +} + +static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, + const void *data, size_t size) +{ + struct xe_device *xe = lrc_to_xe(lrc); + + iosys_map_incr(&ring, lrc->ring.tail); + xe_map_memcpy_to(xe, &ring, 0, data, size); + lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); +} + +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map ring; + u32 rhs; + size_t aligned_size; + + xe_assert(xe, IS_ALIGNED(size, 4)); + aligned_size = ALIGN(size, 8); + + ring = __xe_lrc_ring_map(lrc); + + xe_assert(xe, lrc->ring.tail < lrc->ring.size); + rhs = lrc->ring.size - lrc->ring.tail; + if (size > rhs) { + __xe_lrc_write_ring(lrc, ring, data, rhs); + __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); + } else { + __xe_lrc_write_ring(lrc, ring, data, size); + } + + if (aligned_size > size) { + u32 noop = MI_NOOP; + + __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); + } +} + +u64 xe_lrc_descriptor(struct xe_lrc *lrc) +{ + return lrc->desc | xe_lrc_ggtt_addr(lrc); +} + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_seqno_ggtt_addr(lrc); +} + +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) +{ + return &xe_hw_fence_create(&lrc->fence_ctx, + __xe_lrc_seqno_map(lrc))->dma; +} + +s32 xe_lrc_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +s32 xe_lrc_start_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_start_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_start_seqno_ggtt_addr(lrc); +} + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_ggtt_addr(lrc); +} + +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_map(lrc); +} + +static int instr_dw(u32 cmd_header) +{ + /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ + if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == + GFXPIPE_SINGLE_DW_CMD(0, 0)) + return 1; + + /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ + if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) + return 
REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; + + /* Most instructions have the # of dwords (minus 2) in 7:0 */ + return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; +} + +static int dump_mi_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 inst_header = *dw; + u32 numdw = instr_dw(inst_header); + u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); + int num_noop; + + /* First check for commands that don't have/use a '# DW' field */ + switch (inst_header & MI_OPCODE) { + case MI_NOOP: + num_noop = 1; + while (num_noop < remaining_dw && + (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) + num_noop++; + drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); + return num_noop; + + case MI_TOPOLOGY_FILTER: + drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); + return 1; + + case MI_BATCH_BUFFER_END: + drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); + /* Return 'remaining_dw' to consume the rest of the LRC */ + return remaining_dw; + } + + /* + * Any remaining commands include a # of dwords. We should make sure + * it doesn't exceed the remaining size of the LRC. + */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (inst_header & MI_OPCODE) { + case MI_LOAD_REGISTER_IMM: + drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", + inst_header, (numdw - 1) / 2); + for (int i = 1; i < numdw; i += 2) + drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); + return numdw; + + case MI_FORCE_WAKEUP: + drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); + return numdw; + + default: + drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", + inst_header, opcode, numdw); + return numdw; + } +} + +static int dump_gfxpipe_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 numdw = instr_dw(*dw); + u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); + u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); + u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); + + /* + * Make sure we haven't mis-parsed a number of dwords that exceeds the + * remaining size of the LRC. 
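+	 * (As decoded by instr_dw() above, the length fields hold the total
+	 * dword count minus two, so e.g. 0x3 in bits 7:0 means a five-dword
+	 * instruction.)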
+ */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (*dw & GFXPIPE_MATCH_MASK) { +#define MATCH(cmd) \ + case cmd: \ + drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + return numdw +#define MATCH3D(cmd) \ + case CMD_##cmd: \ + drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + return numdw + + MATCH(STATE_BASE_ADDRESS); + MATCH(STATE_SIP); + MATCH(GPGPU_CSR_BASE_ADDRESS); + MATCH(STATE_COMPUTE_MODE); + MATCH3D(3DSTATE_BTD); + + MATCH3D(3DSTATE_VF_STATISTICS); + + MATCH(PIPELINE_SELECT); + + MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); + MATCH3D(3DSTATE_CLEAR_PARAMS); + MATCH3D(3DSTATE_DEPTH_BUFFER); + MATCH3D(3DSTATE_STENCIL_BUFFER); + MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); + MATCH3D(3DSTATE_VERTEX_BUFFERS); + MATCH3D(3DSTATE_VERTEX_ELEMENTS); + MATCH3D(3DSTATE_INDEX_BUFFER); + MATCH3D(3DSTATE_VF); + MATCH3D(3DSTATE_MULTISAMPLE); + MATCH3D(3DSTATE_CC_STATE_POINTERS); + MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); + MATCH3D(3DSTATE_VS); + MATCH3D(3DSTATE_GS); + MATCH3D(3DSTATE_CLIP); + MATCH3D(3DSTATE_SF); + MATCH3D(3DSTATE_WM); + MATCH3D(3DSTATE_CONSTANT_VS); + MATCH3D(3DSTATE_CONSTANT_GS); + MATCH3D(3DSTATE_SAMPLE_MASK); + MATCH3D(3DSTATE_CONSTANT_HS); + MATCH3D(3DSTATE_CONSTANT_DS); + MATCH3D(3DSTATE_HS); + MATCH3D(3DSTATE_TE); + MATCH3D(3DSTATE_DS); + MATCH3D(3DSTATE_STREAMOUT); + MATCH3D(3DSTATE_SBE); + MATCH3D(3DSTATE_PS); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); + MATCH3D(3DSTATE_CPS_POINTERS); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); + MATCH3D(3DSTATE_BLEND_STATE_POINTERS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); + MATCH3D(3DSTATE_VF_INSTANCING); + MATCH3D(3DSTATE_VF_SGVS); + MATCH3D(3DSTATE_VF_TOPOLOGY); + MATCH3D(3DSTATE_WM_CHROMAKEY); + MATCH3D(3DSTATE_PS_BLEND); + MATCH3D(3DSTATE_WM_DEPTH_STENCIL); + MATCH3D(3DSTATE_PS_EXTRA); + MATCH3D(3DSTATE_RASTER); + MATCH3D(3DSTATE_SBE_SWIZ); + MATCH3D(3DSTATE_WM_HZ_OP); + MATCH3D(3DSTATE_VF_COMPONENT_PACKING); + MATCH3D(3DSTATE_VF_SGVS_2); + MATCH3D(3DSTATE_VFG); + MATCH3D(3DSTATE_URB_ALLOC_VS); + MATCH3D(3DSTATE_URB_ALLOC_HS); + MATCH3D(3DSTATE_URB_ALLOC_DS); + MATCH3D(3DSTATE_URB_ALLOC_GS); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); + MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); + MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); + MATCH3D(3DSTATE_AMFS); + MATCH3D(3DSTATE_DEPTH_BOUNDS); + MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); + MATCH3D(3DSTATE_CONSTANT_TS_POINTER); + MATCH3D(3DSTATE_MESH_CONTROL); + MATCH3D(3DSTATE_MESH_DISTRIB); + MATCH3D(3DSTATE_TASK_REDISTRIB); + MATCH3D(3DSTATE_MESH_SHADER); + MATCH3D(3DSTATE_MESH_SHADER_DATA); + MATCH3D(3DSTATE_TASK_CONTROL); + MATCH3D(3DSTATE_TASK_SHADER); + MATCH3D(3DSTATE_TASK_SHADER_DATA); + MATCH3D(3DSTATE_URB_ALLOC_MESH); + MATCH3D(3DSTATE_URB_ALLOC_TASK); + MATCH3D(3DSTATE_CLIP_MESH); + MATCH3D(3DSTATE_SBE_MESH); + MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); + + MATCH3D(3DSTATE_DRAWING_RECTANGLE); + MATCH3D(3DSTATE_CHROMA_KEY); + MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); + MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); + 
MATCH3D(3DSTATE_LINE_STIPPLE); + MATCH3D(3DSTATE_AA_LINE_PARAMETERS); + MATCH3D(3DSTATE_MONOFILTER_SIZE); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); + MATCH3D(3DSTATE_SO_DECL_LIST); + MATCH3D(3DSTATE_SO_BUFFER); + MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); + MATCH3D(3DSTATE_SAMPLE_PATTERN); + MATCH3D(3DSTATE_3D_MODE); + MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); + MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); + MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); + + default: + drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", + *dw, pipeline, opcode, subopcode, numdw); + return numdw; + } +} + +void xe_lrc_dump_default(struct drm_printer *p, + struct xe_gt *gt, + enum xe_engine_class hwe_class) +{ + u32 *dw; + int remaining_dw, num_dw; + + if (!gt->default_lrc[hwe_class]) { + drm_printf(p, "No default LRC for class %d\n", hwe_class); + return; + } + + /* + * Skip the beginning of the LRC since it contains the per-process + * hardware status page. + */ + dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; + remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; + + while (remaining_dw > 0) { + if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { + num_dw = dump_mi_command(p, gt, dw, remaining_dw); + } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { + num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); + } else { + num_dw = min(instr_dw(*dw), remaining_dw); + drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", + *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), + num_dw); + } + + dw += num_dw; + remaining_dw -= num_dw; + } +} + +struct instr_state { + u32 instr; + u16 num_dw; +}; + +static const struct instr_state xe_hpg_svg_state[] = { + { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, + { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, + { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, + { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, + { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, + { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, + { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, + { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, + { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, + { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, + { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, + { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, + { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, + { 
.instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, + { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, + { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, + { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, + { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, + { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, + { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, +}; + +void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +{ + struct xe_gt *gt = q->hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + const struct instr_state *state_table = NULL; + int state_table_size = 0; + + /* + * At the moment we only need to emit non-register state for the RCS + * engine. + */ + if (q->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + switch (GRAPHICS_VERx100(xe)) { + case 1255: + case 1270 ... 2004: + state_table = xe_hpg_svg_state; + state_table_size = ARRAY_SIZE(xe_hpg_svg_state); + break; + default: + xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", + GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); + return; + } + + for (int i = 0; i < state_table_size; i++) { + u32 instr = state_table[i].instr; + u16 num_dw = state_table[i].num_dw; + bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); + + xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); + xe_gt_assert(gt, num_dw != 0); + xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); + + /* + * Xe2's SVG context is the same as the one on DG2 / MTL + * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has + * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). + * Just make the replacement here rather than defining a + * whole separate table for the single trivial change. 
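+		 * (The emission below writes only each header dword,
+		 * encoding multi-dword instructions as total length minus
+		 * two, and advances bb->len past the payload dwords.)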
+ */ + if (GRAPHICS_VER(xe) >= 20 && + instr == CMD_3DSTATE_DRAWING_RECTANGLE) + instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; + + bb->cs[bb->len] = instr; + if (!is_single_dw) + bb->cs[bb->len] |= (num_dw - 2); + + bb->len += num_dw; + } +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h new file mode 100644 index 000000000000..28b1d3f404d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _XE_LRC_H_ +#define _XE_LRC_H_ + +#include "xe_lrc_types.h" + +struct drm_printer; +struct xe_bb; +struct xe_device; +struct xe_exec_queue; +enum xe_engine_class; +struct xe_hw_engine; +struct xe_vm; + +#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); +void xe_lrc_finish(struct xe_lrc *lrc); + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); +u32 xe_lrc_ring_head(struct xe_lrc *lrc); +u32 xe_lrc_ring_space(struct xe_lrc *lrc); +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); +u32 *xe_lrc_regs(struct xe_lrc *lrc); + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); + +u64 xe_lrc_descriptor(struct xe_lrc *lrc); + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc); +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc); +s32 xe_lrc_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc); +s32 xe_lrc_start_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); + +size_t xe_lrc_skip_size(struct xe_device *xe); + +void xe_lrc_dump_default(struct drm_printer *p, + struct xe_gt *gt, + enum xe_engine_class); + +void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h new file mode 100644 index 000000000000..78220336062c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_LRC_TYPES_H_ +#define _XE_LRC_TYPES_H_ + +#include "xe_hw_fence_types.h" + +struct xe_bo; + +/** + * struct xe_lrc - Logical ring context (LRC) and submission ring object + */ +struct xe_lrc { + /** + * @bo: buffer object (memory) for logical ring context, per process HW + * status page, and submission ring. 
+ */ + struct xe_bo *bo; + + /** @tile: tile which this LRC belongs to */ + struct xe_tile *tile; + + /** @flags: LRC flags */ + u32 flags; + + /** @ring: submission ring state */ + struct { + /** @size: size of submission ring */ + u32 size; + /** @tail: tail of submission ring */ + u32 tail; + /** @old_tail: shadow of tail */ + u32 old_tail; + } ring; + + /** @desc: LRC descriptor */ + u64 desc; + + /** @fence_ctx: context for hw fence */ + struct xe_hw_fence_ctx fence_ctx; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h new file mode 100644 index 000000000000..daf56c846d03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_MACROS_H_ +#define _XE_MACROS_H_ + +#include <linux/bug.h> + +#define XE_WARN_ON WARN_ON + +#define XE_IOCTL_DBG(xe, cond) \ + ((cond) && (drm_dbg(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond), 1)) + +#endif diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h new file mode 100644 index 000000000000..f62e0c8b67ab --- /dev/null +++ b/drivers/gpu/drm/xe/xe_map.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_MAP_H_ +#define _XE_MAP_H_ + +#include <linux/iosys-map.h> + +#include <xe_device.h> + +/** + * DOC: Map layer + * + * All access to any memory shared with a device (both sysmem and vram) in the + * XE driver should go through this layer (xe_map). This layer is built on top + * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` + * and with extra hooks into the XE driver that allows adding asserts to memory + * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). 
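+ *
+ * A minimal, illustrative read through this layer (the names "bo" and
+ * "ofs" here are placeholders, not part of this patch):
+ *
+ * .. code-block:: c
+ *
+ *	struct iosys_map map = bo->vmap;
+ *
+ *	iosys_map_incr(&map, ofs);
+ *	value = xe_map_read32(xe, &map);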
+ */ + +static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst, + size_t dst_offset, const void *src, + size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_to(dst, dst_offset, src, len); +} + +static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst, + const struct iosys_map *src, + size_t src_offset, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_from(dst, src, src_offset, len); +} + +static inline void xe_map_memset(struct xe_device *xe, + struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memset(dst, offset, value, len); +} + +/* FIXME: We likely should kill these two functions sooner or later */ +static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + return readl(map->vaddr_iomem); + else + return READ_ONCE(*(u32 *)map->vaddr); +} + +static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, + u32 val) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + writel(val, map->vaddr_iomem); + else + *(u32 *)map->vaddr = val; +} + +#define xe_map_rd(xe__, map__, offset__, type__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd(map__, offset__, type__); \ +}) + +#define xe_map_wr(xe__, map__, offset__, type__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr(map__, offset__, type__, val__); \ +}) + +#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd_field(map__, struct_offset__, struct_type__, field__); \ +}) + +#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__); \ +}) + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c new file mode 100644 index 000000000000..adf1dab5eba2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -0,0 +1,1410 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "xe_migrate.h" + +#include <linux/bitfield.h> +#include <linux/sizes.h> + +#include <drm/drm_managed.h> +#include <drm/ttm/ttm_tt.h> +#include <drm/xe_drm.h> + +#include "generated/xe_wa_oob.h" +#include "instructions/xe_mi_commands.h" +#include "regs/xe_gpu_commands.h" +#include "tests/xe_test.h" +#include "xe_assert.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_exec_queue.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mocs.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_sched_job.h" +#include "xe_sync.h" +#include "xe_trace.h" +#include "xe_vm.h" +#include "xe_wa.h" + +/** + * struct xe_migrate - migrate context. + */ +struct xe_migrate { + /** @q: Default exec queue used for migration */ + struct xe_exec_queue *q; + /** @tile: Backpointer to the tile this struct xe_migrate belongs to. */ + struct xe_tile *tile; + /** @job_mutex: Timeline mutex for @eng. */ + struct mutex job_mutex; + /** @pt_bo: Page-table buffer object. 
*/ + struct xe_bo *pt_bo; + /** @batch_base_ofs: VM offset of the migration batch buffer */ + u64 batch_base_ofs; + /** @usm_batch_base_ofs: VM offset of the usm batch buffer */ + u64 usm_batch_base_ofs; + /** @cleared_mem_ofs: VM offset of @cleared_bo. */ + u64 cleared_mem_ofs; + /** + * @fence: dma-fence representing the last migration job batch. + * Protected by @job_mutex. + */ + struct dma_fence *fence; + /** + * @vm_update_sa: For integrated, used to suballocate page-tables + * out of the pt_bo. + */ + struct drm_suballoc_manager vm_update_sa; +}; + +#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ +#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */ +#define NUM_KERNEL_PDE 17 +#define NUM_PT_SLOTS 32 +#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M + +/** + * xe_tile_migrate_engine() - Get this tile's migrate engine. + * @tile: The tile. + * + * Returns the default migrate engine of this tile. + * TODO: Perhaps this function is slightly misplaced, and even unneeded? + * + * Return: The default migrate engine + */ +struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile) +{ + return tile->migrate->q; +} + +static void xe_migrate_fini(struct drm_device *dev, void *arg) +{ + struct xe_migrate *m = arg; + + xe_vm_lock(m->q->vm, false); + xe_bo_unpin(m->pt_bo); + xe_vm_unlock(m->q->vm); + + dma_fence_put(m->fence); + xe_bo_put(m->pt_bo); + drm_suballoc_manager_fini(&m->vm_update_sa); + mutex_destroy(&m->job_mutex); + xe_vm_close_and_put(m->q->vm); + xe_exec_queue_put(m->q); +} + +static u64 xe_migrate_vm_addr(u64 slot, u32 level) +{ + XE_WARN_ON(slot >= NUM_PT_SLOTS); + + /* First slot is reserved for mapping of PT bo and bb, start from 1 */ + return (slot + 1ULL) << xe_pt_shift(level + 1); +} + +static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr) +{ + /* + * Remove the DPA to get a correct offset into identity table for the + * migrate offset + */ + addr -= xe->mem.vram.dpa_base; + return addr + (256ULL << xe_pt_shift(2)); +} + +static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, + struct xe_vm *vm) +{ + struct xe_device *xe = tile_to_xe(tile); + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; + u8 id = tile->id; + u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; + u32 map_ofs, level, i; + struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; + u64 entry; + + /* Can't bump NUM_PT_SLOTS too high */ + BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE); + /* Must be a multiple of 64K to support all platforms */ + BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K); + /* And one slot reserved for the 4KiB page table updates */ + BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); + + /* Need to be sure everything fits in the first PT, or create more */ + xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + + bo = xe_bo_create_pin_map(vm->xe, tile, vm, + num_entries * XE_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index); + xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); + + map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; + + /* Map the entire BO in our level 0 pt */ + for (i = 0, level = 0; i < num_entries; level++) { + entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE, + pat_index, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); + + if (vm->flags & XE_VM_FLAG_64K) + i += 16; + else + i += 1; 
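+		/*
+		 * With XE_VM_FLAG_64K a single 64 KiB page occupies sixteen
+		 * 4 KiB PTE slots, hence the stride of 16 above.
+		 */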
+ } + + if (!IS_DGFX(xe)) { + /* Write out batch too */ + m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; + if (xe->info.has_usm) { + batch = tile->primary_gt->usm.bb_pool->bo; + m->usm_batch_base_ofs = m->batch_base_ofs; + } + + for (i = 0; i < batch->size; + i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : + XE_PAGE_SIZE) { + entry = vm->pt_ops->pte_encode_bo(batch, i, + pat_index, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, + entry); + level++; + } + } else { + u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); + + m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); + + if (xe->info.has_usm) { + batch = tile->primary_gt->usm.bb_pool->bo; + batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); + m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); + } + } + + for (level = 1; level < num_level; level++) { + u32 flags = 0; + + if (vm->flags & XE_VM_FLAG_64K && level == 1) + flags = XE_PDE_64K; + + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * + XE_PAGE_SIZE, pat_index); + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, + entry | flags); + } + + /* Write PDE's that point to our BO. */ + for (i = 0; i < num_entries - num_level; i++) { + entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, + pat_index); + + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + + (i + 1) * 8, u64, entry); + } + + /* Set up a 1GiB NULL mapping at 255GiB offset. */ + level = 2; + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, + vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) + | XE_PTE_NULL); + m->cleared_mem_ofs = (255ULL << xe_pt_shift(level)); + + /* Identity map the entire vram at 256GiB offset */ + if (IS_DGFX(xe)) { + u64 pos, ofs, flags; + + level = 2; + ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; + flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, + true, 0); + + /* + * Use 1GB pages, it shouldn't matter the physical amount of + * vram is less, when we don't access it. + */ + for (pos = xe->mem.vram.dpa_base; + pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base; + pos += SZ_1G, ofs += 8) + xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + } + + /* + * Example layout created above, with root level = 3: + * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's + * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's + * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's + * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2] + * + * This makes the lowest part of the VM point to the pagetables. + * Hence the lowest 2M in the vm should point to itself, with a few writes + * and flushes, other parts of the VM can be used either for copying and + * clearing. + * + * For performance, the kernel reserves PDE's, so about 20 are left + * for async VM updates. + * + * To make it easier to work, each scratch PT is put in slot (1 + PT #) + * everywhere, this allows lockless updates to scratch pages by using + * the different addresses in VM. + */ +#define NUM_VMUSA_UNIT_PER_PAGE 32 +#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) +#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) + drm_suballoc_manager_init(&m->vm_update_sa, + (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * + NUM_VMUSA_UNIT_PER_PAGE, 0); + + m->pt_bo = bo; + return 0; +} + +/* + * Due to workaround 16017236439, odd instance hardware copy engines are + * faster than even instance ones. 
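+ * (With BCS0-BCS8 present, for instance, that makes BCS1/3/5/7 the
+ * preferred instances, plus the reserved USM copy engine regardless of
+ * parity.)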
+ * This function returns the mask involving all fast copy engines and the
+ * reserved copy engine to be used as the logical mask for the migrate engine.
+ * Including the reserved copy engine is required to avoid deadlocks where
+ * migrate jobs servicing faults get stuck behind the job that faulted.
+ */
+static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+{
+	u32 logical_mask = 0;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class != XE_ENGINE_CLASS_COPY)
+			continue;
+
+		if (!XE_WA(gt, 16017236439) ||
+		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+			logical_mask |= BIT(hwe->logical_instance);
+	}
+
+	return logical_mask;
+}
+
+/**
+ * xe_migrate_init() - Initialize a migrate context
+ * @tile: Back-pointer to the tile we're initializing for.
+ *
+ * Return: Pointer to a migrate context on success. Error pointer on error.
+ */
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *primary_gt = tile->primary_gt;
+	struct xe_migrate *m;
+	struct xe_vm *vm;
+	int err;
+
+	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	m->tile = tile;
+
+	/* Special layout, prepared below. */
+	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
+			  XE_VM_FLAG_SET_TILE_ID(tile));
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	xe_vm_lock(vm, false);
+	err = xe_migrate_prepare_vm(tile, m, vm);
+	xe_vm_unlock(vm);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return ERR_PTR(err);
+	}
+
+	if (xe->info.has_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
+							   XE_ENGINE_CLASS_COPY,
+							   primary_gt->usm.reserved_bcs_instance,
+							   false);
+		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
+
+		if (!hwe || !logical_mask) {
+			xe_vm_close_and_put(vm);
+			return ERR_PTR(-EINVAL);
+		}
+
+		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
+					    EXEC_QUEUE_FLAG_KERNEL |
+					    EXEC_QUEUE_FLAG_PERMANENT);
+	} else {
+		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
+						  XE_ENGINE_CLASS_COPY,
+						  EXEC_QUEUE_FLAG_KERNEL |
+						  EXEC_QUEUE_FLAG_PERMANENT);
+	}
+	if (IS_ERR(m->q)) {
+		xe_vm_close_and_put(vm);
+		return ERR_CAST(m->q);
+	}
+	if (xe->info.has_usm)
+		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+
+	mutex_init(&m->job_mutex);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+	if (err)
+		return ERR_PTR(err);
+
+	return m;
+}
+
+static u64 max_mem_transfer_per_pass(struct xe_device *xe)
+{
+	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+		return MAX_CCS_LIMITED_TRANSFER;
+
+	return MAX_PREEMPTDISABLE_TRANSFER;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
+{
+	/*
+	 * For VRAM we use identity mapped pages so we are limited to current
+	 * cursor size. For system we program the pages ourselves so we have no
+	 * such limitation.
+	 */
+	return min_t(u64, max_mem_transfer_per_pass(xe),
+		     mem_type_is_vram(cur->mem_type) ?
cur->size : + cur->remaining); +} + +static u32 pte_update_size(struct xe_migrate *m, + bool is_vram, + struct ttm_resource *res, + struct xe_res_cursor *cur, + u64 *L0, u64 *L0_ofs, u32 *L0_pt, + u32 cmd_size, u32 pt_ofs, u32 avail_pts) +{ + u32 cmds = 0; + + *L0_pt = pt_ofs; + if (!is_vram) { + /* Clip L0 to available size */ + u64 size = min(*L0, (u64)avail_pts * SZ_2M); + u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + + *L0 = size; + *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); + + /* MI_STORE_DATA_IMM */ + cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + + /* PDE qwords */ + cmds += num_4k_pages * 2; + + /* Each chunk has a single blit command */ + cmds += cmd_size; + } else { + /* Offset into identity map. */ + *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), + cur->start + vram_region_gpu_offset(res)); + cmds += cmd_size; + } + + return cmds; +} + +static void emit_pte(struct xe_migrate *m, + struct xe_bb *bb, u32 at_pt, + bool is_vram, bool is_comp_pte, + struct xe_res_cursor *cur, + u32 size, struct xe_bo *bo) +{ + struct xe_device *xe = tile_to_xe(m->tile); + + u16 pat_index; + u32 ptes; + u64 ofs = at_pt * XE_PAGE_SIZE; + u64 cur_ofs; + + /* Indirect access needs compression enabled uncached PAT index */ + if (GRAPHICS_VERx100(xe) >= 2000) + pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : + xe->pat.idx[XE_CACHE_NONE]; + else + pat_index = xe->pat.idx[XE_CACHE_WB]; + + /* + * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently + * we're only emitting VRAM PTEs during sanity tests, so when + * that's moved to a Kunit test, we should condition VRAM PTEs + * on running tests. + */ + + ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); + + while (ptes) { + u32 chunk = min(0x1ffU, ptes); + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); + bb->cs[bb->len++] = ofs; + bb->cs[bb->len++] = 0; + + cur_ofs = ofs; + ofs += chunk * 8; + ptes -= chunk; + + while (chunk--) { + u64 addr, flags = 0; + bool devmem = false; + + addr = xe_res_dma(cur) & PAGE_MASK; + if (is_vram) { + /* Is this a 64K PTE entry? 
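+				 * (Only the first PTE of each aligned group
+				 * of 16 slots gets XE_PTE_PS64, per the
+				 * cur_ofs alignment check below.)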
*/ + if ((m->q->vm->flags & XE_VM_FLAG_64K) && + !(cur_ofs & (16 * 8 - 1))) { + xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); + flags |= XE_PTE_PS64; + } + + addr += vram_region_gpu_offset(bo->ttm.resource); + devmem = true; + } + + addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, pat_index, + 0, devmem, flags); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + + xe_res_next(cur, min_t(u32, size, PAGE_SIZE)); + cur_ofs += 8; + } + } +} + +#define EMIT_COPY_CCS_DW 5 +static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, + u64 dst_ofs, bool dst_is_indirect, + u64 src_ofs, bool src_is_indirect, + u32 size) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 *cs = bb->cs + bb->len; + u32 num_ccs_blks; + u32 num_pages; + u32 ccs_copy_size; + u32 mocs; + + if (GRAPHICS_VERx100(xe) >= 2000) { + num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1)); + + ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1); + mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index); + + } else { + num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), + NUM_CCS_BYTES_PER_BLOCK); + xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1)); + + ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1); + mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index); + } + + *cs++ = XY_CTRL_SURF_COPY_BLT | + (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | + (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | + ccs_copy_size; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs) | mocs; + *cs++ = lower_32_bits(dst_ofs); + *cs++ = upper_32_bits(dst_ofs) | mocs; + + bb->len = cs - bb->cs; +} + +#define EMIT_COPY_DW 10 +static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u64 dst_ofs, unsigned int size, + unsigned int pitch) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 mocs = 0; + u32 tile_y = 0; + + xe_gt_assert(gt, size / pitch <= S16_MAX); + xe_gt_assert(gt, pitch / 4 <= S16_MAX); + xe_gt_assert(gt, pitch <= U16_MAX); + + if (GRAPHICS_VER(xe) >= 20) + mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index); + + if (GRAPHICS_VERx100(xe) >= 1250) + tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4; + + bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; + bb->cs[bb->len++] = lower_32_bits(dst_ofs); + bb->cs[bb->len++] = upper_32_bits(dst_ofs); + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = pitch | mocs; + bb->cs[bb->len++] = lower_32_bits(src_ofs); + bb->cs[bb->len++] = upper_32_bits(src_ofs); +} + +static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage) +{ + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); +} + +static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) +{ + return usm ? 
m->usm_batch_base_ofs : m->batch_base_ofs; +} + +static u32 xe_migrate_ccs_copy(struct xe_migrate *m, + struct xe_bb *bb, + u64 src_ofs, bool src_is_indirect, + u64 dst_ofs, bool dst_is_indirect, u32 dst_size, + u64 ccs_ofs, bool copy_ccs) +{ + struct xe_gt *gt = m->tile->primary_gt; + u32 flush_flags = 0; + + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) { + /* + * If the src is already in vram, then it should already + * have been cleared by us, or has been populated by the + * user. Make sure we copy the CCS aux state as-is. + * + * Otherwise if the bo doesn't have any CCS metadata attached, + * we still need to clear it for security reasons. + */ + u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs; + + emit_copy_ccs(gt, bb, + dst_ofs, true, + ccs_src_ofs, src_is_indirect, dst_size); + + flush_flags = MI_FLUSH_DW_CCS; + } else if (copy_ccs) { + if (!src_is_indirect) + src_ofs = ccs_ofs; + else if (!dst_is_indirect) + dst_ofs = ccs_ofs; + + xe_gt_assert(gt, src_is_indirect || dst_is_indirect); + + emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs, + src_is_indirect, dst_size); + if (dst_is_indirect) + flush_flags = MI_FLUSH_DW_CCS; + } + + return flush_flags; +} + +/** + * xe_migrate_copy() - Copy content of TTM resources. + * @m: The migration context. + * @src_bo: The buffer object @src is currently bound to. + * @dst_bo: If copying between resources created for the same bo, set this to + * the same value as @src_bo. If copying between buffer objects, set it to + * the buffer object @dst is currently bound to. + * @src: The source TTM resource. + * @dst: The dst TTM resource. + * @copy_only_ccs: If true copy only CCS metadata + * + * Copies the contents of @src to @dst: On flat CCS devices, + * the CCS metadata is copied as well if needed, or if not present, + * the CCS metadata of @dst is cleared for security reasons. + * + * Return: Pointer to a dma_fence representing the last copy batch, or + * an error pointer on failure. If there is a failure, any copy operation + * started by the function call has been synced. + */ +struct dma_fence *xe_migrate_copy(struct xe_migrate *m, + struct xe_bo *src_bo, + struct xe_bo *dst_bo, + struct ttm_resource *src, + struct ttm_resource *dst, + bool copy_only_ccs) +{ + struct xe_gt *gt = m->tile->primary_gt; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = src_bo->size; + struct xe_res_cursor src_it, dst_it, ccs_it; + u64 src_L0_ofs, dst_L0_ofs; + u32 src_L0_pt, dst_L0_pt; + u64 src_L0, dst_L0; + int pass = 0; + int err; + bool src_is_pltt = src->mem_type == XE_PL_TT; + bool dst_is_pltt = dst->mem_type == XE_PL_TT; + bool src_is_vram = mem_type_is_vram(src->mem_type); + bool dst_is_vram = mem_type_is_vram(dst->mem_type); + bool copy_ccs = xe_device_has_flat_ccs(xe) && + xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); + bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); + + /* Copying CCS between two different BOs is not supported yet. 
*/ + if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) + return ERR_PTR(-EINVAL); + + if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + return ERR_PTR(-EINVAL); + + if (!src_is_vram) + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); + else + xe_res_first(src, 0, size, &src_it); + if (!dst_is_vram) + xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it); + else + xe_res_first(dst, 0, size, &dst_it); + + if (copy_system_ccs) + xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + while (size) { + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 flush_flags; + u32 update_idx; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(xe, &src_it); + dst_L0 = xe_migrate_res_sizes(xe, &dst_it); + + drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", + pass++, src_L0, dst_L0); + + src_L0 = min(src_L0, dst_L0); + + batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, 0, + avail_pts, avail_pts); + + if (copy_system_ccs) { + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size, + &ccs_ofs, &ccs_pt, 0, + 2 * avail_pts, + avail_pts); + } + + /* Add copy commands size here */ + batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0)); + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + if (!src_is_vram) + emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, + src_bo); + else + xe_res_next(&src_it, src_L0); + + if (!dst_is_vram) + emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, + dst_bo); + else + xe_res_next(&dst_it, src_L0); + + if (copy_system_ccs) + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + if (!copy_only_ccs) + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); + + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, + IS_DGFX(xe) ? src_is_vram : src_is_pltt, + dst_L0_ofs, + IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, + src_L0, ccs_ofs, copy_ccs); + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->q, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + if (!fence) { + err = job_add_deps(job, src_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + if (!err && src_bo != dst_bo) + err = job_add_deps(job, dst_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + size -= src_L0; + continue; + +err_job: + xe_sched_job_put(job); +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); + +err_sync: + /* Sync partial copy if any. FIXME: under job_mutex? 
+	 */
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
+	return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+				 u32 size, u32 pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = PVC_MEM_SET_CMD_LEN_DW;
+
+	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
+	*cs++ = pitch - 1;
+	*cs++ = (size / pitch) - 1;
+	*cs++ = pitch - 1;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+
+	bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = XY_FAST_COLOR_BLT_DW;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		len = 11;
+
+	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+		(len - 2);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	else
+		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	if (len > 11) {
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+
+	bb->len += len;
+}
+
+static bool has_service_copy_support(struct xe_gt *gt)
+{
+	/*
+	 * What we care about is whether the architecture was designed with
+	 * service copy functionality (specifically the new MEM_SET / MEM_COPY
+	 * instructions) so check the architectural engine list rather than the
+	 * actual list since these instructions are usable on BCS0 even if
+	 * all of the actual service copy engines (BCS1-BCS8) have been fused
+	 * off.
+	 */
+	return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+						XE_HW_ENGINE_BCS1);
+}
+
+static u32 emit_clear_cmd_len(struct xe_gt *gt)
+{
+	if (has_service_copy_support(gt))
+		return PVC_MEM_SET_CMD_LEN_DW;
+	else
+		return XY_FAST_COLOR_BLT_DW;
+}
+
+static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		       u32 size, u32 pitch, bool is_vram)
+{
+	if (has_service_copy_support(gt))
+		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+	else
+		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+				     is_vram);
+}
+
+/**
+ * xe_migrate_clear() - Clear content of a TTM resource.
+ * @m: The migration context.
+ * @bo: The buffer object @dst is currently bound to.
+ * @dst: The dst TTM resource to be cleared.
+ *
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
+ * TODO: Eliminate the @bo argument.
+ *
+ * Return: Pointer to a dma_fence representing the last clear batch, or
+ * an error pointer on failure. If there is a failure, any clear operation
+ * started by the function call has been synced.
+ */ +struct dma_fence *xe_migrate_clear(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *dst) +{ + bool clear_vram = mem_type_is_vram(dst->mem_type); + struct xe_gt *gt = m->tile->primary_gt; + struct xe_device *xe = gt_to_xe(gt); + bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + struct dma_fence *fence = NULL; + u64 size = bo->size; + struct xe_res_cursor src_it; + struct ttm_resource *src = dst; + int err; + int pass = 0; + + if (!clear_vram) + xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + else + xe_res_first(src, 0, bo->size, &src_it); + + while (size) { + u64 clear_L0_ofs; + u32 clear_L0_pt; + u32 flush_flags = 0; + u64 clear_L0; + struct xe_sched_job *job; + struct xe_bb *bb; + u32 batch_size, update_idx; + + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + clear_L0 = xe_migrate_res_sizes(xe, &src_it); + + drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); + + /* Calculate final sizes and batch size.. */ + batch_size = 2 + + pte_update_size(m, clear_vram, src, &src_it, + &clear_L0, &clear_L0_ofs, &clear_L0_pt, + clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + avail_pts); + + if (xe_device_has_flat_ccs(xe)) + batch_size += EMIT_COPY_CCS_DW; + + /* Clear commands */ + + if (WARN_ON_ONCE(!clear_L0)) + break; + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + size -= clear_L0; + /* Preemption is enabled again by the ring ops. */ + if (!clear_vram) { + emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, + bo); + } else { + xe_res_next(&src_it, clear_L0); + } + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + if (!clear_system_ccs) + emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); + + if (xe_device_has_flat_ccs(xe)) { + emit_copy_ccs(gt, bb, clear_L0_ofs, true, + m->cleared_mem_ofs, false, clear_L0); + flush_flags = MI_FLUSH_DW_CCS; + } + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->q, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + if (!fence) { + /* + * There can't be anything userspace related at this + * point, so we just need to respect any potential move + * fences, which are always tracked as + * DMA_RESV_USAGE_KERNEL. + */ + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_job; + } + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + continue; + +err_job: + xe_sched_job_put(job); +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err_sync: + /* Sync partial copies if any. FIXME: job_mutex? 
+	 */
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
+	return ERR_PTR(err);
+	}
+
+	if (clear_system_ccs)
+		bo->ccs_cleared = true;
+
+	return fence;
+}
+
+static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
+			  const struct xe_vm_pgtable_update *update,
+			  struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 chunk;
+	u32 ofs = update->ofs, size = update->qwords;
+
+	/*
+	 * If we have 512 entries (max), we would populate it ourselves,
+	 * and update the PDE above it to the new pointer.
+	 * The only time this can happen is if we have to update the top
+	 * PDE. This requires a BO that is almost vm->size big.
+	 *
+	 * This shouldn't be possible in practice, but might change when
+	 * 16K pages are used. Hence the assert.
+	 */
+	xe_tile_assert(tile, update->qwords <= 0x1ff);
+	if (!ppgtt_ofs)
+		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
+						xe_bo_addr(update->pt_bo, 0,
+							   XE_PAGE_SIZE));
+
+	do {
+		u64 addr = ppgtt_ofs + ofs * 8;
+
+		chunk = min(update->qwords, 0x1ffU);
+
+		/* Ensure populatefn can do memset64 by aligning bb->cs */
+		if (!(bb->len & 1))
+			bb->cs[bb->len++] = MI_NOOP;
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = lower_32_bits(addr);
+		bb->cs[bb->len++] = upper_32_bits(addr);
+		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
+			      update);
+
+		bb->len += chunk * 2;
+		ofs += chunk;
+		size -= chunk;
+	} while (size);
+}
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+	return xe_vm_get(m->q->vm);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct migrate_test_params {
+	struct xe_test_priv base;
+	bool force_gpu;
+};
+
+#define to_migrate_test_params(_priv) \
+	container_of(_priv, struct migrate_test_params, base)
+#endif
+
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+			       struct xe_vm *vm, struct xe_bo *bo,
+			       const struct xe_vm_pgtable_update *updates,
+			       u32 num_updates, bool wait_vm,
+			       struct xe_migrate_pt_update *pt_update)
+{
+	XE_TEST_DECLARE(struct migrate_test_params *test =
+				to_migrate_test_params
+				(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct dma_fence *fence;
+	int err;
+	u32 i;
+
+	if (XE_TEST_ONLY(test && test->force_gpu))
+		return ERR_PTR(-ETIME);
+
+	if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
+					  DMA_RESV_USAGE_KERNEL))
+		return ERR_PTR(-ETIME);
+
+	if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
+					       DMA_RESV_USAGE_BOOKKEEP))
+		return ERR_PTR(-ETIME);
+
+	if (ops->pre_commit) {
+		pt_update->job = NULL;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			return ERR_PTR(err);
+	}
+	for (i = 0; i < num_updates; i++) {
+		const struct xe_vm_pgtable_update *update = &updates[i];
+
+		ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL,
+			      update->ofs, update->qwords, update);
+	}
+
+	if (vm) {
+		trace_xe_vm_cpu_bind(vm);
+		xe_device_wmb(vm->xe);
+	}
+
+	fence = dma_fence_get_stub();
+
+	return fence;
+}
+
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct dma_fence *fence;
+	int i;
+
+	for (i = 0; i < num_syncs; i++) {
+		fence = syncs[i].fence;
+
+		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				       &fence->flags))
+			return false;
+	}
+	if (q) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return false;
+	}
+
+	return true;
+}
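The CPU path (xe_migrate_update_pgtables_cpu()) and the batch-buffer path (write_pgtable()) above both delegate the actual PTE encoding to the ops->populate callback declared in xe_migrate.h. A minimal sketch of such a callback, for illustration only: example_populate and the zero PTE value are assumptions, but the signature and the map-vs-pos split follow the xe_migrate_pt_update_ops contract.

static void example_populate(struct xe_migrate_pt_update *pt_update,
			     struct xe_tile *tile, struct iosys_map *map,
			     void *pos, u32 ofs, u32 num_qwords,
			     const struct xe_vm_pgtable_update *update)
{
	u64 pte = 0;	/* assumed placeholder PTE value */
	u32 i;

	if (map)	/* CPU path: write through the PT BO mapping */
		for (i = 0; i < num_qwords; i++)
			xe_map_wr(tile_to_xe(tile), map,
				  (ofs + i) * sizeof(u64), u64, pte);
	else		/* GPU path: fill the MI_STORE_DATA_IMM payload */
		memset64(pos, pte, num_qwords);
}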
+
+/**
+ * xe_migrate_update_pgtables() - Pipelined page-table update
+ * @m: The migrate context.
+ * @vm: The vm we'll be updating.
+ * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
+ * @q: The exec queue to be used for the update or NULL if the default
+ * migration engine is to be used.
+ * @updates: An array of update descriptors.
+ * @num_updates: Number of descriptors in @updates.
+ * @syncs: Array of xe_sync_entry to await before updating. Note that waits
+ * will block the engine timeline.
+ * @num_syncs: Number of entries in @syncs.
+ * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
+ * pointers to callback functions and, if subclassed, private arguments to
+ * those.
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaning of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the CPU.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion.
+ */
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct xe_tile *tile = m->tile;
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct drm_suballoc *sa_bo = NULL;
+	struct xe_vma *vma = pt_update->vma;
+	struct xe_bb *bb;
+	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+	u64 addr;
+	int err = 0;
+	bool usm = !q && xe->info.has_usm;
+	bool first_munmap_rebind = vma &&
+		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
+	struct xe_exec_queue *q_override = !q ? m->q : q;
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+
+	/* Use the CPU if there are no in-syncs and the engine is idle */
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+		fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+						       num_updates,
+						       first_munmap_rebind,
+						       pt_update);
+		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+			return fence;
+	}
+
+	/* fixed + PTE entries */
+	if (IS_DGFX(xe))
+		batch_size = 2;
+	else
+		batch_size = 6 + num_updates * 2;
+
+	for (i = 0; i < num_updates; i++) {
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+		/* align noop + MI_STORE_DATA_IMM cmd prefix */
+		batch_size += 4 * num_cmds + updates[i].qwords * 2;
+	}
+
+	/*
+	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
+	 *
+	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+	 * Should be reasonably bounded.
+	 */
+	xe_tile_assert(tile, batch_size < SZ_128K);
+
+	bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm);
+	if (IS_ERR(bb))
+		return ERR_CAST(bb);
+
+	/* For sysmem PTEs, need to map them in our hole..
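+	 * (e.g. suballocation unit 40 lands in page NUM_KERNEL_PDE + 1 at
+	 * byte offset 8 * VM_SA_UPDATE_UNIT_SIZE, per the
+	 * NUM_VMUSA_UNIT_PER_PAGE arithmetic below.)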
*/ + if (!IS_DGFX(xe)) { + ppgtt_ofs = NUM_KERNEL_PDE - 1; + if (q) { + xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT); + + sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, + GFP_KERNEL, true, 0); + if (IS_ERR(sa_bo)) { + err = PTR_ERR(sa_bo); + goto err; + } + + ppgtt_ofs = NUM_KERNEL_PDE + + (drm_suballoc_soffset(sa_bo) / + NUM_VMUSA_UNIT_PER_PAGE); + page_ofs = (drm_suballoc_soffset(sa_bo) % + NUM_VMUSA_UNIT_PER_PAGE) * + VM_SA_UPDATE_UNIT_SIZE; + } + + /* Map our PT's to gtt */ + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates); + bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; + bb->cs[bb->len++] = 0; /* upper_32_bits */ + + for (i = 0; i < num_updates; i++) { + struct xe_bo *pt_bo = updates[i].pt_bo; + + xe_tile_assert(tile, pt_bo->size == SZ_4K); + + addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + } + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + + (page_ofs / sizeof(u64)) * XE_PAGE_SIZE; + for (i = 0; i < num_updates; i++) + write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE, + &updates[i], pt_update); + } else { + /* phys pages, no preamble required */ + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + for (i = 0; i < num_updates; i++) + write_pgtable(tile, bb, 0, &updates[i], pt_update); + } + + if (!q) + mutex_lock(&m->job_mutex); + + job = xe_bb_create_migration_job(q ?: m->q, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err_bb; + } + + /* Wait on BO move */ + if (bo) { + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_job; + } + + /* + * Munmap style VM unbind, need to wait for all jobs to be complete / + * trigger preempts before moving forward + */ + if (first_munmap_rebind) { + err = job_add_deps(job, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + err = xe_sched_job_last_fence_add_dep(job, vm); + for (i = 0; !err && i < num_syncs; i++) + err = xe_sync_entry_add_deps(&syncs[i], job); + + if (err) + goto err_job; + + if (ops->pre_commit) { + pt_update->job = job; + err = ops->pre_commit(pt_update); + if (err) + goto err_job; + } + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (!q) + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + drm_suballoc_free(sa_bo, fence); + + return fence; + +err_job: + xe_sched_job_put(job); +err_bb: + if (!q) + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err: + drm_suballoc_free(sa_bo, NULL); + return ERR_PTR(err); +} + +/** + * xe_migrate_wait() - Complete all operations using the xe_migrate context + * @m: Migrate context to wait for. + * + * Waits until the GPU no longer uses the migrate context's default engine + * or its page-table objects. FIXME: What about separate page-table update + * engines? 
+ */ +void xe_migrate_wait(struct xe_migrate *m) +{ + if (m->fence) + dma_fence_wait(m->fence, false); +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_migrate.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h new file mode 100644 index 000000000000..951f19318ea4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef _XE_MIGRATE_ +#define _XE_MIGRATE_ + +#include <drm/drm_mm.h> + +struct dma_fence; +struct iosys_map; +struct ttm_resource; + +struct xe_bo; +struct xe_gt; +struct xe_exec_queue; +struct xe_migrate; +struct xe_migrate_pt_update; +struct xe_sync_entry; +struct xe_pt; +struct xe_tile; +struct xe_vm; +struct xe_vm_pgtable_update; +struct xe_vma; + +/** + * struct xe_migrate_pt_update_ops - Callbacks for the + * xe_migrate_update_pgtables() function. + */ +struct xe_migrate_pt_update_ops { + /** + * @populate: Populate a command buffer or page-table with ptes. + * @pt_update: Embeddable callback argument. + * @tile: The tile for the current operation. + * @map: struct iosys_map into the memory to be populated. + * @pos: If @map is NULL, map into the memory to be populated. + * @ofs: qword offset into @map, unused if @map is NULL. + * @num_qwords: Number of qwords to write. + * @update: Information about the PTEs to be inserted. + * + * This interface is intended to be used as a callback into the + * page-table system to populate command buffers or shared + * page-tables with PTEs. + */ + void (*populate)(struct xe_migrate_pt_update *pt_update, + struct xe_tile *tile, struct iosys_map *map, + void *pos, u32 ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update); + + /** + * @pre_commit: Callback to be called just before arming the + * sched_job. + * @pt_update: Pointer to embeddable callback argument. + * + * Return: 0 on success, negative error code on error. + */ + int (*pre_commit)(struct xe_migrate_pt_update *pt_update); +}; + +/** + * struct xe_migrate_pt_update - Argument to the + * struct xe_migrate_pt_update_ops callbacks. + * + * Intended to be subclassed to support additional arguments if necessary. + */ +struct xe_migrate_pt_update { + /** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */ + const struct xe_migrate_pt_update_ops *ops; + /** @vma: The vma we're updating the pagetable for. */ + struct xe_vma *vma; + /** @job: The job if a GPU page-table update. 
NULL otherwise */
+	struct xe_sched_job *job;
+	/** @start: Start of update for the range fence */
+	u64 start;
+	/** @last: Last of update for the range fence */
+	u64 last;
+	/** @tile_id: Tile ID of the update */
+	u8 tile_id;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..63c7d67b5b62
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used to generate jobs which can copy memory
+ * (eviction), clear memory, or program tables (binds). This layer exists in
+ * every GT, has a migrate engine, and uses a special VM for all generated
+ * jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 32 physical pages, with 16 being
+ * reserved for BO mapping during copies and clears, 1 reserved for kernel
+ * binds, several pages needed to set up the identity mappings (the exact
+ * number based on how many bits of address space the device has), and the
+ * rest reserved for user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consists of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point
+ * to the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from a pool
+ * of pages reserved for user bind operations, with drm_suballoc managing this
+ * pool. If it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM, as when the
+ * device has VRAM the bind VM page table BOs are in VRAM and the identity
+ * mapping can be used.
+ *
+ * The second batch is used to program the page table updates in the bind VM.
+ * Why not just one batch? Well, the TLBs need to be invalidated between these
+ * two batches and that can only be done from the ring.
+ *
+ * When the bind job completes, the page allocated is returned to the pool of
+ * pages reserved for user bind operations if it was a user bind.
+ * No need to do this for kernel binds, as the reserved kernel page is
+ * serially used by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consists of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used to update the migration VM page
+ * structure. In copy jobs, we need to map the source and destination of the
+ * BO into the page structure. In clear jobs, we just need to add 1 mapping of
+ * the BO into the page structure. We use the 16 reserved pages in the
+ * migration VM for mappings; this gives us a maximum copy size of 16 MB and a
+ * maximum clear size of 32 MB.
+ *
+ * The second batch is used to do either the copy or the clear. Again, similar
+ * to binds, two batches are required as the TLBs need to be invalidated from
+ * the ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than the maximum
+ * copy / clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages for async binds to use all the entries
+ * in each page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..f660cfb79f50
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_tile.h"
+
+#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+
+#define BAR_SIZE_SHIFT 20
+
+static void
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (ret) {
+		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
+			 resno, 1 << bar_size, ERR_PTR(ret));
+		return;
+	}
+
+	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+}
+
+/*
+ * If force_vram_bar_size is set, attempt to set the BAR to the requested
+ * size; otherwise set it to the maximum possible size.
+ */
+static void xe_resize_vram_bar(struct xe_device *xe)
+{
+	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	resource_size_t current_size;
+	resource_size_t rebar_size;
+	struct resource *root_res;
+	u32 bar_size_mask;
+	u32 pci_cmd;
+	int i;
+
+	/* gather some relevant info */
+	current_size = pci_resource_len(pdev, LMEM_BAR);
+	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);
+
+	if (!bar_size_mask)
+		return;
+
+	/* set to a specific size? */
+	if (force_vram_bar_size) {
+		u32 bar_size_bit;
+
+		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
+
+		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));
+
+		if (!bar_size_bit) {
+			drm_info(&xe->drm,
+				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
+				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
+			return;
+		}
+
+		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
+
+		if (rebar_size == current_size)
+			return;
+	} else {
+		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
+
+		/* only resize if larger than current */
+		if (rebar_size <= current_size)
+			return;
+	}
+
+	drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
+		 (u64)current_size >> 20, (u64)rebar_size >> 20);
+
+	while (root->parent)
+		root = root->parent;
+
+	pci_bus_for_each_resource(root, root_res, i) {
+		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+		    root_res->start > 0x100000000ull)
+			break;
+	}
+
+	if (!root_res) {
+		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
+		return;
+	}
+
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+	_resize_bar(xe, LMEM_BAR, rebar_size);
+
+	pci_assign_unassigned_bus_resources(pdev->bus);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+}
+
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+	if (!pci_resource_flags(pdev, bar))
+		return false;
+
+	if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
+		return false;
+
+	if (!pci_resource_len(pdev, bar))
+		return false;
+
+	return true;
+}
+
+static int xe_determine_lmem_bar_size(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	xe_resize_vram_bar(xe);
+
+	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
+	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
+	if (!xe->mem.vram.io_size)
+		return -EIO;
+
+	/* XXX: Need to change when xe link code is ready */
+	xe->mem.vram.dpa_base = 0;
+
+	/* set up a map to the total memory area. */
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
+
+	return 0;
+}
+
+/**
+ * xe_mmio_tile_vram_size() - Collect vram size and offset information
+ * @tile: tile to get info for
+ * @vram_size: available vram (size - device reserved portions)
+ * @tile_size: actual vram size
+ * @tile_offset: physical start point in the vram address space
+ *
+ * There are 4 places for size information:
+ * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
+ * - TILEx size (actual vram size)
+ * - GSMBASE offset (TILEx - "stolen")
+ * - CSSBASE offset (TILEx - CSS space necessary)
+ *
+ * CSSBASE is always a lower/smaller offset than GSMBASE.
+ *
+ * The actual available size of memory extends up to the CCS or GSM base.
+ * NOTE: multi-tile bases will include the tile offset.
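+ *
+ * Worked example (hypothetical numbers): with a 16 GiB TILE0 range, no
+ * flat CCS, and GSMBASE reading back 15.5 GiB, this yields *tile_size =
+ * 16 GiB, *tile_offset = 0 and *vram_size = 15.5 GiB (the top 512 MiB
+ * being stolen/reserved).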
+ * + */ +static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, + u64 *tile_size, u64 *tile_offset) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt = tile->primary_gt; + u64 offset; + int err; + u32 reg; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + /* actual size */ + if (unlikely(xe->info.platform == XE_DG1)) { + *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); + *tile_offset = 0; + } else { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); + *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; + } + + /* minus device usage */ + if (xe->info.has_flat_ccs) { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + } else { + offset = xe_mmio_read64_2x32(gt, GSMBASE); + } + + /* remove the tile offset so we have just the available size */ + *vram_size = offset - *tile_offset; + + return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +int xe_mmio_probe_vram(struct xe_device *xe) +{ + struct xe_tile *tile; + resource_size_t io_size; + u64 available_size = 0; + u64 total_size = 0; + u64 tile_offset; + u64 tile_size; + u64 vram_size; + int err; + u8 id; + + if (!IS_DGFX(xe)) + return 0; + + /* Get the size of the root tile's vram for later accessibility comparison */ + tile = xe_device_get_root_tile(xe); + err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + if (err) + return err; + + err = xe_determine_lmem_bar_size(xe); + if (err) + return err; + + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &xe->mem.vram.io_size); + + io_size = xe->mem.vram.io_size; + + /* tile specific ranges */ + for_each_tile(tile, xe, id) { + err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + if (err) + return err; + + tile->mem.vram.actual_physical_size = tile_size; + tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; + tile->mem.vram.io_size = min_t(u64, vram_size, io_size); + + if (!tile->mem.vram.io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } + + tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; + tile->mem.vram.usable_size = vram_size; + tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + + if (tile->mem.vram.io_size < tile->mem.vram.usable_size) + drm_info(&xe->drm, "Small BAR device\n"); + drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, + tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); + drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); + + /* calculate total size using tile size to get the correct HW sizing */ + total_size += tile_size; + available_size += vram_size; + + if (total_size > xe->mem.vram.io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + &total_size, &xe->mem.vram.io_size); + } + + io_size -= min_t(u64, tile_size, io_size); + } + + xe->mem.vram.actual_physical_size = total_size; + + drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &xe->mem.vram.actual_physical_size); + drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &available_size); + + return 0; +} + +void xe_mmio_probe_tiles(struct xe_device *xe) +{ + size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; + u8 id, tile_count = xe->info.tile_count; + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_tile *tile; + void *regs; + u32 mtcfg; + + if (tile_count == 1) + goto add_mmio_ext; + + if (!xe->info.skip_mtcfg) { + mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); + tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (tile_count < xe->info.tile_count) { + drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", + xe->info.tile_count, tile_count); + xe->info.tile_count = tile_count; + + /* + * FIXME: Needs some work for standalone media, but should be impossible + * with multi-tile for now. + */ + xe->info.gt_count = xe->info.tile_count; + } + } + + regs = xe->mmio.regs; + for_each_tile(tile, xe, id) { + tile->mmio.size = tile_mmio_size; + tile->mmio.regs = regs; + regs += tile_mmio_size; + } + +add_mmio_ext: + /* + * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). + * When supported, there could be an additional contiguous multi-tile MMIO extension + * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. + */ + if (xe->info.has_mmio_ext) { + regs = xe->mmio.regs + tile_mmio_size * tile_count; + + for_each_tile(tile, xe, id) { + tile->mmio_ext.size = tile_mmio_ext_size; + tile->mmio_ext.regs = regs; + + regs += tile_mmio_ext_size; + } + } +} + +static void mmio_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); + if (xe->mem.vram.mapping) + iounmap(xe->mem.vram.mapping); +} + +static int xe_verify_lmem_ready(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + /* + * The boot firmware initializes local memory and assesses its health. + * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. 
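+	 * (The firmware reports readiness through the LMEM_INIT bit of
+	 * GU_CNTL, which is tested below.)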
+ */ + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { + drm_err(&xe->drm, "VRAM not initialized by firmware\n"); + return -ENODEV; + } + + return 0; +} + +int xe_mmio_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + const int mmio_bar = 0; + + /* + * Map the entire BAR. + * The first 16MB of the BAR, belong to the root tile, and include: + * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). + */ + xe->mmio.size = pci_resource_len(pdev, mmio_bar); + xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0); + if (xe->mmio.regs == NULL) { + drm_err(&xe->drm, "failed to map registers\n"); + return -EIO; + } + + return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); +} + +int xe_mmio_root_tile_init(struct xe_device *xe) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + int err; + + /* Setup first tile; other tiles (if present) will be setup later. */ + root_tile->mmio.size = SZ_16M; + root_tile->mmio.regs = xe->mmio.regs; + + err = xe_verify_lmem_ready(xe); + if (err) + return err; + + return 0; +} + +/** + * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads + * @gt: MMIO target GT + * @reg: register to read value from + * + * Although Intel GPUs have some 64-bit registers, the hardware officially + * only supports GTTMMADR register reads of 32 bits or smaller. Even if + * a readq operation may return a reasonable value, that violation of the + * spec shouldn't be relied upon and all 64-bit register reads should be + * performed as two 32-bit reads of the upper and lower dwords. + * + * When reading registers that may be changing (such as + * counters), a rollover of the lower dword between the two 32-bit reads + * can be problematic. This function attempts to ensure the upper dword has + * stabilized before returning the 64-bit value. + * + * Note that because this function may re-read the register multiple times + * while waiting for the value to stabilize it should not be used to read + * any registers where read operations have side effects. + * + * Returns the value of the 64-bit register. + */ +u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; + u32 ldw, udw, oldudw, retries; + + if (reg.addr < gt->mmio.adj_limit) { + reg.addr += gt->mmio.adj_offset; + reg_udw.addr += gt->mmio.adj_offset; + } + + oldudw = xe_mmio_read32(gt, reg_udw); + for (retries = 5; retries; --retries) { + ldw = xe_mmio_read32(gt, reg); + udw = xe_mmio_read32(gt, reg_udw); + + if (udw == oldudw) + break; + + oldudw = udw; + } + + xe_gt_WARN(gt, retries == 0, + "64-bit read of %#x did not stabilize\n", reg.addr); + + return (u64)udw << 32 | ldw; +} + +/** + * xe_mmio_wait32() - Wait for a register to match the desired masked value + * @gt: MMIO target GT + * @reg: register to read value from + * @mask: mask to be applied to the value read from the register + * @val: desired value after applying the mask + * @timeout_us: time out after this period of time. Wait logic tries to be + * smart, applying an exponential backoff until @timeout_us is reached. + * @out_val: if not NULL, points where to store the last unmasked value + * @atomic: needs to be true if calling from an atomic context + * + * This function polls for the desired masked value and returns zero on success + * or -ETIMEDOUT if timed out. + * + * Note that @timeout_us represents the minimum amount of time to wait before + * giving up. 
The actual time taken by this function can be a little more than + * @timeout_us for different reasons, specially in non-atomic contexts. Thus, + * it is possible that this function succeeds even after @timeout_us has passed. + */ +int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic) +{ + ktime_t cur = ktime_get_raw(); + const ktime_t end = ktime_add_us(cur, timeout_us); + int ret = -ETIMEDOUT; + s64 wait = 10; + u32 read; + + for (;;) { + read = xe_mmio_read32(gt, reg); + if ((read & mask) == val) { + ret = 0; + break; + } + + cur = ktime_get_raw(); + if (!ktime_before(cur, end)) + break; + + if (ktime_after(ktime_add_us(cur, wait), end)) + wait = ktime_us_delta(end, cur); + + if (atomic) + udelay(wait); + else + usleep_range(wait, wait << 1); + wait <<= 1; + } + + if (ret != 0) { + read = xe_mmio_read32(gt, reg); + if ((read & mask) == val) + ret = 0; + } + + if (out_val) + *out_val = read; + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h new file mode 100644 index 000000000000..98de5c13c89b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021-2023 Intel Corporation + */ + +#ifndef _XE_MMIO_H_ +#define _XE_MMIO_H_ + +#include <linux/delay.h> +#include <linux/io-64-nonatomic-lo-hi.h> + +#include "regs/xe_reg_defs.h" +#include "xe_device_types.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; + +#define LMEM_BAR 2 + +int xe_mmio_init(struct xe_device *xe); +int xe_mmio_root_tile_init(struct xe_device *xe); +void xe_mmio_probe_tiles(struct xe_device *xe); + +static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +static inline void xe_mmio_write32(struct xe_gt *gt, + struct xe_reg reg, u32 val) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, + u32 set) +{ + u32 old, reg_val; + + old = xe_mmio_read32(gt, reg); + reg_val = (old & ~clr) | set; + xe_mmio_write32(gt, reg, reg_val); + + return old; +} + +static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, + struct xe_reg reg, u32 val, + u32 mask, u32 eval) +{ + u32 reg_val; + + xe_mmio_write32(gt, reg, val); + reg_val = xe_mmio_read32(gt, reg); + + return (reg_val & mask) != eval ? 
-EINVAL : 0; +} + +static inline bool xe_mmio_in_range(const struct xe_gt *gt, + const struct xe_mmio_range *range, + struct xe_reg reg) +{ + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return range && reg.addr >= range->start && reg.addr <= range->end; +} + +int xe_mmio_probe_vram(struct xe_device *xe); +u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); +int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c new file mode 100644 index 000000000000..ef79552e4f2f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_mocs.h" + +#include "regs/xe_gt_regs.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_step_types.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define mocs_dbg drm_dbg +#else +__printf(2, 3) +static inline void mocs_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif + +enum { + HAS_GLOBAL_MOCS = BIT(0), + HAS_LNCF_MOCS = BIT(1), +}; + +struct xe_mocs_entry { + u32 control_value; + u16 l3cc_value; + u16 used; +}; + +struct xe_mocs_info { + unsigned int size; + unsigned int n_entries; + const struct xe_mocs_entry *table; + u8 uc_index; + u8 wb_index; + u8 unused_entries_index; +}; + +/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) +#define LE_LRUM(value) ((value) << 4) +#define LE_AOM(value) ((value) << 6) +#define LE_RSC(value) ((value) << 7) +#define LE_SCC(value) ((value) << 8) +#define LE_PFM(value) ((value) << 11) +#define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) + +/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ +#define L3_ESC(value) ((value) << 0) +#define L3_SCC(value) ((value) << 1) +#define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) + +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ +#define IG_PAT REG_BIT(8) +#define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4) +#define L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) + +/* Helper defines */ +#define XELP_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ +#define PVC_NUM_MOCS_ENTRIES 3 +#define MTL_NUM_MOCS_ENTRIES 16 +#define XE2_NUM_MOCS_ENTRIES 16 + +/* (e)LLC caching options */ +/* + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means + * the same as LE_UC + */ +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) + +/* Target cache */ +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +/* L4 caching options */ +#define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0) +#define L4_1_WT REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 1) +#define L4_3_UC REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 3) + +#define XE2_L3_0_WB REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 0) +/* XD: WB Transient Display */ +#define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1) +#define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } + +/* + * MOCS tables + * + * These are the MOCS tables that are programmed across all the rings. + * The control value is programmed to all the rings that support the + * MOCS registers. While the l3cc_values are only programmed to the + * LNCFCMOCS0 - LNCFCMOCS32 registers. + * + * These tables are intended to be kept reasonably consistent across + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, the list of entries is published as part of bspec. + * + * Entries not part of the following tables are undefined as far as userspace is + * concerned and shouldn't be relied upon. The last few entries are reserved by + * the hardware. They should be initialized according to bspec and never used. + * + * NOTE1: These tables are part of bspec and defined as part of the hardware + * interface. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by adding new + * entries, filling unused positions. + * + * NOTE2: Reserved and unspecified MOCS indices have been set to L3 WB. These + * reserved entries should never be used. They may be changed to low performant + * variants with better coherency in the future if more entries are needed. + */ + +static const struct xe_mocs_entry gen12_mocs_desc[] = { + /* Base - L3 + LLC */ + MOCS_ENTRY(2, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Base - Uncached */ + MOCS_ENTRY(3, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* Base - L3 */ + MOCS_ENTRY(4, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Base - LLC */ + MOCS_ENTRY(5, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Age 0 - LLC */ + MOCS_ENTRY(6, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), + L3_1_UC), + /* Age 0 - L3 + LLC */ + MOCS_ENTRY(7, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), + L3_3_WB), + /* Age: Don't Chg. - LLC */ + MOCS_ENTRY(8, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), + L3_1_UC), + /* Age: Don't Chg. 
- L3 + LLC */ + MOCS_ENTRY(9, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), + L3_3_WB), + /* No AOM - LLC */ + MOCS_ENTRY(10, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), + L3_1_UC), + /* No AOM - L3 + LLC */ + MOCS_ENTRY(11, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), + L3_3_WB), + /* No AOM; Age 0 - LLC */ + MOCS_ENTRY(12, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), + L3_1_UC), + /* No AOM; Age 0 - L3 + LLC */ + MOCS_ENTRY(13, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), + L3_3_WB), + /* No AOM; Age:DC - LLC */ + MOCS_ENTRY(14, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), + L3_1_UC), + /* No AOM; Age:DC - L3 + LLC */ + MOCS_ENTRY(15, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), + L3_3_WB), + /* Self-Snoop - L3 + LLC */ + MOCS_ENTRY(18, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(12.5%) */ + MOCS_ENTRY(19, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), + L3_3_WB), + /* Skip Caching - L3 + LLC(25%) */ + MOCS_ENTRY(20, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(50%) */ + MOCS_ENTRY(21, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), + L3_3_WB), + /* Skip Caching - L3 + LLC(75%) */ + MOCS_ENTRY(22, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(87.5%) */ + MOCS_ENTRY(23, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* HW Reserved - SW program but never use */ + MOCS_ENTRY(62, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Reserved - SW program but never use */ + MOCS_ENTRY(63, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC) +}; + +static const struct xe_mocs_entry dg1_mocs_desc[] = { + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + /* WB - L3 */ + MOCS_ENTRY(5, 0, L3_3_WB), + /* WB - L3 50% */ + MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), + /* WB - L3 25% */ + MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), + /* WB - L3 12.5% */ + MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), + + /* HDC:L1 + L3 */ + MOCS_ENTRY(48, 0, L3_3_WB), + /* HDC:L1 */ + MOCS_ENTRY(49, 0, L3_1_UC), + + /* HW Reserved */ + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + +static const struct xe_mocs_entry dg2_mocs_desc[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + 
MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry pvc_mocs_desc[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), + + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(2, 0, L3_3_WB), +}; + +static const struct xe_mocs_entry mtl_mocs_desc[] = { + /* Error - Reserved for Non-Use */ + MOCS_ENTRY(0, + 0, + L3_LKUP(1) | L3_3_WB), + /* Cached - L3 + L4 */ + MOCS_ENTRY(1, + IG_PAT, + L3_LKUP(1) | L3_3_WB), + /* L4 - GO:L3 */ + MOCS_ENTRY(2, + IG_PAT, + L3_LKUP(1) | L3_1_UC), + /* Uncached - GO:L3 */ + MOCS_ENTRY(3, + IG_PAT | L4_3_UC, + L3_LKUP(1) | L3_1_UC), + /* L4 - GO:Mem */ + MOCS_ENTRY(4, + IG_PAT, + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* Uncached - GO:Mem */ + MOCS_ENTRY(5, + IG_PAT | L4_3_UC, + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* L4 - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(6, + IG_PAT, + L3_1_UC), + /* Uncached - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(7, + IG_PAT | L4_3_UC, + L3_1_UC), + /* L4 - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(8, + IG_PAT, + L3_GLBGO(1) | L3_1_UC), + /* Uncached - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(9, + IG_PAT | L4_3_UC, + L3_GLBGO(1) | L3_1_UC), + /* Display - L3; L4:WT */ + MOCS_ENTRY(14, + IG_PAT | L4_1_WT, + L3_LKUP(1) | L3_3_WB), + /* CCS - Non-Displayable */ + MOCS_ENTRY(15, + IG_PAT, + L3_GLBGO(1) | L3_1_UC), +}; + +static const struct xe_mocs_entry xe2_mocs_table[] = { + /* Defer to PAT */ + MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), + /* Cached L3, Uncached L4 */ + MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0), + /* Uncached L3, Cached L4 */ + MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), + /* Uncached L3 + L4 */ + MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), + /* Cached L3 + L4 */ + MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), +}; + +static unsigned int get_mocs_settings(struct xe_device *xe, + struct xe_mocs_info *info) +{ + unsigned int flags = 0; + + memset(info, 0, sizeof(struct xe_mocs_info)); + + switch (xe->info.platform) { + case XE_LUNARLAKE: + info->size = ARRAY_SIZE(xe2_mocs_table); + info->table = xe2_mocs_table; + info->n_entries = XE2_NUM_MOCS_ENTRIES; + info->uc_index = 3; + info->wb_index = 4; + info->unused_entries_index = 4; + break; + case XE_PVC: + info->size = ARRAY_SIZE(pvc_mocs_desc); + info->table = pvc_mocs_desc; + info->n_entries = PVC_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->wb_index = 2; + info->unused_entries_index = 2; + break; + case XE_METEORLAKE: + info->size = ARRAY_SIZE(mtl_mocs_desc); + info->table = mtl_mocs_desc; + info->n_entries = MTL_NUM_MOCS_ENTRIES; + info->uc_index = 9; + info->unused_entries_index = 1; + break; + case XE_DG2: + if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && + xe->info.step.graphics >= STEP_A0 && + xe->info.step.graphics <= STEP_B0) { + info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); + info->table = dg2_mocs_desc_g10_ax; + } else { + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; + } + info->uc_index = 1; + info->n_entries = XELP_NUM_MOCS_ENTRIES; + info->unused_entries_index = 3; + break; + case XE_DG1: + info->size = ARRAY_SIZE(dg1_mocs_desc); + info->table = dg1_mocs_desc; + info->uc_index = 1; + info->n_entries = XELP_NUM_MOCS_ENTRIES; + info->unused_entries_index = 5; + break; + case XE_TIGERLAKE: + case XE_ROCKETLAKE: + case XE_ALDERLAKE_S: + case XE_ALDERLAKE_P: + case XE_ALDERLAKE_N: + info->size = ARRAY_SIZE(gen12_mocs_desc); + info->table = gen12_mocs_desc; + info->n_entries = XELP_NUM_MOCS_ENTRIES; + info->uc_index = 3; + 
+		info->unused_entries_index = 2;
+		break;
+	default:
+		drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+		return 0;
+	}
+
+	/*
+	 * Index 0 is a reserved/unused table entry on most platforms, but
+	 * even on those where it does represent a legitimate MOCS entry, it
+	 * never represents the "most cached, least coherent" behavior we want
+	 * to populate undefined table rows with. So if unused_entries_index
+	 * is still 0 at this point, we'll assume that it was omitted by
+	 * mistake in the switch statement above.
+	 */
+	xe_assert(xe, info->unused_entries_index != 0);
+
+	if (XE_WARN_ON(info->size > info->n_entries)) {
+		info->table = NULL;
+		return 0;
+	}
+
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20)
+		flags |= HAS_GLOBAL_MOCS;
+	if (GRAPHICS_VER(xe) < 20)
+		flags |= HAS_LNCF_MOCS;
+
+	return flags;
+}
+
+/*
+ * Get control_value from MOCS entry. If the table entry is not defined, the
+ * settings from unused_entries_index will be returned.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+			     unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].control_value;
+	return info->table[info->unused_entries_index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	unsigned int i;
+	u32 mocs;
+
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0;
+	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, mocs);
+
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
+		else
+			xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+	}
+}
+
+/*
+ * Get l3cc_value from MOCS entry. If the table entry is not defined, the
+ * settings from unused_entries_index will be returned.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+			  unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].l3cc_value;
+	return info->table[info->unused_entries_index].l3cc_value;
+}
+
+static u32 l3cc_combine(u16 low, u16 high)
+{
+	return low | (u32)high << 16;
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	unsigned int i;
+	u32 l3cc;
+
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
+	for (i = 0;
+	     i < (info->n_entries + 1) / 2 ?
+	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
+			 l3cc);
+
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
+		else
+			xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+	}
+}
+
+void xe_mocs_init_early(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+
+	get_mocs_settings(gt_to_xe(gt), &table);
+	gt->mocs.uc_index = table.uc_index;
+	gt->mocs.wb_index = table.wb_index;
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+	unsigned int flags;
+
+	/*
+	 * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS"
+	 * registers depending on platform.
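+	 * (Pre-Xe2 integrated parts lack the global MOCS registers, while
+	 * Xe2+ drops the LNCF registers; see the HAS_GLOBAL_MOCS and
+	 * HAS_LNCF_MOCS flags set in get_mocs_settings().)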
+	 *
+	 * These registers should be programmed before GuC initialization
+	 * since their values will affect some of the memory transactions
+	 * performed by the GuC.
+	 */
+	flags = get_mocs_settings(gt_to_xe(gt), &table);
+	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
+
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt, &table);
+	if (flags & HAS_LNCF_MOCS)
+		init_l3cc_table(gt, &table);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 000000000000..053754c5a94e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_exec_queue;
+struct xe_gt;
+
+void xe_mocs_init_early(struct xe_gt *gt);
+void xe_mocs_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 000000000000..110b69864656
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_module.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+struct xe_modparam xe_modparam = {
+	.enable_display = true,
+	.guc_log_level = 5,
+	.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
+	/* the rest are 0 by default */
+};
+
+module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
+MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
+
+module_param_named(enable_display, xe_modparam.enable_display, bool, 0444);
+MODULE_PARM_DESC(enable_display, "Enable display");
+
+module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB)");
+
+module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(guc_firmware_path,
+		 "GuC firmware path to use instead of the default one");
+
+module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(huc_firmware_path,
+		 "HuC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(gsc_firmware_path, xe_modparam.gsc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(gsc_firmware_path,
+		 "GSC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+		 "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details."); + +struct init_funcs { + int (*init)(void); + void (*exit)(void); +}; + +static const struct init_funcs init_funcs[] = { + { + .init = xe_hw_fence_module_init, + .exit = xe_hw_fence_module_exit, + }, + { + .init = xe_sched_job_module_init, + .exit = xe_sched_job_module_exit, + }, + { + .init = xe_register_pci_driver, + .exit = xe_unregister_pci_driver, + }, +}; + +static int __init xe_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err) { + while (i--) + init_funcs[i].exit(); + return err; + } + } + + return 0; +} + +static void __exit xe_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) + init_funcs[i].exit(); +} + +module_init(xe_init); +module_exit(xe_exit); + +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h new file mode 100644 index 000000000000..88ef0e8b2bfd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_module.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MODULE_H_ +#define _XE_MODULE_H_ + +#include <linux/types.h> + +/* Module modprobe variables */ +struct xe_modparam { + bool force_execlist; + bool enable_display; + u32 force_vram_bar_size; + int guc_log_level; + char *guc_firmware_path; + char *huc_firmware_path; + char *gsc_firmware_path; + char *force_probe; +}; + +extern struct xe_modparam xe_modparam; + +#endif + diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c new file mode 100644 index 000000000000..1ff6bc79e7d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_pat.h" + +#include <drm/xe_drm.h> + +#include "regs/xe_reg_defs.h" +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_mmio.h" + +#define _PAT_ATS 0x47fc +#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ + 0x4800, 0x4804, \ + 0x4848, 0x484c) +#define _PAT_PTA 0x4820 + +#define XE2_NO_PROMOTE REG_BIT(10) +#define XE2_COMP_EN REG_BIT(9) +#define XE2_L3_CLOS REG_GENMASK(7, 6) +#define XE2_L3_POLICY REG_GENMASK(5, 4) +#define XE2_L4_POLICY REG_GENMASK(3, 2) +#define XE2_COH_MODE REG_GENMASK(1, 0) + +#define XELPG_L4_POLICY_MASK REG_GENMASK(3, 2) +#define XELPG_PAT_3_UC REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3) +#define XELPG_PAT_1_WT REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1) +#define XELPG_PAT_0_WB REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 0) +#define XELPG_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define XELPG_3_COH_2W REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 3) +#define XELPG_2_COH_1W REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 2) +#define XELPG_0_COH_NON REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 0) + +#define XEHPC_CLOS_LEVEL_MASK REG_GENMASK(3, 2) +#define XEHPC_PAT_CLOS(x) REG_FIELD_PREP(XEHPC_CLOS_LEVEL_MASK, x) + +#define XELP_MEM_TYPE_MASK REG_GENMASK(1, 0) +#define XELP_PAT_WB REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 3) +#define XELP_PAT_WT REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 2) +#define XELP_PAT_WC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1) +#define XELP_PAT_UC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0) + +static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" }; + +struct xe_pat_ops { + void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int 
n_entries); + void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries); + void (*dump)(struct xe_gt *gt, struct drm_printer *p); +}; + +static const struct xe_pat_table_entry xelp_pat_table[] = { + [0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [1] = { XELP_PAT_WC, XE_COH_NONE }, + [2] = { XELP_PAT_WT, XE_COH_NONE }, + [3] = { XELP_PAT_UC, XE_COH_NONE }, +}; + +static const struct xe_pat_table_entry xehpc_pat_table[] = { + [0] = { XELP_PAT_UC, XE_COH_NONE }, + [1] = { XELP_PAT_WC, XE_COH_NONE }, + [2] = { XELP_PAT_WT, XE_COH_NONE }, + [3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE }, + [5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE }, + [7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, +}; + +static const struct xe_pat_table_entry xelpg_pat_table[] = { + [0] = { XELPG_PAT_0_WB, XE_COH_NONE }, + [1] = { XELPG_PAT_1_WT, XE_COH_NONE }, + [2] = { XELPG_PAT_3_UC, XE_COH_NONE }, + [3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY }, + [4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY }, +}; + +/* + * The Xe2 table is getting large/complicated so it's easier to review if + * provided in a form that exactly matches the bspec's formatting. The meaning + * of the fields here are: + * - no_promote: 0=promotable, 1=no promote + * - comp_en: 0=disable, 1=enable + * - l3clos: L3 class of service (0-3) + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * - l4_policy: 0=WB, 1=WT, 3=UC + * - coh_mode: 0=no snoop, 2=1-way coherent, 3=2-way coherent + * + * Reserved entries should be programmed with the maximum caching, minimum + * coherency (which matches an all-0's encoding), so we can just omit them + * in the table. + */ +#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ + { \ + .value = (no_promote ? XE2_NO_PROMOTE : 0) | \ + (comp_en ? XE2_COMP_EN : 0) | \ + REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \ + REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ + REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ + REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ + .coh_mode = __coh_mode ? 
XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + } + +static const struct xe_pat_table_entry xe2_pat_table[] = { + [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), + [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), + [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), + [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ), + [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ), + [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ), + [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ), + [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ), + [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ), + [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ), + [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ), + [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ), + [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ), + [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ), + [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ), + [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ), + /* 16..19 are reserved; leave set to all 0's */ + [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ), + [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ), + [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ), + [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ), + [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ), + [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ), + [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ), + [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ), + [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ), + [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ), + [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ), + [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), +}; + +/* Special PAT values programmed outside the main table */ +static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); + +u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) +{ + WARN_ON(pat_index >= xe->pat.n_entries); + return xe->pat.table[pat_index].coh_mode; +} + +static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) +{ + for (int i = 0; i < n_entries; i++) { + struct xe_reg reg = XE_REG(_PAT_INDEX(i)); + + xe_mmio_write32(gt, reg, table[i].value); + } +} + +static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) +{ + for (int i = 0; i < n_entries; i++) { + struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); + + xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); + } +} + +static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); + + drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, + XELP_MEM_TYPE_STR_MAP[mem_type], pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +static const struct xe_pat_ops xelp_pat_ops = { + .program_graphics = program_pat, + .dump = xelp_dump, +}; + +static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + u8 mem_type; + + mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); + + drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, + XELP_MEM_TYPE_STR_MAP[mem_type], pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +static const struct xe_pat_ops xehp_pat_ops = { + .program_graphics = program_pat_mcr, + 
.dump = xehp_dump, +}; + +static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, + REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat), + REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +static const struct xe_pat_ops xehpc_pat_ops = { + .program_graphics = program_pat_mcr, + .dump = xehpc_dump, +}; + +static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat; + + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, + REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat), + REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +/* + * SAMedia register offsets are adjusted by the write methods and they target + * registers that are not MCR, while for normal GT they are MCR + */ +static const struct xe_pat_ops xelpg_pat_ops = { + .program_graphics = program_pat, + .program_media = program_pat_mcr, + .dump = xelpg_dump, +}; + +static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) +{ + program_pat_mcr(gt, table, n_entries); + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); +} + +static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) +{ + program_pat(gt, table, n_entries); + xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value); +} + +static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + u32 pat; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, + !!(pat & XE2_NO_PROMOTE), + !!(pat & XE2_COMP_EN), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + } + + /* + * Also print PTA_MODE, which describes how the hardware accesses + * PPGTT entries. 
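+	 * PTA_MODE shares the field encoding of the per-index PAT entries
+	 * above, so it is decoded with the same masks.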
+ */ + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA)); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); + + drm_printf(p, "Page Table Access:\n"); + drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", + !!(pat & XE2_NO_PROMOTE), + !!(pat & XE2_COMP_EN), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +static const struct xe_pat_ops xe2_pat_ops = { + .program_graphics = xe2lpg_program_pat, + .program_media = xe2lpm_program_pat, + .dump = xe2_dump, +}; + +void xe_pat_init_early(struct xe_device *xe) +{ + if (GRAPHICS_VER(xe) == 20) { + xe->pat.ops = &xe2_pat_ops; + xe->pat.table = xe2_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 15; + xe->pat.idx[XE_CACHE_WB] = 2; + xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; /*Applicable on xe2 and beyond */ + } else if (xe->info.platform == XE_METEORLAKE) { + xe->pat.ops = &xelpg_pat_ops; + xe->pat.table = xelpg_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 2; + xe->pat.idx[XE_CACHE_WT] = 1; + xe->pat.idx[XE_CACHE_WB] = 3; + } else if (xe->info.platform == XE_PVC) { + xe->pat.ops = &xehpc_pat_ops; + xe->pat.table = xehpc_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 0; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 3; + } else if (xe->info.platform == XE_DG2) { + /* + * Table is the same as previous platforms, but programming + * method has changed. + */ + xe->pat.ops = &xehp_pat_ops; + xe->pat.table = xelp_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 0; + } else if (GRAPHICS_VERx100(xe) <= 1210) { + WARN_ON_ONCE(!IS_DGFX(xe) && !xe->info.has_llc); + xe->pat.ops = &xelp_pat_ops; + xe->pat.table = xelp_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 0; + } else { + /* + * Going forward we expect to need new PAT settings for most + * new platforms; failure to provide a new table can easily + * lead to subtle, hard-to-debug problems. If none of the + * conditions above match the platform we're running on we'll + * raise an error rather than trying to silently inherit the + * most recent platform's behavior. 
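+		 * In that case xe->pat.ops is left NULL and the PAT
+		 * programming and dump helpers below turn into no-ops.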
+		 */
+		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
+			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+	}
+}
+
+void xe_pat_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe->pat.ops)
+		return;
+
+	if (xe_gt_is_media_type(gt))
+		xe->pat.ops->program_media(gt, xe->pat.table, xe->pat.n_entries);
+	else
+		xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
+}
+
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/* A platform without a PAT table has no ops; see xe_pat_init_early() */
+	if (!xe->pat.ops || !xe->pat.ops->dump)
+		return;
+
+	xe->pat.ops->dump(gt, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
new file mode 100644
index 000000000000..fa0dfbe525cd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PAT_H_
+#define _XE_PAT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+struct xe_gt;
+
+/**
+ * struct xe_pat_table_entry - The pat_index encoding and other meta information.
+ */
+struct xe_pat_table_entry {
+	/**
+	 * @value: The platform-specific value encoding the various memory
+	 * attributes (this maps to some fixed pat_index). So things like
+	 * caching, coherency, compression etc. can be encoded here.
+	 */
+	u32 value;
+
+	/**
+	 * @coh_mode: The GPU coherency mode that @value maps to.
+	 */
+#define XE_COH_NONE 1
+#define XE_COH_AT_LEAST_1WAY 2
+	u16 coh_mode;
+};
+
+/**
+ * xe_pat_init_early - SW initialization, setting up data based on device
+ * @xe: xe device
+ */
+void xe_pat_init_early(struct xe_device *xe);
+
+/**
+ * xe_pat_init - Program HW PAT table
+ * @gt: GT structure
+ */
+void xe_pat_init(struct xe_gt *gt);
+
+/**
+ * xe_pat_dump - Dump PAT table
+ * @gt: GT structure
+ * @p: Printer to dump info to
+ */
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * xe_pat_index_get_coh_mode - Extract the coherency mode for the given
+ * pat_index.
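+ * The value is one of %XE_COH_NONE or %XE_COH_AT_LEAST_1WAY.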
+ * @xe: xe device + * @pat_index: The pat_index to query + */ +u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c new file mode 100644 index 000000000000..dcc5ded1558e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -0,0 +1,951 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_pci.h" + +#include <kunit/static_stub.h> +#include <linux/device/driver.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_color_mgmt.h> +#include <drm/drm_drv.h> +#include <drm/xe_pciids.h> + +#include "regs/xe_gt_regs.h" +#include "xe_device.h" +#include "xe_display.h" +#include "xe_drv.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_module.h" +#include "xe_pci_types.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_step.h" +#include "xe_tile.h" + +enum toggle_d3cold { + D3COLD_DISABLE, + D3COLD_ENABLE, +}; + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_gt_desc { + enum xe_gt_type type; + u32 mmio_adj_limit; + u32 mmio_adj_offset; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_graphics_desc *graphics; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_media_desc *media; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_heci_gscfi:1; + u8 has_llc:1; + u8 has_mmio_ext:1; + u8 has_sriov:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + +#define PLATFORM(x) \ + .platform = (x), \ + .platform_name = #x + +#define NOP(x) x + +static const struct xe_graphics_desc graphics_xelp = { + .name = "Xe_LP", + .ver = 12, + .rel = 0, + + .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + + .dma_mask_size = 39, + .va_bits = 48, + .vm_max_level = 3, +}; + +static const struct xe_graphics_desc graphics_xelpp = { + .name = "Xe_LP+", + .ver = 12, + .rel = 10, + + .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + + .dma_mask_size = 39, + .va_bits = 48, + .vm_max_level = 3, +}; + +#define XE_HP_FEATURES \ + .has_range_tlb_invalidation = true, \ + .has_flat_ccs = true, \ + .dma_mask_size = 46, \ + .va_bits = 48, \ + .vm_max_level = 3 + +static const struct xe_graphics_desc graphics_xehpg = { + .name = "Xe_HPG", + .ver = 12, + .rel = 55, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + + XE_HP_FEATURES, + .vram_flags = XE_VRAM_FLAGS_NEED64K, +}; + +static const struct xe_graphics_desc graphics_xehpc = { + .name = "Xe_HPC", + .ver = 12, + .rel = 60, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | + BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | + BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | + BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | + BIT(XE_HW_ENGINE_BCS8) | + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + + XE_HP_FEATURES, + .dma_mask_size = 52, + .max_remote_tiles = 1, + .va_bits = 57, + .vm_max_level = 4, + .vram_flags 
= XE_VRAM_FLAGS_NEED64K, + + .has_asid = 1, + .has_flat_ccs = 0, + .has_usm = 1, +}; + +static const struct xe_graphics_desc graphics_xelpg = { + .name = "Xe_LPG", + .hw_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_CCS0), + + XE_HP_FEATURES, + .has_flat_ccs = 0, +}; + +#define XE2_GFX_FEATURES \ + .dma_mask_size = 46, \ + .has_asid = 1, \ + .has_flat_ccs = 1, \ + .has_range_tlb_invalidation = 1, \ + .has_usm = 0 /* FIXME: implementation missing */, \ + .va_bits = 48, \ + .vm_max_level = 4, \ + .hw_engine_mask = \ + BIT(XE_HW_ENGINE_RCS0) | \ + BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \ + GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) + +static const struct xe_graphics_desc graphics_xe2 = { + .name = "Xe2_LPG", + + XE2_GFX_FEATURES, +}; + +static const struct xe_media_desc media_xem = { + .name = "Xe_M", + .ver = 12, + .rel = 0, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0), +}; + +static const struct xe_media_desc media_xehpm = { + .name = "Xe_HPM", + .ver = 12, + .rel = 55, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1), +}; + +static const struct xe_media_desc media_xelpmp = { + .name = "Xe_LPM+", + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0) +}; + +static const struct xe_media_desc media_xe2 = { + .name = "Xe2_LPM", + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ +}; + +static const struct xe_device_desc tgl_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_TIGERLAKE), + .has_display = true, + .has_llc = true, + .require_force_probe = true, +}; + +static const struct xe_device_desc rkl_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ROCKETLAKE), + .has_display = true, + .has_llc = true, + .require_force_probe = true, +}; + +static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 }; + +static const struct xe_device_desc adl_s_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ALDERLAKE_S), + .has_display = true, + .has_llc = true, + .require_force_probe = true, + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, + {}, + }, +}; + +static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; + +static const struct xe_device_desc adl_p_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ALDERLAKE_P), + .has_display = true, + .has_llc = true, + .require_force_probe = true, + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, + {}, + }, +}; + +static const struct xe_device_desc adl_n_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ALDERLAKE_N), + .has_display = true, + .has_llc = true, + .require_force_probe = true, +}; + +#define DGFX_FEATURES \ + .is_dgfx = 1 + +static const struct xe_device_desc dg1_desc = { + .graphics = &graphics_xelpp, + .media = &media_xem, + DGFX_FEATURES, + PLATFORM(XE_DG1), + .has_display = true, + .has_heci_gscfi = 1, + .require_force_probe = true, +}; + +static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; +static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; +static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; + +#define DG2_FEATURES \ + 
DGFX_FEATURES, \ + PLATFORM(XE_DG2), \ + .has_heci_gscfi = 1, \ + .subplatforms = (const struct xe_subplatform_desc[]) { \ + { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ + { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ + { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ + { } \ + } + +static const struct xe_device_desc ats_m_desc = { + .graphics = &graphics_xehpg, + .media = &media_xehpm, + .require_force_probe = true, + + DG2_FEATURES, + .has_display = false, +}; + +static const struct xe_device_desc dg2_desc = { + .graphics = &graphics_xehpg, + .media = &media_xehpm, + .require_force_probe = true, + + DG2_FEATURES, + .has_display = true, +}; + +static const __maybe_unused struct xe_device_desc pvc_desc = { + .graphics = &graphics_xehpc, + DGFX_FEATURES, + PLATFORM(XE_PVC), + .has_display = false, + .has_heci_gscfi = 1, + .require_force_probe = true, +}; + +static const struct xe_device_desc mtl_desc = { + /* .graphics and .media determined via GMD_ID */ + .require_force_probe = true, + PLATFORM(XE_METEORLAKE), + .has_display = true, +}; + +static const struct xe_device_desc lnl_desc = { + PLATFORM(XE_LUNARLAKE), + .require_force_probe = true, +}; + +#undef PLATFORM +__diag_pop(); + +/* Map of GMD_ID values to graphics IP */ +static struct gmdid_map graphics_ip_map[] = { + { 1270, &graphics_xelpg }, + { 1271, &graphics_xelpg }, + { 2004, &graphics_xe2 }, +}; + +/* Map of GMD_ID values to media IP */ +static struct gmdid_map media_ip_map[] = { + { 1300, &media_xelpmp }, + { 2000, &media_xe2 }, +}; + +#define INTEL_VGA_DEVICE(id, info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \ + PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \ + (unsigned long) info } + +/* + * Make sure any device matches here are from most specific to most + * general. For example, since the Quanta match is based on the subsystem + * and subvendor IDs, we need it to come before the more general IVB + * PCI ID matches, otherwise we'll use the wrong info struct above. 
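+ * (The Quanta/IVB example is inherited from i915; the xe table below has no
+ * such overlapping matches yet, but the ordering rule still applies.)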
+ */ +static const struct pci_device_id pciidlist[] = { + XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc), + XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), + XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc), + XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), + XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), + XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), + XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), + XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), + XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), + { } +}; +MODULE_DEVICE_TABLE(pci, pciidlist); + +#undef INTEL_VGA_DEVICE + +/* is device_id present in comma separated list of ids */ +static bool device_id_in_list(u16 device_id, const char *devices, bool negative) +{ + char *s, *p, *tok; + bool ret; + + if (!devices || !*devices) + return false; + + /* match everything */ + if (negative && strcmp(devices, "!*") == 0) + return true; + if (!negative && strcmp(devices, "*") == 0) + return true; + + s = kstrdup(devices, GFP_KERNEL); + if (!s) + return false; + + for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) { + u16 val; + + if (negative && tok[0] == '!') + tok++; + else if ((negative && tok[0] != '!') || + (!negative && tok[0] == '!')) + continue; + + if (kstrtou16(tok, 16, &val) == 0 && val == device_id) { + ret = true; + break; + } + } + + kfree(s); + + return ret; +} + +static bool id_forced(u16 device_id) +{ + return device_id_in_list(device_id, xe_modparam.force_probe, false); +} + +static bool id_blocked(u16 device_id) +{ + return device_id_in_list(device_id, xe_modparam.force_probe, true); +} + +static const struct xe_subplatform_desc * +find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc) +{ + const struct xe_subplatform_desc *sp; + const u16 *id; + + for (sp = desc->subplatforms; sp && sp->subplatform; sp++) + for (id = sp->pciidlist; *id; id++) + if (*id == xe->info.devid) + return sp; + + return NULL; +} + +enum xe_gmdid_type { + GMDID_GRAPHICS, + GMDID_MEDIA +}; + +static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_reg gmdid_reg = GMD_ID; + u32 val; + + KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); + + if (type == GMDID_MEDIA) + gmdid_reg.addr += MEDIA_GT_GSI_OFFSET; + + val = xe_mmio_read32(gt, gmdid_reg); + *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); + *revid = REG_FIELD_GET(GMD_ID_REVID, val); +} + +/* + * Pre-GMD_ID platform: device descriptor already points to the appropriate + * graphics descriptor. Simply forward the description and calculate the version + * appropriately. "graphics" should be present in all such platforms, while + * media is optional. + */ +static void handle_pre_gmdid(struct xe_device *xe, + const struct xe_graphics_desc *graphics, + const struct xe_media_desc *media) +{ + xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel; + + if (media) + xe->info.media_verx100 = media->ver * 100 + media->rel; + +} + +/* + * GMD_ID platform: read IP version from hardware and select graphics descriptor + * based on the result. 
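+ * A media GMD_ID readback of 0 is not treated as an error here; it means
+ * media is fused off or not present on this platform.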
+ */ +static void handle_gmdid(struct xe_device *xe, + const struct xe_graphics_desc **graphics, + const struct xe_media_desc **media, + u32 *graphics_revid, + u32 *media_revid) +{ + u32 ver; + + read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); + + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { + if (ver == graphics_ip_map[i].ver) { + xe->info.graphics_verx100 = ver; + *graphics = graphics_ip_map[i].ip; + + break; + } + } + + if (!xe->info.graphics_verx100) { + drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", + ver / 100, ver % 100); + } + + read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + + /* Media may legitimately be fused off / not present */ + if (ver == 0) + return; + + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { + if (ver == media_ip_map[i].ver) { + xe->info.media_verx100 = ver; + *media = media_ip_map[i].ip; + + break; + } + } + + if (!xe->info.media_verx100) { + drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", + ver / 100, ver % 100); + } +} + +/* + * Initialize device info content that only depends on static driver_data + * passed to the driver at probe time from PCI ID table. + */ +static int xe_info_init_early(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_subplatform_desc *subplatform_desc) +{ + int err; + + xe->info.platform = desc->platform; + xe->info.subplatform = subplatform_desc ? + subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; + + xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_heci_gscfi = desc->has_heci_gscfi; + xe->info.has_llc = desc->has_llc; + xe->info.has_mmio_ext = desc->has_mmio_ext; + xe->info.has_sriov = desc->has_sriov; + xe->info.skip_guc_pc = desc->skip_guc_pc; + xe->info.skip_mtcfg = desc->skip_mtcfg; + xe->info.skip_pcode = desc->skip_pcode; + + xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.enable_display && + desc->has_display; + + err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); + if (err) + return err; + + return 0; +} + +/* + * Initialize device info content that does require knowledge about + * graphics / media IP version. + * Make sure that GT / tile structures allocated by the driver match the data + * present in device info. + */ +static int xe_info_init(struct xe_device *xe, + const struct xe_graphics_desc *graphics_desc, + const struct xe_media_desc *media_desc) +{ + u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0; + struct xe_tile *tile; + struct xe_gt *gt; + u8 id; + + /* + * If this platform supports GMD_ID, we'll detect the proper IP + * descriptor to use from hardware registers. desc->graphics will only + * ever be set at this point for platforms before GMD_ID. In that case + * the IP descriptions and versions are simply derived from that. + */ + if (graphics_desc) { + handle_pre_gmdid(xe, graphics_desc, media_desc); + xe->info.step = xe_step_pre_gmdid_get(xe); + } else { + xe_assert(xe, !media_desc); + handle_gmdid(xe, &graphics_desc, &media_desc, + &graphics_gmdid_revid, &media_gmdid_revid); + xe->info.step = xe_step_gmdid_get(xe, + graphics_gmdid_revid, + media_gmdid_revid); + } + + /* + * If we couldn't detect the graphics IP, that's considered a fatal + * error and we should abort driver load. Failing to detect media + * IP is non-fatal; we'll just proceed without enabling media support. + */ + if (!graphics_desc) + return -ENODEV; + + xe->info.graphics_name = graphics_desc->name; + xe->info.media_name = media_desc ? 
media_desc->name : "none"; + xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; + + xe->info.dma_mask_size = graphics_desc->dma_mask_size; + xe->info.vram_flags = graphics_desc->vram_flags; + xe->info.va_bits = graphics_desc->va_bits; + xe->info.vm_max_level = graphics_desc->vm_max_level; + xe->info.has_asid = graphics_desc->has_asid; + xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; + xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_usm = graphics_desc->has_usm; + + /* + * All platforms have at least one primary GT. Any platform with media + * version 13 or higher has an additional dedicated media GT. And + * depending on the graphics IP there may be additional "remote tiles." + * All of these together determine the overall GT count. + * + * FIXME: 'tile_count' here is misnamed since the rest of the driver + * treats it as the number of GTs rather than just the number of tiles. + */ + xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; + + for_each_remote_tile(tile, xe, id) { + int err; + + err = xe_tile_init_early(tile, xe, id); + if (err) + return err; + } + + for_each_tile(tile, xe, id) { + gt = tile->primary_gt; + gt->info.id = xe->info.gt_count++; + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.__engine_mask = graphics_desc->hw_engine_mask; + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.__engine_mask |= media_desc->hw_engine_mask; + + if (MEDIA_VER(xe) < 13 || !media_desc) + continue; + + /* + * Allocate and setup media GT for platforms with standalone + * media. + */ + tile->media_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->media_gt)) + return PTR_ERR(tile->media_gt); + + gt = tile->media_gt; + gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.__engine_mask = media_desc->hw_engine_mask; + gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; + gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + + /* + * FIXME: At the moment multi-tile and standalone media are + * mutually exclusive on current platforms. We'll need to + * come up with a better way to number GTs if we ever wind + * up with platforms that support both together. + */ + drm_WARN_ON(&xe->drm, id != 0); + gt->info.id = xe->info.gt_count++; + } + + return 0; +} + +static void xe_pci_remove(struct pci_dev *pdev) +{ + struct xe_device *xe; + + xe = pci_get_drvdata(pdev); + if (!xe) /* driver load aborted, nothing to cleanup */ + return; + + xe_device_remove(xe); + xe_pm_runtime_fini(xe); + pci_set_drvdata(pdev, NULL); +} + +static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct xe_device_desc *desc = (const void *)ent->driver_data; + const struct xe_subplatform_desc *subplatform_desc; + struct xe_device *xe; + int err; + + if (desc->require_force_probe && !id_forced(pdev->device)) { + dev_info(&pdev->dev, + "Your graphics device %04x is not officially supported\n" + "by xe driver in this kernel version. 
To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	if (xe_display_driver_probe_defer(pdev))
+		return -EPROBE_DEFER;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	pci_set_drvdata(pdev, xe);
+
+	xe_pm_assert_unbounded_bridge(xe);
+	subplatform_desc = find_subplatform(xe, desc);
+
+	pci_set_master(pdev);
+
+	err = xe_info_init_early(xe, desc, subplatform_desc);
+	if (err)
+		return err;
+
+	xe_sriov_probe_early(xe, desc->has_sriov);
+
+	err = xe_device_probe_early(xe);
+	if (err)
+		return err;
+
+	err = xe_info_init(xe, desc->graphics, desc->media);
+	if (err)
+		return err;
+
+	xe_display_probe(xe);
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d",
+		desc->platform_name,
+		subplatform_desc ? subplatform_desc->name : "",
+		xe->info.devid, xe->info.revid,
+		xe->info.is_dgfx,
+		xe->info.graphics_name,
+		xe->info.graphics_verx100 / 100,
+		xe->info.graphics_verx100 % 100,
+		xe->info.media_name,
+		xe->info.media_verx100 / 100,
+		xe->info.media_verx100 % 100,
+		str_yes_no(xe->info.enable_display),
+		xe->info.dma_mask_size, xe->info.tile_count,
+		xe->info.has_heci_gscfi);
+
+	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+		xe_step_name(xe->info.step.graphics),
+		xe_step_name(xe->info.step.media),
+		xe_step_name(xe->info.step.display),
+		xe_step_name(xe->info.step.basedie));
+
+	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
+		str_yes_no(xe_device_has_sriov(xe)),
+		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+
+	err = xe_device_probe(xe);
+	if (err)
+		return err;
+
+	xe_pm_init(xe);
+
+	drm_dbg(&xe->drm, "d3cold: capable=%s\n",
+		str_yes_no(xe->d3cold.capable));
+
+	return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+	xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_pdev;
+
+	if (!xe->d3cold.capable)
+		return;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return;
+
+	switch (toggle) {
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_pdev);
+		break;
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_pdev);
+		break;
+	}
+}
+
+static int xe_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = xe_pm_suspend(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	/*
+	 * Enabling D3Cold is needed for S2Idle/S0ix.
+	 * It is safe to allow here since xe_pm_suspend has evicted
+	 * the local memory and the direct complete optimization is disabled.
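+	 * See xe_pm_runtime_init(), which sets DPM_FLAG_NO_DIRECT_COMPLETE
+	 * for dgfx devices for exactly this reason.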
+	 */
+	d3cold_toggle(pdev, D3COLD_ENABLE);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	/* Give back the D3Cold decision to the runtime PM */
+	d3cold_toggle(pdev, D3COLD_DISABLE);
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	err = xe_pm_resume(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = xe_pm_runtime_suspend(xe);
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+
+	if (xe->d3cold.allowed) {
+		d3cold_toggle(pdev, D3COLD_ENABLE);
+		pci_disable_device(pdev);
+		pci_ignore_hotplug(pdev);
+		pci_set_power_state(pdev, PCI_D3cold);
+	} else {
+		d3cold_toggle(pdev, D3COLD_DISABLE);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	if (xe->d3cold.allowed) {
+		err = pci_enable_device(pdev);
+		if (err)
+			return err;
+
+		pci_set_master(pdev);
+	}
+
+	return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_pm_d3cold_allowed_toggle(xe);
+
+	return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
+	SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle)
+};
+#endif
+
+static struct pci_driver xe_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = xe_pci_probe,
+	.remove = xe_pci_remove,
+	.shutdown = xe_pci_shutdown,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm = &xe_pm_ops,
+#endif
+};
+
+int xe_register_pci_driver(void)
+{
+	return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_pci.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 000000000000..611c1209b14c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
new file mode 100644
index 000000000000..b1ad12fa22d6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TYPES_H_
+#define _XE_PCI_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_graphics_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u8 dma_mask_size; /* available DMA address bits */
+	u8 va_bits;
+	u8 vm_max_level;
+	u8 vram_flags;
+
+	u64 hw_engine_mask; /* hardware engines provided by graphics IP */
+
+	u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */
+
+	u8 max_remote_tiles:2;
+
+	u8 has_asid:1;
+	u8 has_flat_ccs:1;
+	u8 has_range_tlb_invalidation:1;
+	u8 has_usm:1;
+};
+
+struct xe_media_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u64 hw_engine_mask; /* hardware engines provided by media IP */
+};
+
+struct gmdid_map {
+	unsigned int ver;
+	const void *ip;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 000000000000..b324dc2a5deb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode.h"
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+	u32 err;
+	static const struct pcode_err_decode err_decode[] = {
+		[PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+		[PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+		[PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+		[PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+		[PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+		[PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+			"GT ratio out of range"},
+		[PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+	};
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+	if (err) {
+		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+			err_decode[err].str ?: "Unknown");
+		return err_decode[err].errno ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout_ms, bool return_data,
+			    bool atomic)
+{
+	int err;
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
+		return -EAGAIN;
+
+	xe_mmio_write32(gt, PCODE_DATA0, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
+
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
+			     timeout_ms * 1000, NULL, atomic);
+	if (err)
+		return err;
+
+	if (return_data) {
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0);
+		if (data1)
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1);
+	}
+
+	return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				u32 request, u32 reply_mask, u32 reply,
+				u32 *status, bool atomic, int timeout_us)
+{
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us; slept += wait) {
+		*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+					   atomic);
+		if ((*status == 0) && ((request & reply_mask) == reply))
+			return 0;
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and, if this times out, for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+
+	mutex_lock(&gt->pcode.lock);
+
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   false, timeout_base_ms * 1000);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	drm_err(&gt_to_xe(gt)->drm,
+		"PCODE timeout, retrying with preemption disabled\n");
+	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+	preempt_disable();
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   true, timeout_base_ms * 1000);
+	preempt_enable();
+
+out:
+	mutex_unlock(&gt->pcode.lock);
+	return status ? status : ret;
+}
+
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initializes PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However for the latest platforms this table became a simple
+ * 1:1 ring vs GT frequency mapping. Even so, without setting it, PCODE might
+ * not take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, -EINVAL if the maximal frequency is not strictly
+ * higher than the minimal one, and other errors directly translated from the
+ * PCODE error returns:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO: "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW: "GT ratio out of range"
+ * - -EACCES: "PCODE Rejected"
+ * - -EPROTO: "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq)
+{
+	int ret;
+	u32 freq;
+
+	if (!gt_to_xe(gt)->info.has_llc)
+		return 0;
+
+	if (max_gt_freq <= min_gt_freq)
+		return -EINVAL;
+
+	mutex_lock(&gt->pcode.lock);
+	for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+		u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+		ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+				       &data, NULL, 1, false, false);
+		if (ret)
+			goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&gt->pcode.lock);
+	return ret;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and a negative error code on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+	u32 status, request = DGFX_GET_INIT_STATUS;
+	int timeout_us = 180000000; /* 3 min */
+	int ret;
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	mutex_lock(&gt->pcode.lock);
+	ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   &status, false, timeout_us);
+	mutex_unlock(&gt->pcode.lock);
+
+	if (ret)
+		drm_err(&gt_to_xe(gt)->drm,
+			"PCODE initialization timed out after 3 min\n");
+
+	return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and that it is really
+ * ready to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and a negative error code on failure.
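+ * On non-dgfx devices, or when pcode handling is skipped, there is nothing
+ * to wait for and it returns 0 immediately.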
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	return xe_pcode_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 000000000000..08cb1d047cba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+			   int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+	xe_pcode_write_timeout(gt, mbox, val, 1)
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_ms);
+
+#define PCODE_MBOX(mbcmd, param1, param2)\
+	(FIELD_PREP(PCODE_MB_COMMAND, mbcmd)\
+	| FIELD_PREP(PCODE_MB_PARAM1, param1)\
+	| FIELD_PREP(PCODE_MB_PARAM2, param2))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 000000000000..5935cfe30204
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#include "regs/xe_reg_defs.h"
+
+#define PCODE_MAILBOX XE_REG(0x138124)
+#define PCODE_READY REG_BIT(31)
+#define PCODE_MB_PARAM2 REG_GENMASK(23, 16)
+#define PCODE_MB_PARAM1 REG_GENMASK(15, 8)
+#define PCODE_MB_COMMAND REG_GENMASK(7, 0)
+#define PCODE_ERROR_MASK 0xFF
+#define PCODE_SUCCESS 0x0
+#define PCODE_ILLEGAL_CMD 0x1
+#define PCODE_TIMEOUT 0x2
+#define PCODE_ILLEGAL_DATA 0x3
+#define PCODE_ILLEGAL_SUBCOMMAND 0x4
+#define PCODE_LOCKED 0x6
+#define PCODE_GT_RATIO_OUT_OF_RANGE 0x10
+#define PCODE_REJECTED 0x11
+
+#define PCODE_DATA0 XE_REG(0x138128)
+#define PCODE_DATA1 XE_REG(0x13812C)
+
+/* Min Freq QOS Table */
+#define PCODE_WRITE_MIN_FREQ_TABLE 0x8
+#define PCODE_READ_MIN_FREQ_TABLE 0x9
+#define PCODE_FREQ_RING_RATIO_SHIFT 16
+
+/* PCODE Init */
+#define DGFX_PCODE_STATUS 0x7E
+#define DGFX_GET_INIT_STATUS 0x0
+#define DGFX_INIT_STATUS_COMPLETE 0x1
+
+#define PCODE_POWER_SETUP 0x7C
+#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4
+#define POWER_SETUP_SUBCOMMAND_WRITE_I1 0x5
+#define POWER_SETUP_I1_WATTS REG_BIT(31)
+#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */
+#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0)
+
+struct pcode_err_decode {
+	int errno;
+	const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 000000000000..553f53dbd093
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/*
+ * Keep this in graphics version based order and chronological order within a
+ * version
+ */
+enum xe_platform {
+	XE_PLATFORM_UNINITIALIZED = 0,
+	XE_TIGERLAKE,
+	XE_ROCKETLAKE,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
+	XE_ALDERLAKE_N,
+	XE_DG1,
+	XE_DG2,
+	XE_PVC,
+	XE_METEORLAKE,
+	XE_LUNARLAKE,
+};
+
+enum xe_subplatform {
+	
XE_SUBPLATFORM_UNINITIALIZED = 0, + XE_SUBPLATFORM_NONE, + XE_SUBPLATFORM_ALDERLAKE_P_RPLU, + XE_SUBPLATFORM_ALDERLAKE_S_RPLS, + XE_SUBPLATFORM_DG2_G10, + XE_SUBPLATFORM_DG2_G11, + XE_SUBPLATFORM_DG2_G12, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c new file mode 100644 index 000000000000..b429c2876a76 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_pm.h" + +#include <linux/pm_runtime.h> + +#include <drm/drm_managed.h> +#include <drm/ttm/ttm_placement.h> + +#include "xe_bo.h" +#include "xe_bo_evict.h" +#include "xe_device.h" +#include "xe_device_sysfs.h" +#include "xe_display.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_irq.h" +#include "xe_pcode.h" +#include "xe_wa.h" + +/** + * DOC: Xe Power Management + * + * Xe PM shall be guided by the simplicity. + * Use the simplest hook options whenever possible. + * Let's not reinvent the runtime_pm references and hooks. + * Shall have a clear separation of display and gt underneath this component. + * + * What's next: + * + * For now s2idle and s3 are only working in integrated devices. The next step + * is to iterate through all VRAM's BO backing them up into the system memory + * before allowing the system suspend. + * + * Also runtime_pm needs to be here from the beginning. + * + * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC + * and no wait boost. Frequency optimizations should come on a next stage. + */ + +/** + * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_suspend(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + for_each_gt(gt, xe, id) + xe_gt_suspend_prepare(gt); + + /* FIXME: Super racey... */ + err = xe_bo_evict_all(xe); + if (err) + return err; + + xe_display_pm_suspend(xe); + + for_each_gt(gt, xe, id) { + err = xe_gt_suspend(gt); + if (err) { + xe_display_pm_resume(xe); + return err; + } + } + + xe_irq_suspend(xe); + + xe_display_pm_suspend_late(xe); + + return 0; +} + +/** + * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_resume(struct xe_device *xe) +{ + struct xe_tile *tile; + struct xe_gt *gt; + u8 id; + int err; + + for_each_tile(tile, xe, id) + xe_wa_apply_tile_workarounds(tile); + + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) + return err; + } + + xe_display_pm_resume_early(xe); + + /* + * This only restores pinned memory which is the memory required for the + * GT(s) to resume. + */ + err = xe_bo_restore_kernel(xe); + if (err) + return err; + + xe_irq_resume(xe); + + xe_display_pm_resume(xe); + + for_each_gt(gt, xe, id) + xe_gt_resume(gt); + + err = xe_bo_restore_user(xe); + if (err) + return err; + + return 0; +} + +static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev) +{ + struct pci_dev *root_pdev; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return false; + + /* D3Cold requires PME capability and _PR3 power resource */ + if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev)) + return false; + + return true; +} + +static void xe_pm_runtime_init(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + /* + * Disable the system suspend direct complete optimization. 
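+	 * With direct complete, the PM core may otherwise skip the driver's
+	 * system suspend/resume callbacks entirely for an already
+	 * runtime-suspended device.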
+	 * We need to ensure that the regular device suspend/resume functions
+	 * are called since our runtime_pm cannot guarantee local memory
+	 * eviction for d3cold.
+	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
+	 * this option to integrated graphics as well.
+	 */
+	if (IS_DGFX(xe))
+		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put(dev);
+}
+
+void xe_pm_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_uc_enabled(xe))
+		return;
+
+	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+
+	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+
+	if (xe->d3cold.capable) {
+		xe_device_sysfs_init(xe);
+		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+	}
+
+	xe_pm_runtime_init(xe);
+}
+
+void xe_pm_runtime_fini(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
+}
+
+static void xe_pm_write_callback_task(struct xe_device *xe,
+				      struct task_struct *task)
+{
+	WRITE_ONCE(xe->pm_callback_task, task);
+
+	/*
+	 * Just in case it's somehow possible for our writes to be reordered to
+	 * the extent that something else re-uses the task written in
+	 * pm_callback_task. For example after returning from the callback, but
+	 * before the reordered write that resets pm_callback_task back to NULL.
+	 */
+	smp_mb(); /* pairs with xe_pm_read_callback_task */
+}
+
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
+{
+	smp_mb(); /* pairs with xe_pm_write_callback_task */
+
+	return READ_ONCE(xe->pm_callback_task);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
+		return -EBUSY;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	/*
+	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * exactly where that is called should make no difference to us. However
+	 * we still need to be very careful with the locks that this callback
+	 * acquires and the locks that are acquired and held by any callers of
+	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * on that side, but we also need it here. For example lockdep should be
+	 * able to tell us if the following scenario is in theory possible:
+	 *
+	 * CPU0                       | CPU1 (kworker)
+	 * lock(A)                    |
+	 *                            | xe_pm_runtime_suspend()
+	 *                            |     lock(A)
+	 * xe_device_mem_access_get() |
+	 *
+	 * This will clearly deadlock since rpm core needs to wait for
+	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+	 * on CPU0 which prevents CPU1 making forward progress. With the
+	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * the potential lock inversion and give us a nice splat.
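+	 * The lock_map_acquire()/lock_map_release() pair below provides that
+	 * annotation on the runtime-suspend side.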
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	if (xe->d3cold.allowed) {
+		err = xe_bo_evict_all(xe);
+		if (err)
+			goto out;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_suspend(xe);
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	/*
+	 * It can be possible that xe has allowed d3cold but other pcie devices
+	 * in gfx card soc would have blocked d3cold, therefore card has not
+	 * really lost power. Detecting primary GT power is sufficient.
+	 */
+	gt = xe_device_get_gt(xe, 0);
+	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		for_each_gt(gt, xe, id) {
+			err = xe_pcode_init(gt);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * This only restores pinned memory which is the memory
+		 * required for the GT(s) to resume.
+		 */
+		err = xe_bo_restore_kernel(xe);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		err = xe_bo_restore_user(xe);
+		if (err)
+			goto out;
+	}
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+	return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+	pm_runtime_mark_last_busy(xe->drm.dev);
+	return pm_runtime_put(xe->drm.dev);
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+	return pm_runtime_get_if_active(xe->drm.dev, true);
+}
+
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+
+	if (!bridge)
+		return;
+
+	if (!bridge->driver) {
+		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
+		device_set_pm_not_required(&pdev->dev);
+	}
+}
+
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
+{
+	struct ttm_resource_manager *man;
+	u32 vram_total_mb = 0;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man)
+			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
+	}
+
+	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
+
+	if (threshold > vram_total_mb)
+		return -EINVAL;
+
+	mutex_lock(&xe->d3cold.lock);
+	xe->d3cold.vram_threshold = threshold;
+	mutex_unlock(&xe->d3cold.lock);
+
+	return 0;
+}
+
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man;
+	u32 total_vram_used_mb = 0;
+	u64 vram_used;
+	int i;
+
+	if (!xe->d3cold.capable) {
+		xe->d3cold.allowed = false;
+		return;
+	}
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			vram_used = ttm_resource_manager_usage(man);
+			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
+		}
+	}
+
+	mutex_lock(&xe->d3cold.lock);
+
+	if (total_vram_used_mb < xe->d3cold.vram_threshold)
+		xe->d3cold.allowed = true;
+	else
+		xe->d3cold.allowed = false;
+
+	mutex_unlock(&xe->d3cold.lock);
+
+	drm_dbg(&xe->drm,
+		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
+}
diff --git 
a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h new file mode 100644 index 000000000000..6b9031f7af24 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PM_H_ +#define _XE_PM_H_ + +#include <linux/pm_runtime.h> + +/* + * TODO: Threshold = 0 will block D3Cold. + * Before we can move this to a higher value (like 300), we need to: + * 1. rewrite the VRAM save / restore to avoid buffer object locks + */ +#define DEFAULT_VRAM_THRESHOLD 0 /* in MB */ + +struct xe_device; + +int xe_pm_suspend(struct xe_device *xe); +int xe_pm_resume(struct xe_device *xe); + +void xe_pm_init(struct xe_device *xe); +void xe_pm_runtime_fini(struct xe_device *xe); +int xe_pm_runtime_suspend(struct xe_device *xe); +int xe_pm_runtime_resume(struct xe_device *xe); +int xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_put(struct xe_device *xe); +int xe_pm_runtime_get_if_active(struct xe_device *xe); +void xe_pm_assert_unbounded_bridge(struct xe_device *xe); +int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); +void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); +struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c new file mode 100644 index 000000000000..7bce2a332603 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_preempt_fence.h" + +#include <linux/slab.h> + +#include "xe_exec_queue.h" +#include "xe_vm.h" + +static void preempt_fence_work_func(struct work_struct *w) +{ + bool cookie = dma_fence_begin_signalling(); + struct xe_preempt_fence *pfence = + container_of(w, typeof(*pfence), preempt_work); + struct xe_exec_queue *q = pfence->q; + + if (pfence->error) + dma_fence_set_error(&pfence->base, pfence->error); + else + q->ops->suspend_wait(q); + + dma_fence_signal(&pfence->base); + dma_fence_end_signalling(cookie); + + xe_vm_queue_rebind_worker(q->vm); + + xe_exec_queue_put(q); +} + +static const char * +preempt_fence_get_driver_name(struct dma_fence *fence) +{ + return "xe"; +} + +static const char * +preempt_fence_get_timeline_name(struct dma_fence *fence) +{ + return "preempt"; +} + +static bool preempt_fence_enable_signaling(struct dma_fence *fence) +{ + struct xe_preempt_fence *pfence = + container_of(fence, typeof(*pfence), base); + struct xe_exec_queue *q = pfence->q; + + pfence->error = q->ops->suspend(q); + queue_work(system_unbound_wq, &pfence->preempt_work); + return true; +} + +static const struct dma_fence_ops preempt_fence_ops = { + .get_driver_name = preempt_fence_get_driver_name, + .get_timeline_name = preempt_fence_get_timeline_name, + .enable_signaling = preempt_fence_enable_signaling, +}; + +/** + * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal + * initialization + * + * Allocate a preempt fence, and initialize its list head. + * If the preempt_fence allocated has been armed with + * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not, + * it must be freed using xe_preempt_fence_free(). + * + * Return: A struct xe_preempt_fence pointer used for calling into + * xe_preempt_fence_arm() or xe_preempt_fence_free(). + * An error pointer on error. 
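+ * Currently the only failure mode is allocation failure, in which case
+ * ERR_PTR(-ENOMEM) is returned.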
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = kmalloc(sizeof(*pfence), GFP_KERNEL);
+	if (!pfence)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pfence->link);
+	INIT_WORK(&pfence->preempt_work, preempt_fence_work_func);
+
+	return pfence;
+}
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: pointer obtained from xe_preempt_fence_alloc()
+
+ *
+ * Free a preempt fence that has not yet been armed.
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+	list_del(&pfence->link);
+	kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ *          xe_preempt_fence_alloc().
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Inserts the preempt fence into @context's timeline, takes @link off any
+ * list, and registers the struct xe_exec_queue as the exec queue to be
+ * preempted.
+ *
+ * Return: A pointer to a struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno)
+{
+	list_del_init(&pfence->link);
+	pfence->q = xe_exec_queue_get(q);
+	dma_fence_init(&pfence->base, &preempt_fence_ops,
+		       &q->compute.lock, context, seqno);
+
+	return &pfence->base;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Allocates and inserts the preempt fence into @context's timeline,
+ * and registers @q as the struct xe_exec_queue to be preempted.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error. In particular, if allocation fails it returns
+ * ERR_PTR(-ENOMEM).
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = xe_preempt_fence_alloc();
+	if (IS_ERR(pfence))
+		return ERR_CAST(pfence);
+
+	return xe_preempt_fence_arm(pfence, q, context, seqno);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+	return fence->ops == &preempt_fence_ops;
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 000000000000..9406c6fea525
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct xe_preempt_fence, base);
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * to_preempt_fence_from_link() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+	return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+	return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 000000000000..b54b5c29b533
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_exec_queue;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * A preempt fence suspends (preempts) its exec queue on the
+ * hardware and triggers a callback once the suspend is complete.
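+ *
+ * In detail: once signaling is enabled on the embedded dma-fence, the exec
+ * queue is asked to suspend (q->ops->suspend()), a worker waits for the
+ * suspend to complete (q->ops->suspend_wait()), the fence is signaled and
+ * the vm's rebind worker is queued.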
+ */
+struct xe_preempt_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending preempt fences */
+	struct list_head link;
+	/** @q: exec queue for this preempt fence */
+	struct xe_exec_queue *q;
+	/** @preempt_work: work struct which issues preemption */
+	struct work_struct preempt_work;
+	/** @error: preempt fence is in error state */
+	int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 000000000000..de1030a47588
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1653 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pt.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;
+	/** @dir: Directory structure for the xe_pt_walk functionality */
+	struct xe_ptw_dir dir;
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+	return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
+			     unsigned int level)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+
+	if (!xe_vm_has_scratch(vm))
+		return 0;
+
+	if (level > MAX_HUGEPTE_LEVEL)
+		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
+						 0, pat_index);
+
+	return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
+		XE_PTE_NULL;
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @tile: The tile to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is greater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, an error pointer on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level)
+{
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+	size_t size;
+	int err;
+
+	size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
+		XE_PDES * sizeof(struct xe_ptw *);
+	pt = kzalloc(size, GFP_KERNEL);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+				  XE_BO_CREATE_PINNED_BIT |
+				  XE_BO_CREATE_NO_RESV_EVICT |
+				  XE_BO_PAGETABLE);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto err_kfree;
+	}
+	pt->bo = bo;
+	pt->level = level;
+	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+
+	if (vm->xef)
+		xe_drm_client_add_bo(vm->xef->client, pt->bo);
+	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
+
+	return pt;
+
+err_kfree:
+	kfree(pt);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @tile: The tile the scratch pagetable of which to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable the bo of which to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the tile's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 empty;
+	int i;
+
+	if (!xe_vm_has_scratch(vm)) {
+		/*
+		 * FIXME: Some memory is already allocated zeroed? Find out
+		 * which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+	} else {
+		empty = __xe_pt_empty_pte(tile, vm, pt->level);
+		for (i = 0; i < XE_PDES; i++)
+			xe_pt_write(vm->xe, map, i, empty);
+	}
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range of
+ * a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: The ilog2 value of the size of the address range of a page-table
+ * at level @level.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy.
+ * @flags: vm flags. Currently unused.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Puts the page-table bo, recursively calls xe_pt_destroy on all children
+ * and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	int i;
+
+	if (!pt)
+		return;
+
+	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+
+	if (pt->level > 0 && pt->num_live) {
+		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+		for (i = 0; i < XE_PDES; i++) {
+			if (xe_pt_entry(pt_dir, i))
+				xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+					      deferred);
+		}
+	}
+	kfree(pt);
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The update structure we're building for this parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: Whether the parent was pre-existing or allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** @base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @tile: The tile we're building for. */
+	struct xe_tile *tile;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64K: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @vma: VMA being mapped
+	 */
+	struct xe_vma *vma;
+
+	/* Also input, but is updated during the walk */
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The Virtual address corresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *entry;
+
+	/*
+	 * For *each level*, we can only have one active
+	 * struct xe_pt_update at any one time.
Once we move on to a + * new parent and page-directory, the old one is complete, and + * updates are either already stored in the build tree or in + * @wupd->entries + */ + if (likely(upd->parent == parent)) + return 0; + + upd->parent = parent; + upd->preexisting = true; + + if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) + return -EINVAL; + + entry = wupd->entries + wupd->num_used_entries++; + upd->update = entry; + entry->ofs = offset; + entry->pt_bo = parent->bo; + entry->pt = parent; + entry->flags = 0; + entry->qwords = 0; + + if (alloc_entries) { + entry->pt_entries = kmalloc_array(XE_PDES, + sizeof(*entry->pt_entries), + GFP_KERNEL); + if (!entry->pt_entries) + return -ENOMEM; + } + + return 0; +} + +/* + * NOTE: This is a very frequently called function so we allow ourselves + * to annotate (using branch prediction hints) the fastpath of updating a + * non-pre-existing pagetable with leaf ptes. + */ +static int +xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, + pgoff_t offset, struct xe_pt *xe_child, u64 pte) +{ + struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; + struct xe_pt_update *child_upd = xe_child ? + &xe_walk->wupd.updates[xe_child->level] : NULL; + int ret; + + ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); + if (unlikely(ret)) + return ret; + + /* + * Register this new pagetable so that it won't be recognized as + * a shared pagetable by a subsequent insertion. + */ + if (unlikely(child_upd)) { + child_upd->update = NULL; + child_upd->parent = xe_child; + child_upd->preexisting = false; + } + + if (likely(!upd->preexisting)) { + /* Continue building a non-connected subtree. */ + struct iosys_map *map = &parent->bo->vmap; + + if (unlikely(xe_child)) + parent->base.dir->entries[offset] = &xe_child->base; + + xe_pt_write(xe_walk->vm->xe, map, offset, pte); + parent->num_live++; + } else { + /* Shared pt. Stage update. */ + unsigned int idx; + struct xe_vm_pgtable_update *entry = upd->update; + + idx = offset - entry->ofs; + entry->pt_entries[idx].pt = xe_child; + entry->pt_entries[idx].pte = pte; + entry->qwords++; + } + + return 0; +} + +static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, + struct xe_pt_stage_bind_walk *xe_walk) +{ + u64 size, dma; + + if (level > MAX_HUGEPTE_LEVEL) + return false; + + /* Does the virtual range requested cover a huge pte? */ + if (!xe_pt_covers(addr, next, level, &xe_walk->base)) + return false; + + /* Does the DMA segment cover the whole pte? */ + if (next - xe_walk->va_curs_start > xe_walk->curs->size) + return false; + + /* null VMA's do not have dma addresses */ + if (xe_vma_is_null(xe_walk->vma)) + return true; + + /* Is the DMA address huge PTE size aligned? */ + size = next - addr; + dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); + + return IS_ALIGNED(dma, size); +} + +/* + * Scan the requested mapping to check whether it can be done entirely + * with 64K PTEs. 
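+ * For example, a 64K-aligned 2M range whose backing store consists of
+ * 64K-aligned, 64K-sized (or larger) DMA segments passes the scan, while a
+ * single misaligned or undersized segment anywhere in the range makes it
+ * fail.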
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor curs = *xe_walk->curs;
+
+	if (!IS_ALIGNED(addr, SZ_64K))
+		return false;
+
+	if (next > xe_walk->l0_end_addr)
+		return false;
+
+	/* null VMAs do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	xe_res_next(&curs, addr - xe_walk->va_curs_start);
+	for (; addr < next; addr += SZ_64K) {
+		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+			return false;
+
+		xe_res_next(&curs, SZ_64K);
+	}
+
+	return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Address is within an already found 64k region */
+	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+		return true;
+
+	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+	xe_walk->addr_64K = addr;
+
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	u16 pat_index = xe_walk->vma->pat_index;
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_vm *vm = xe_walk->vm;
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Is this a leaf entry? */
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+		bool is_null = xe_vma_is_null(xe_walk->vma);
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+						 xe_res_dma(curs) + xe_walk->dma_offset,
+						 xe_walk->vma, pat_index, level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the XE_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+				pte |= XE_PTE_PS64;
+			else if (XE_WARN_ON(xe_walk->needs_64K))
+				return -EINVAL;
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+
+		if (!is_null)
+			xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
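+	 *
+	 * For example, when binding [0, 2M) at level 1, the range covers the
+	 * entire level-0 page table below it, so a fresh (possibly compact)
+	 * child table can replace whatever was there, whereas a range that
+	 * only partially covers an existing child must reuse it.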
+ */ + if (level == 1) { + walk->shifts = xe_normal_pt_shifts; + xe_walk->l0_end_addr = next; + } + + covers = xe_pt_covers(addr, next, level, &xe_walk->base); + if (covers || !*child) { + u64 flags = 0; + + xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); + if (IS_ERR(xe_child)) + return PTR_ERR(xe_child); + + xe_pt_set_addr(xe_child, + round_down(addr, 1ull << walk->shifts[level])); + + if (!covers) + xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); + + *child = &xe_child->base; + + /* + * Prefer the compact pagetable layout for L0 if possible. + * TODO: Suballocate the pt bo to avoid wasting a lot of + * memory. + */ + if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && + covers && xe_pt_scan_64K(addr, next, xe_walk)) { + walk->shifts = xe_compact_pt_shifts; + flags |= XE_PDE_64K; + xe_child->is_compact = true; + } + + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, + pte); + } + + *action = ACTION_SUBTREE; + return ret; +} + +static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { + .pt_entry = xe_pt_stage_bind_entry, +}; + +/** + * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address + * range. + * @tile: The tile we're building for. + * @vma: The vma indicating the address range. + * @entries: Storage for the update entries used for connecting the tree to + * the main tree at commit time. + * @num_entries: On output contains the number of @entries used. + * + * This function builds a disconnected page-table tree for a given address + * range. The tree is connected to the main vm tree for the gpu using + * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). + * The function builds xe_vm_pgtable_update structures for already existing + * shared page-tables, and non-existing shared and non-shared page-tables + * are built and populated directly. + * + * Return 0 on success, negative error code on error. 
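+ *
+ * A typical caller, like __xe_pt_bind_vma() below, provides on-stack
+ * storage for the update entries and commits them after staging:
+ *
+ *	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+ *	u32 num_entries;
+ *
+ *	err = xe_pt_stage_bind(tile, vma, entries, &num_entries);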
+ */ +static int +xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_bo *bo = xe_vma_bo(vma); + bool is_devmem = !xe_vma_is_userptr(vma) && bo && + (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); + struct xe_res_cursor curs; + struct xe_pt_stage_bind_walk xe_walk = { + .base = { + .ops = &xe_pt_stage_bind_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .vm = xe_vma_vm(vma), + .tile = tile, + .curs = &curs, + .va_curs_start = xe_vma_start(vma), + .vma = vma, + .wupd.entries = entries, + .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, + }; + struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + int ret; + + if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && + (is_devmem || !IS_DGFX(xe))) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + + if (is_devmem) { + xe_walk.default_pte |= XE_PPGTT_PTE_DM; + xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); + } + + if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) + xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); + + xe_bo_assert_held(bo); + + if (!xe_vma_is_null(vma)) { + if (xe_vma_is_userptr(vma)) + xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), + &curs); + else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) + xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), + xe_vma_size(vma), &curs); + else + xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), + xe_vma_size(vma), &curs); + } else { + curs.size = xe_vma_size(vma); + } + + ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), + xe_vma_end(vma), &xe_walk.base); + + *num_entries = xe_walk.wupd.num_used_entries; + return ret; +} + +/** + * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a + * shared pagetable. + * @addr: The start address within the non-shared pagetable. + * @end: The end address within the non-shared pagetable. + * @level: The level of the non-shared pagetable. + * @walk: Walk info. The function adjusts the walk action. + * @action: next action to perform (see enum page_walk_action) + * @offset: Ignored on input, First non-shared entry on output. + * @end_offset: Ignored on input, Last non-shared entry + 1 on output. + * + * A non-shared page-table has some entries that belong to the address range + * and others that don't. This function determines the entries that belong + * fully to the address range. Depending on level, some entries may + * partially belong to the address range (that can't happen at level 0). + * The function detects that and adjust those offsets to not include those + * partial entries. Iff it does detect partial entries, we know that there must + * be shared page tables also at lower levels, so it adjusts the walk action + * accordingly. + * + * Return: true if there were non-shared entries, false otherwise. 
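+ *
+ * For example, at a level with 2M per entry and a page table starting at
+ * address 0, the range [1M, 5M) spans entries 0..2, but only entry 1
+ * (2M..4M) lies fully inside the range: @offset is bumped past the partial
+ * first entry, @end_offset is pulled in before the partial last one, and
+ * *@action becomes ACTION_SUBTREE so that the partial entries are
+ * traversed at a lower level.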
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or next are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're building for */
+	struct xe_tile *tile;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB */
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so need to
+	 * adjust level down.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @tile: The tile we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation need to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, this function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range are updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
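+ *
+ * Note that a caller that sees a true return value typically needs to
+ * issue a TLB invalidation afterwards, for example through
+ * xe_gt_tlb_invalidation_vma(), since the zapped PTEs may still be cached
+ * in the TLBs.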
+ */ +bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) +{ + struct xe_pt_zap_ptes_walk xe_walk = { + .base = { + .ops = &xe_pt_zap_ptes_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .tile = tile, + }; + struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + + if (!(vma->tile_present & BIT(tile->id))) + return false; + + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), + xe_vma_end(vma), &xe_walk.base); + + return xe_walk.needs_invalidate; +} + +static void +xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, + struct iosys_map *map, void *data, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct xe_pt_entry *ptes = update->pt_entries; + u64 *ptr = data; + u32 i; + + for (i = 0; i < num_qwords; i++) { + if (map) + xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * + sizeof(u64), u64, ptes[i].pte); + else + ptr[i] = ptes[i].pte; + } +} + +static void xe_pt_abort_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + u32 i, j; + + for (i = 0; i < num_entries; i++) { + if (!entries[i].pt_entries) + continue; + + for (j = 0; j < entries[i].qwords; j++) + xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); + kfree(entries[i].pt_entries); + } +} + +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + lockdep_assert_held(&vm->lock); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); + else if (!xe_vma_is_null(vma)) + dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); + + xe_vm_assert_held(vm); +} + +static void xe_pt_commit_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, bool rebind, + struct llist_head *deferred) +{ + u32 i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = 0; i < num_entries; i++) { + struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; + + if (!rebind) + pt->num_live += entries[i].qwords; + + if (!pt->level) { + kfree(entries[i].pt_entries); + continue; + } + + pt_dir = as_xe_pt_dir(pt); + for (j = 0; j < entries[i].qwords; j++) { + u32 j_ = j + entries[i].ofs; + struct xe_pt *newpte = entries[i].pt_entries[j].pt; + + if (xe_pt_entry(pt_dir, j_)) + xe_pt_destroy(xe_pt_entry(pt_dir, j_), + xe_vma_vm(vma)->flags, deferred); + + pt_dir->dir.entries[j_] = &newpte->base; + } + kfree(entries[i].pt_entries); + } +} + +static int +xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries, + bool rebind) +{ + int err; + + *num_entries = 0; + err = xe_pt_stage_bind(tile, vma, entries, num_entries); + if (!err) + xe_tile_assert(tile, *num_entries); + else /* abort! 
*/ + xe_pt_abort_bind(vma, entries, *num_entries); + + return err; +} + +static void xe_vm_dbg_print_entries(struct xe_device *xe, + const struct xe_vm_pgtable_update *entries, + unsigned int num_entries) +#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) +{ + unsigned int i; + + vm_dbg(&xe->drm, "%u entries to update\n", num_entries); + for (i = 0; i < num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &entries[i]; + struct xe_pt *xe_pt = entry->pt; + u64 page_size = 1ull << xe_pt_shift(xe_pt->level); + u64 end; + u64 start; + + xe_assert(xe, !entry->pt->is_compact); + start = entry->ofs * page_size; + end = start + page_size * entry->qwords; + vm_dbg(&xe->drm, + "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", + i, xe_pt->level, entry->ofs, entry->qwords, + xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); + } +} +#else +{} +#endif + +#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT + +static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + static u32 count; + + if (count++ % divisor == divisor - 1) { + struct xe_vm *vm = xe_vma_vm(vma); + + vma->userptr.divisor = divisor << 1; + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + return true; + } + + return false; +} + +#else + +static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + return false; +} + +#endif + +/** + * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks + * @base: Base we derive from. + * @bind: Whether this is a bind or an unbind operation. A bind operation + * makes the pre-commit callback error with -EAGAIN if it detects a + * pending invalidation. + * @locked: Whether the pre-commit callback locked the userptr notifier lock + * and it needs unlocking. + */ +struct xe_pt_migrate_pt_update { + struct xe_migrate_pt_update base; + bool bind; + bool locked; +}; + +/* + * This function adds the needed dependencies to a page-table update job + * to make sure racing jobs for separate bind engines don't race writing + * to the same page-table range, wreaking havoc. Initially use a single + * fence for the entire VM. An optimization would use smaller granularity. + */ +static int xe_pt_vm_dependencies(struct xe_sched_job *job, + struct xe_range_fence_tree *rftree, + u64 start, u64 last) +{ + struct xe_range_fence *rtfence; + struct dma_fence *fence; + int err; + + rtfence = xe_range_fence_tree_first(rftree, start, last); + while (rtfence) { + fence = rtfence->fence; + + if (!dma_fence_is_signaled(fence)) { + /* + * Is this a CPU update? 
GPU is busy updating, so return + * an error + */ + if (!job) + return -ETIME; + + dma_fence_get(fence); + err = drm_sched_job_add_dependency(&job->drm, fence); + if (err) + return err; + } + + rtfence = xe_range_fence_tree_next(rtfence, start, last); + } + + return 0; +} + +static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_range_fence_tree *rftree = + &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; + + return xe_pt_vm_dependencies(pt_update->job, rftree, + pt_update->start, pt_update->last); +} + +static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_pt_migrate_pt_update *userptr_update = + container_of(pt_update, typeof(*userptr_update), base); + struct xe_vma *vma = pt_update->vma; + unsigned long notifier_seq = vma->userptr.notifier_seq; + struct xe_vm *vm = xe_vma_vm(vma); + int err = xe_pt_vm_dependencies(pt_update->job, + &vm->rftree[pt_update->tile_id], + pt_update->start, + pt_update->last); + + if (err) + return err; + + userptr_update->locked = false; + + /* + * Wait until nobody is running the invalidation notifier, and + * since we're exiting the loop holding the notifier lock, + * nobody can proceed invalidating either. + * + * Note that we don't update the vma->userptr.notifier_seq since + * we don't update the userptr pages. + */ + do { + down_read(&vm->userptr.notifier_lock); + if (!mmu_interval_read_retry(&vma->userptr.notifier, + notifier_seq)) + break; + + up_read(&vm->userptr.notifier_lock); + + if (userptr_update->bind) + return -EAGAIN; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + } while (true); + + /* Inject errors to test_whether they are handled correctly */ + if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + up_read(&vm->userptr.notifier_lock); + return -EAGAIN; + } + + userptr_update->locked = true; + + return 0; +} + +static const struct xe_migrate_pt_update_ops bind_ops = { + .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_pre_commit, +}; + +static const struct xe_migrate_pt_update_ops userptr_bind_ops = { + .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_userptr_pre_commit, +}; + +struct invalidation_fence { + struct xe_gt_tlb_invalidation_fence base; + struct xe_gt *gt; + struct xe_vma *vma; + struct dma_fence *fence; + struct dma_fence_cb cb; + struct work_struct work; +}; + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +static void invalidation_fence_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct invalidation_fence *ifence = + container_of(cb, struct invalidation_fence, cb); + + trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); + if (!ifence->fence->error) { + queue_work(system_wq, &ifence->work); + } else { + ifence->base.base.error = ifence->fence->error; + dma_fence_signal(&ifence->base.base); + dma_fence_put(&ifence->base.base); + } + dma_fence_put(ifence->fence); +} + +static void invalidation_fence_work_func(struct work_struct *w) +{ + struct invalidation_fence *ifence = + container_of(w, struct invalidation_fence, work); + + trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); + 
xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); +} + +static int invalidation_fence_init(struct xe_gt *gt, + struct invalidation_fence *ifence, + struct dma_fence *fence, + struct xe_vma *vma) +{ + int ret; + + trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&ifence->base.base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + gt->tlb_invalidation.fence_context, + ++gt->tlb_invalidation.fence_seqno); + spin_unlock_irq(>->tlb_invalidation.lock); + + INIT_LIST_HEAD(&ifence->base.link); + + dma_fence_get(&ifence->base.base); /* Ref for caller */ + ifence->fence = fence; + ifence->gt = gt; + ifence->vma = vma; + + INIT_WORK(&ifence->work, invalidation_fence_work_func); + ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); + if (ret == -ENOENT) { + dma_fence_put(ifence->fence); /* Usually dropped in CB */ + invalidation_fence_work_func(&ifence->work); + } else if (ret) { + dma_fence_put(&ifence->base.base); /* Caller ref */ + dma_fence_put(&ifence->base.base); /* Creation ref */ + } + + xe_gt_assert(gt, !ret || ret == -ENOENT); + + return ret && ret != -ENOENT ? ret : 0; +} + +static void xe_pt_calc_rfence_interval(struct xe_vma *vma, + struct xe_pt_migrate_pt_update *update, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + int i, level = 0; + + for (i = 0; i < num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &entries[i]; + + if (entry->pt->level > level) + level = entry->pt->level; + } + + /* Greedy (non-optimal) calculation but simple */ + update->base.start = ALIGN_DOWN(xe_vma_start(vma), + 0x1ull << xe_pt_shift(level)); + update->base.last = ALIGN(xe_vma_end(vma), + 0x1ull << xe_pt_shift(level)) - 1; +} + +/** + * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma + * address range. + * @tile: The tile to bind for. + * @vma: The vma to bind. + * @q: The exec_queue with which to do pipelined page-table updates. + * @syncs: Entries to sync on before binding the built tree to the live vm tree. + * @num_syncs: Number of @sync entries. + * @rebind: Whether we're rebinding this vma to the same address range without + * an unbind in-between. + * + * This function builds a page-table tree (see xe_pt_stage_bind() for more + * information on page-table building), and the xe_vm_pgtable_update entries + * abstracting the operations needed to attach it to the main vm tree. It + * then takes the relevant locks and updates the metadata side of the main + * vm tree and submits the operations for pipelined attachment of the + * gpu page-table to the vm main tree, (which can be done either by the + * cpu and the GPU). + * + * Return: A valid dma-fence representing the pipelined attachment operation + * on success, an error pointer on error. + */ +struct dma_fence * +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update bind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? 
&userptr_bind_ops : &bind_ops, + .vma = vma, + .tile_id = tile->id, + }, + .bind = true, + }; + struct xe_vm *vm = xe_vma_vm(vma); + u32 num_entries; + struct dma_fence *fence; + struct invalidation_fence *ifence = NULL; + struct xe_range_fence *rfence; + int err; + + bind_pt_update.locked = false; + xe_bo_assert_held(xe_vma_bo(vma)); + xe_vm_assert_held(vm); + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "Preparing bind, with range [%llx...%llx) engine %p.\n", + xe_vma_start(vma), xe_vma_end(vma), q); + + err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); + xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, + num_entries); + + /* + * If rebind, we have to invalidate TLB on !LR vms to invalidate + * cached PTEs point to freed memory. on LR vms this is done + * automatically when the context is re-enabled by the rebind worker, + * or in fault mode it was invalidated on PTE zapping. + * + * If !rebind, and scratch enabled VMs, there is a chance the scratch + * PTE is already cached in the TLB so it needs to be invalidated. + * on !LR VMs this is done in the ring ops preceding a batch, but on + * non-faulting LR, in particular on user-space batch buffer chaining, + * it needs to be done here. + */ + if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || + (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) + return ERR_PTR(-ENOMEM); + } + + rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); + if (!rfence) { + kfree(ifence); + return ERR_PTR(-ENOMEM); + } + + fence = xe_migrate_update_pgtables(tile->migrate, + vm, xe_vma_bo(vma), q, + entries, num_entries, + syncs, num_syncs, + &bind_pt_update.base); + if (!IS_ERR(fence)) { + bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; + LLIST_HEAD(deferred); + int err; + + err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, + &xe_range_fence_kfree_ops, + bind_pt_update.base.start, + bind_pt_update.base.last, fence); + if (err) + dma_fence_wait(fence, false); + + /* TLB invalidation must be done before signaling rebind */ + if (ifence) { + int err = invalidation_fence_init(tile->primary_gt, ifence, fence, + vma); + if (err) { + dma_fence_put(fence); + kfree(ifence); + return ERR_PTR(err); + } + fence = &ifence->base.base; + } + + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + last_munmap_rebind ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_bind(vma, entries, num_entries, rebind, + bind_pt_update.locked ? &deferred : NULL); + + /* This vma is live (again?) now */ + vma->tile_present |= BIT(tile->id); + + if (bind_pt_update.locked) { + vma->userptr.initial_bind = true; + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + if (!rebind && last_munmap_rebind && + xe_vm_in_preempt_fence_mode(vm)) + xe_vm_queue_rebind_worker(vm); + } else { + kfree(rfence); + kfree(ifence); + if (bind_pt_update.locked) + up_read(&vm->userptr.notifier_lock); + xe_pt_abort_bind(vma, entries, num_entries); + } + + return fence; + +err: + return ERR_PTR(err); +} + +struct xe_pt_stage_unbind_walk { + /** @base: The pagewalk base-class. 
*/ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @tile: The tile we're unbinding from. */ + struct xe_tile *tile; + + /** + * @modified_start: Walk range start, modified to include any + * shared pagetables that we're the only user of and can thus + * treat as private. + */ + u64 modified_start; + /** @modified_end: Walk range start, modified like @modified_start. */ + u64 modified_end; + + /* Output */ + /* @wupd: Structure to track the page-table updates we're building */ + struct xe_walk_update wupd; +}; + +/* + * Check whether this range is the only one populating this pagetable, + * and in that case, update the walk range checks so that higher levels don't + * view us as a shared pagetable. + */ +static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, + const struct xe_pt *child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + unsigned int shift = walk->shifts[level]; + u64 size = 1ull << shift; + + if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && + ((next - addr) >> shift) == child->num_live) { + u64 size = 1ull << walk->shifts[level + 1]; + + *action = ACTION_CONTINUE; + + if (xe_walk->modified_start >= addr) + xe_walk->modified_start = round_down(addr, size); + if (xe_walk->modified_end <= next) + xe_walk->modified_end = round_up(next, size); + + return true; + } + + return false; +} + +static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + + XE_WARN_ON(!*child); + XE_WARN_ON(!level && xe_child->is_compact); + + xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); + + return 0; +} + +static int +xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + pgoff_t end_offset; + u64 size = 1ull << walk->shifts[--level]; + + if (!IS_ALIGNED(addr, size)) + addr = xe_walk->modified_start; + if (!IS_ALIGNED(next, size)) + next = xe_walk->modified_end; + + /* Parent == *child is the root pt. Don't kill it. */ + if (parent != *child && + xe_pt_check_kill(addr, next, level, xe_child, action, walk)) + return 0; + + if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, + &end_offset)) + return 0; + + (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); + xe_walk->wupd.updates[level].update->qwords = end_offset - offset; + + return 0; +} + +static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { + .pt_entry = xe_pt_stage_unbind_entry, + .pt_post_descend = xe_pt_stage_unbind_post_descend, +}; + +/** + * xe_pt_stage_unbind() - Build page-table update structures for an unbind + * operation + * @tile: The tile we're unbinding for. + * @vma: The vma we're unbinding. + * @entries: Caller-provided storage for the update structures. + * + * Builds page-table update structures for an unbind operation. 
The function
+ * will attempt to remove all page-tables that we're the only user
+ * of, and for that to work, the unbind operation must be committed in the
+ * same critical section that blocks racing binds to the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	struct xe_pt_stage_unbind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+		.modified_start = xe_vma_start(vma),
+		.modified_end = xe_vma_end(vma),
+		.wupd.entries = entries,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
+
+	return xe_walk.wupd.num_used_entries;
+}
+
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_tile *tile, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vma *vma = pt_update->vma;
+	u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level);
+	int i;
+
+	if (map && map->is_iomem)
+		for (i = 0; i < num_qwords; ++i)
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+				  sizeof(u64), u64, empty);
+	else if (map)
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+	else
+		memset64(ptr, empty, num_qwords);
+}
+
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+		    struct xe_vm_pgtable_update *entries, u32 num_entries,
+		    struct llist_head *deferred)
+{
+	u32 j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (j = 0; j < num_entries; ++j) {
+		struct xe_vm_pgtable_update *entry = &entries[j];
+		struct xe_pt *pt = entry->pt;
+
+		pt->num_live -= entry->qwords;
+		if (pt->level) {
+			struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+			u32 i;
+
+			for (i = entry->ofs; i < entry->ofs + entry->qwords;
+			     i++) {
+				if (xe_pt_entry(pt_dir, i))
+					xe_pt_destroy(xe_pt_entry(pt_dir, i),
+						      xe_vma_vm(vma)->flags, deferred);
+
+				pt_dir->dir.entries[i] = NULL;
+			}
+		}
+	}
+}
+
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @tile: The tile to unbind for.
+ * @vma: The vma to unbind.
+ * @q: The exec_queue with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @syncs entries.
+ *
+ * This function builds the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * main vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from the vm main tree,
+ * (which can be done either by the cpu or the GPU). Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
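+ *
+ * The caller owns a reference on the returned fence and is expected to
+ * install it where needed (for example when doing an eviction) before
+ * dropping it with dma_fence_put().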
+ */ +struct dma_fence * +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update unbind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : + &unbind_ops, + .vma = vma, + .tile_id = tile->id, + }, + }; + struct xe_vm *vm = xe_vma_vm(vma); + u32 num_entries; + struct dma_fence *fence = NULL; + struct invalidation_fence *ifence; + struct xe_range_fence *rfence; + + LLIST_HEAD(deferred); + + xe_bo_assert_held(xe_vma_bo(vma)); + xe_vm_assert_held(vm); + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "Preparing unbind, with range [%llx...%llx) engine %p.\n", + xe_vma_start(vma), xe_vma_end(vma), q); + + num_entries = xe_pt_stage_unbind(tile, vma, entries); + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); + xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, + num_entries); + + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) + return ERR_PTR(-ENOMEM); + + rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); + if (!rfence) { + kfree(ifence); + return ERR_PTR(-ENOMEM); + } + + /* + * Even if we were already evicted and unbind to destroy, we need to + * clear again here. The eviction may have updated pagetables at a + * lower level, because it needs to be more conservative. + */ + fence = xe_migrate_update_pgtables(tile->migrate, + vm, NULL, q ? q : + vm->q[tile->id], + entries, num_entries, + syncs, num_syncs, + &unbind_pt_update.base); + if (!IS_ERR(fence)) { + int err; + + err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, + &xe_range_fence_kfree_ops, + unbind_pt_update.base.start, + unbind_pt_update.base.last, fence); + if (err) + dma_fence_wait(fence, false); + + /* TLB invalidation must be done before signaling unbind */ + err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); + if (err) { + dma_fence_put(fence); + kfree(ifence); + return ERR_PTR(err); + } + fence = &ifence->base.base; + + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(xe_vm_resv(vm), fence, + DMA_RESV_USAGE_BOOKKEEP); + + /* This fence will be installed by caller when doing eviction */ + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_unbind(vma, entries, num_entries, + unbind_pt_update.locked ? 
&deferred : NULL); + vma->tile_present &= ~BIT(tile->id); + } else { + kfree(rfence); + kfree(ifence); + } + + if (!vma->tile_present) + list_del_init(&vma->combined_links.rebind); + + if (unbind_pt_update.locked) { + xe_tile_assert(tile, xe_vma_is_userptr(vma)); + + if (!vma->tile_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + } + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h new file mode 100644 index 000000000000..71a4fbfcff43 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_PT_H_ +#define _XE_PT_H_ + +#include <linux/types.h> + +#include "xe_pt_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_device; +struct xe_exec_queue; +struct xe_sync_entry; +struct xe_tile; +struct xe_vm; +struct xe_vma; + +/* Largest huge pte is currently 1GiB. May become device dependent. */ +#define MAX_HUGEPTE_LEVEL 2 + +#define xe_pt_write(xe, map, idx, data) \ + xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data) + +unsigned int xe_pt_shift(unsigned int level); + +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, + unsigned int level); + +void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, + struct xe_pt *pt); + +void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); + +struct dma_fence * +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind); + +struct dma_fence * +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs); + +bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h new file mode 100644 index 000000000000..cee70cb0f014 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PT_TYPES_H_ +#define _XE_PT_TYPES_H_ + +#include <linux/types.h> + +#include "xe_pt_walk.h" + +struct xe_bo; +struct xe_device; +struct xe_vma; + +enum xe_cache_level { + XE_CACHE_NONE, + XE_CACHE_WT, + XE_CACHE_WB, + XE_CACHE_NONE_COMPRESSION, /*UC + COH_NONE + COMPRESSION */ + __XE_CACHE_LEVEL_COUNT, +}; + +#define XE_VM_MAX_LEVEL 4 + +struct xe_pt { + struct xe_ptw base; + struct xe_bo *bo; + unsigned int level; + unsigned int num_live; + bool rebind; + bool is_compact; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) + /** addr: Virtual address start address of the PT. 
*/
+	u64 addr;
+#endif
+};
+
+struct xe_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
+			      u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
+			       u16 pat_index,
+			       u32 pt_level, bool devmem, u64 flags);
+	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index);
+};
+
+struct xe_pt_entry {
+	struct xe_pt *pt;
+	u64 pte;
+};
+
+struct xe_vm_pgtable_update {
+	/** @pt_bo: page table bo to write to */
+	struct xe_bo *pt_bo;
+
+	/** @ofs: offset inside the page table to begin writing to (in qwords) */
+	u32 ofs;
+
+	/** @qwords: number of PTEs to write */
+	u32 qwords;
+
+	/** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+	struct xe_pt *pt;
+
+	/** @pt_entries: Newly added pagetable entries */
+	struct xe_pt_entry *pt_entries;
+
+	/** @flags: Target flags */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 000000000000..8f6c8d063f39
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 cannot
+ * be directories pointing to lower levels, whereas all other levels can.
+ * The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level, is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can be used as a drm-wide page-
+ * table walker should other drivers find use for it.
+ */
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+			  const struct xe_pt_walk *walk)
+{
+	u64 size = 1ull << walk->shifts[level];
+	u64 tmp = round_up(addr + 1, size);
+
+	return min_t(u64, tmp, end);
+}
+
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+		       unsigned int level, const struct xe_pt_walk *walk)
+{
+	pgoff_t step = 1;
+
+	/* Shared pt walk skips to the last pagetable */
+	if (unlikely(walk->shared_pt_mode)) {
+		unsigned int shift = walk->shifts[level];
+		u64 skip_to = round_down(end, 1ull << shift);
+
+		if (skip_to > next) {
+			step += (skip_to - next) >> shift;
+			next = skip_to;
+		}
+	}
+
+	*addr = next;
+	*offset += step;
+
+	return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
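+ *
+ * A minimal setup sketch (walk ops, shifts and the root are provided by
+ * the caller):
+ *
+ *	struct xe_pt_walk walk = {
+ *		.ops = &my_walk_ops,
+ *		.shifts = my_shifts,
+ *		.max_level = root_level,
+ *	};
+ *
+ *	err = xe_pt_walk_range(root, root_level, addr, end, &walk);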
+ * + * Return: 0 on success, negative error code on error. The error is + * propagated from the callback and on error the walk is terminated. + */ +int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + pgoff_t offset = xe_pt_offset(addr, level, walk); + struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action; + struct xe_ptw *child; + int err = 0; + u64 next; + + do { + next = xe_pt_addr_end(addr, end, level, walk); + if (walk->shared_pt_mode && xe_pt_covers(addr, next, level, + walk)) + continue; +again: + action = ACTION_SUBTREE; + child = entries ? entries[offset] : NULL; + err = ops->pt_entry(parent, offset, level, addr, next, + &child, &action, walk); + if (err) + break; + + /* Probably not needed yet for gpu pagetable walk. */ + if (unlikely(action == ACTION_AGAIN)) + goto again; + + if (likely(!level || !child || action == ACTION_CONTINUE)) + continue; + + err = xe_pt_walk_range(child, level - 1, addr, next, walk); + + if (!err && ops->pt_post_descend) + err = ops->pt_post_descend(parent, offset, level, addr, + next, &child, &action, walk); + if (err) + break; + + } while (xe_pt_next(&offset, &addr, next, end, level, walk)); + + return err; +} + +/** + * xe_pt_walk_shared() - Walk shared page tables of a page-table tree. + * @parent: Root page table directory. + * @level: Level of the root. + * @addr: Start address. + * @end: Last address + 1. + * @walk: Walk info. + * + * This function is similar to xe_pt_walk_range() but it skips page tables + * that are private to the range. Since the root (or @parent) page table is + * typically also a shared page table this function is different in that it + * calls the pt_entry callback and the post_descend callback also for the + * root. The root can be detected in the callbacks by checking whether + * parent == *child. + * Walking only the shared page tables is common for unbind-type operations + * where the page-table entries for an address range are cleared or detached + * from the main page-table tree. + * + * Return: 0 on success, negative error code on error: If a callback + * returns an error, the walk will be terminated and the error returned by + * this function. + */ +int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action = ACTION_SUBTREE; + struct xe_ptw *child = parent; + int err; + + walk->shared_pt_mode = true; + err = walk->ops->pt_entry(parent, 0, level + 1, addr, end, + &child, &action, walk); + + if (err || action != ACTION_SUBTREE) + return err; + + err = xe_pt_walk_range(parent, level, addr, end, walk); + if (!err && ops->pt_post_descend) { + err = ops->pt_post_descend(parent, 0, level + 1, addr, end, + &child, &action, walk); + } + return err; +} diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h new file mode 100644 index 000000000000..ec3d1e9efa6d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef __XE_PT_WALK__ +#define __XE_PT_WALK__ + +#include <linux/pagewalk.h> +#include <linux/types.h> + +struct xe_ptw_dir; + +/** + * struct xe_ptw - base class for driver pagetable subclassing. + * @dir: Pointer to an array of children if any. 
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed the xe_ptw_dir::entries array in the same allocation.
+ */
+struct xe_ptw {
+	struct xe_ptw_dir *dir;
+};
+
+/**
+ * struct xe_ptw_dir - page directory structure
+ * @entries: Array holding page directory children.
+ *
+ * It is the responsibility of the user to ensure @entries is
+ * correctly sized.
+ */
+struct xe_ptw_dir {
+	struct xe_ptw *entries[0];
+};
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+	/** @ops: The walk ops used for the pagewalk */
+	const struct xe_pt_walk_ops *ops;
+	/**
+	 * @shifts: Array of page-table entry shifts used for the
+	 * different levels, starting out with the leaf level 0
+	 * page-shift as the first entry. It's legal for this pointer to be
+	 * changed during the walk.
+	 */
+	const u64 *shifts;
+	/** @max_level: Highest populated level in @shifts */
+	unsigned int max_level;
+	/**
+	 * @shared_pt_mode: Whether to skip all entries that are private
+	 * to the address range, so that the callbacks are invoked only for
+	 * entries that are shared with other address ranges. Such entries
+	 * are referred to as shared pagetables.
+	 */
+	bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+			      unsigned int level, u64 addr, u64 next,
+			      struct xe_ptw **child,
+			      enum page_walk_action *action,
+			      struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+	/**
+	 * @pt_entry: Callback to be called for each page table entry prior
+	 * to descending to the next level. The returned value of the action
+	 * function parameter is honored.
+	 */
+	xe_pt_entry_fn pt_entry;
+	/**
+	 * @pt_post_descend: Callback to be called for each page table entry
+	 * after return from descending to the next level. The returned value
+	 * of the action function parameter is ignored.
+	 */
+	xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * This function is a helper to aid in determining whether a leaf page table
+ * entry can be inserted at this @level.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+				const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
+}
+
+/**
+ * xe_pt_num_entries - Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The number of page table entries at this level between @addr and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+		  const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
+		walk->shifts[level];
+}
+
+/**
+ * xe_pt_offset - Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+	if (level < walk->max_level)
+		addr &= ((1ull << walk->shifts[level + 1]) - 1);
+
+	return addr >> walk->shifts[level];
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 000000000000..9b35673b286c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_query.h"
+
+#include <linux/nospec.h>
+#include <linux/sched/clock.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_ttm_vram_mgr.h"
+
+static const u16 xe_to_user_engine_class[] = {
+	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+			i++;
+		}
+
+	return sizeof(struct drm_xe_query_engines) +
+		i * sizeof(struct drm_xe_engine);
+}
+
+typedef u64 (*__ktime_func_t)(void);
+static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
+{
+	/*
+	 * Use the same logic as the perf subsystem to allow the user to
+	 * select the reference clock id to be used for timestamps.
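+	 *
+	 * A hypothetical userspace snippet (not from this patch) would pick
+	 * the reference clock before issuing the query, e.g.
+	 *
+	 *	struct drm_xe_query_engine_cycles resp = {
+	 *		.clockid = CLOCK_MONOTONIC,
+	 *	};
+	 *
+	 * and any clk_id not handled below is rejected with -EINVAL by
+	 * query_engine_cycles().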
+ */ + switch (clk_id) { + case CLOCK_MONOTONIC: + return &ktime_get_ns; + case CLOCK_MONOTONIC_RAW: + return &ktime_get_raw_ns; + case CLOCK_REALTIME: + return &ktime_get_real_ns; + case CLOCK_BOOTTIME: + return &ktime_get_boottime_ns; + case CLOCK_TAI: + return &ktime_get_clocktai_ns; + default: + return NULL; + } +} + +static void +__read_timestamps(struct xe_gt *gt, + struct xe_reg lower_reg, + struct xe_reg upper_reg, + u64 *engine_ts, + u64 *cpu_ts, + u64 *cpu_delta, + __ktime_func_t cpu_clock) +{ + u32 upper, lower, old_upper, loop = 0; + + upper = xe_mmio_read32(gt, upper_reg); + do { + *cpu_delta = local_clock(); + *cpu_ts = cpu_clock(); + lower = xe_mmio_read32(gt, lower_reg); + *cpu_delta = local_clock() - *cpu_delta; + old_upper = upper; + upper = xe_mmio_read32(gt, upper_reg); + } while (upper != old_upper && loop++ < 2); + + *engine_ts = (u64)upper << 32 | lower; +} + +static int +query_engine_cycles(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + struct drm_xe_query_engine_cycles __user *query_ptr; + struct drm_xe_engine_class_instance *eci; + struct drm_xe_query_engine_cycles resp; + size_t size = sizeof(resp); + __ktime_func_t cpu_clock; + struct xe_hw_engine *hwe; + struct xe_gt *gt; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + query_ptr = u64_to_user_ptr(query->data); + if (copy_from_user(&resp, query_ptr, size)) + return -EFAULT; + + cpu_clock = __clock_id_to_func(resp.clockid); + if (!cpu_clock) + return -EINVAL; + + eci = &resp.eci; + if (eci->gt_id > XE_MAX_GT_PER_TILE) + return -EINVAL; + + gt = xe_device_get_gt(xe, eci->gt_id); + if (!gt) + return -EINVAL; + + if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return -EINVAL; + + hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class], + eci->engine_instance, true); + if (!hwe) + return -EINVAL; + + xe_device_mem_access_get(xe); + xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + + __read_timestamps(gt, + RING_TIMESTAMP(hwe->mmio_base), + RING_TIMESTAMP_UDW(hwe->mmio_base), + &resp.engine_cycles, + &resp.cpu_timestamp, + &resp.cpu_delta, + cpu_clock); + + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(xe); + resp.width = 36; + + /* Only write to the output fields of user query */ + if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) + return -EFAULT; + + if (put_user(resp.cpu_delta, &query_ptr->cpu_delta)) + return -EFAULT; + + if (put_user(resp.engine_cycles, &query_ptr->engine_cycles)) + return -EFAULT; + + if (put_user(resp.width, &query_ptr->width)) + return -EFAULT; + + return 0; +} + +static int query_engines(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_hw_engine_info_size(xe); + struct drm_xe_query_engines __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_engines *engines; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_gt *gt; + u8 gt_id; + int i = 0; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + engines = kmalloc(size, GFP_KERNEL); + if (!engines) + return -ENOMEM; + + for_each_gt(gt, xe, gt_id) + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + engines->engines[i].instance.engine_class = + xe_to_user_engine_class[hwe->class]; + engines->engines[i].instance.engine_instance = + hwe->logical_instance; + 
engines->engines[i].instance.gt_id = gt->info.id; + engines->engines[i].instance.pad = 0; + memset(engines->engines[i].reserved, 0, + sizeof(engines->engines[i].reserved)); + + i++; + } + + engines->pad = 0; + engines->num_engines = i; + + if (copy_to_user(query_ptr, engines, size)) { + kfree(engines); + return -EFAULT; + } + kfree(engines); + + return 0; +} + +static size_t calc_mem_regions_size(struct xe_device *xe) +{ + u32 num_managers = 1; + int i; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) + if (ttm_manager_type(&xe->ttm, i)) + num_managers++; + + return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]); +} + +static int query_mem_regions(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_mem_regions_size(xe); + struct drm_xe_query_mem_regions *mem_regions; + struct drm_xe_query_mem_regions __user *query_ptr = + u64_to_user_ptr(query->data); + struct ttm_resource_manager *man; + int ret, i; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + mem_regions = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_DBG(xe, !mem_regions)) + return -ENOMEM; + + man = ttm_manager_type(&xe->ttm, XE_PL_TT); + mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM; + /* + * The instance needs to be a unique number that represents the index + * in the placement mask used at xe_gem_create_ioctl() for the + * xe_bo_create() placement. + */ + mem_regions->mem_regions[0].instance = 0; + mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; + mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; + if (perfmon_capable()) + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->num_mem_regions = 1; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { + man = ttm_manager_type(&xe->ttm, i); + if (man) { + mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class = + DRM_XE_MEM_REGION_CLASS_VRAM; + mem_regions->mem_regions[mem_regions->num_mem_regions].instance = + mem_regions->num_mem_regions; + mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
+ SZ_64K : PAGE_SIZE; + mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = + man->size; + + if (perfmon_capable()) { + xe_ttm_vram_get_used(man, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); + } + + mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = + xe_ttm_vram_get_cpu_visible_size(man); + mem_regions->num_mem_regions++; + } + } + + if (!copy_to_user(query_ptr, mem_regions, size)) + ret = 0; + else + ret = -ENOSPC; + + kfree(mem_regions); + return ret; +} + +static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) +{ + const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1; + size_t size = + sizeof(struct drm_xe_query_config) + num_params * sizeof(u64); + struct drm_xe_query_config __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_config *config; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + config = kzalloc(size, GFP_KERNEL); + if (!config) + return -ENOMEM; + + config->num_params = num_params; + config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = + xe->info.devid | (xe->info.revid << 16); + if (xe_device_get_root_tile(xe)->mem.vram.usable_size) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] = + DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; + config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; + config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; + config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = + xe_exec_queue_device_get_max_priority(xe); + + if (copy_to_user(query_ptr, config, size)) { + kfree(config); + return -EFAULT; + } + kfree(config); + + return 0; +} + +static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query) +{ + struct xe_gt *gt; + size_t size = sizeof(struct drm_xe_query_gt_list) + + xe->info.gt_count * sizeof(struct drm_xe_gt); + struct drm_xe_query_gt_list __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_gt_list *gt_list; + u8 id; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + gt_list = kzalloc(size, GFP_KERNEL); + if (!gt_list) + return -ENOMEM; + + gt_list->num_gt = xe->info.gt_count; + + for_each_gt(gt, xe, id) { + if (xe_gt_is_media_type(gt)) + gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + else + gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[id].gt_id = gt->info.id; + gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + /* + * The mem_regions indexes in the mask below need to + * directly identify the struct + * drm_xe_query_mem_regions' instance constructed at + * query_mem_regions() + * + * For our current platforms: + * Bit 0 -> System Memory + * Bit 1 -> VRAM0 on Tile0 + * Bit 2 -> VRAM1 on Tile1 + * However the uAPI is generic and it's userspace's + * responsibility to check the mem_class, without any + * assumption. 
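+		 *
+		 * As a worked example of the layout above: on a discrete
+		 * part, a GT on tile 1 gets
+		 * near_mem_regions = BIT(1) << 1 = 0x4, and far_mem_regions
+		 * is then mem_region_mask with that bit cleared (the XOR
+		 * below).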
+		 */
+		if (!IS_DGFX(xe))
+			gt_list->gt_list[id].near_mem_regions = 0x1;
+		else
+			gt_list->gt_list[id].near_mem_regions =
+				BIT(gt_to_tile(gt)->id) << 1;
+		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
+			gt_list->gt_list[id].near_mem_regions;
+	}
+
+	if (copy_to_user(query_ptr, gt_list, size)) {
+		kfree(gt_list);
+		return -EFAULT;
+	}
+	kfree(gt_list);
+
+	return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+			  struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	void *hwconfig;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hwconfig = kzalloc(size, GFP_KERNEL);
+	if (!hwconfig)
+		return -ENOMEM;
+
+	xe_device_mem_access_get(xe);
+	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+	xe_device_mem_access_put(xe);
+
+	if (copy_to_user(query_ptr, hwconfig, size)) {
+		kfree(hwconfig);
+		return -EFAULT;
+	}
+	kfree(hwconfig);
+
+	return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+	return xe->info.gt_count *
+		(3 * sizeof(struct drm_xe_query_topology_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static void __user *copy_mask(void __user *ptr,
+			      struct drm_xe_query_topology_mask *topo,
+			      void *mask, size_t mask_size)
+{
+	topo->num_bytes = mask_size;
+
+	if (copy_to_user(ptr, topo, sizeof(*topo)))
+		return ERR_PTR(-EFAULT);
+	ptr += sizeof(*topo);
+
+	if (copy_to_user(ptr, mask, mask_size))
+		return ERR_PTR(-EFAULT);
+	ptr += mask_size;
+
+	return ptr;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
+{
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = calc_topo_query_size(xe);
+	struct drm_xe_query_topology_mask topo;
+	struct xe_gt *gt;
+	int id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	for_each_gt(gt, xe, id) {
+		topo.gt_id = id;
+
+		topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.g_dss_mask,
+				      sizeof(gt->fuse_topo.g_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = DRM_XE_TOPO_DSS_COMPUTE;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.c_dss_mask,
+				      sizeof(gt->fuse_topo.c_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = DRM_XE_TOPO_EU_PER_DSS;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.eu_mask_per_dss,
+				      sizeof(gt->fuse_topo.eu_mask_per_dss));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+	}
+
+	return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+				      struct drm_xe_device_query *query) = {
+	query_engines,
+	query_mem_regions,
+	query_config,
+	query_gt_list,
+	query_hwconfig,
+	query_gt_topology,
+	query_engine_cycles,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_device_query *query = data;
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, query->extensions) ||
+	    XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(query->query,
+				 ARRAY_SIZE(xe_query_funcs));
+	if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
+		return -EINVAL;
+
+	return xe_query_funcs[idx](xe, query);
+}
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 000000000000..beeb7a8192b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c
new file mode 100644
index 000000000000..d35d9ec58e86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/slab.h>
+
+#include "xe_macros.h"
+#include "xe_range_fence.h"
+
+#define XE_RANGE_TREE_START(_node)	((_node)->start)
+#define XE_RANGE_TREE_LAST(_node)	((_node)->last)
+
+INTERVAL_TREE_DEFINE(struct xe_range_fence, rb, u64, __subtree_last,
+		     XE_RANGE_TREE_START, XE_RANGE_TREE_LAST, static,
+		     xe_range_fence_tree);
+
+static void
+xe_range_fence_signal_notify(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_range_fence *rfence = container_of(cb, typeof(*rfence), cb);
+	struct xe_range_fence_tree *tree = rfence->tree;
+
+	llist_add(&rfence->link, &tree->list);
+}
+
+static bool __xe_range_fence_tree_cleanup(struct xe_range_fence_tree *tree)
+{
+	struct llist_node *node = llist_del_all(&tree->list);
+	struct xe_range_fence *rfence, *next;
+
+	llist_for_each_entry_safe(rfence, next, node, link) {
+		xe_range_fence_tree_remove(rfence, &tree->root);
+		dma_fence_put(rfence->fence);
+		kfree(rfence);
+	}
+
+	return !!node;
+}
+
+/**
+ * xe_range_fence_insert() - range fence insert
+ * @tree: range fence tree to insert into
+ * @rfence: range fence
+ * @ops: range fence ops
+ * @start: start address of range fence
+ * @last: last address of range fence
+ * @fence: dma fence which signals the range fence can be removed and freed
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 last, struct dma_fence *fence)
+{
+	int err = 0;
+
+	__xe_range_fence_tree_cleanup(tree);
+
+	if (dma_fence_is_signaled(fence))
+		goto free;
+
+	rfence->ops = ops;
+	rfence->start = start;
+	rfence->last = last;
+	rfence->tree = tree;
+	rfence->fence = dma_fence_get(fence);
+	err = dma_fence_add_callback(fence, &rfence->cb,
+				     xe_range_fence_signal_notify);
+	if (err == -ENOENT) {
+		dma_fence_put(fence);
+		err = 0;
+		goto free;
+	} else if (err == 0) {
+		xe_range_fence_tree_insert(rfence, &tree->root);
+		return 0;
+	}
+
+free:
+	if (ops->free)
+		ops->free(rfence);
+
+	return err;
+}
+
+static void xe_range_fence_tree_remove_all(struct xe_range_fence_tree *tree)
+{
+	struct xe_range_fence *rfence;
+	bool retry = true;
+
+	rfence = xe_range_fence_tree_iter_first(&tree->root, 0, U64_MAX);
+	while (rfence) {
+		/* Should be ok with the minimalistic callback */
+		if (dma_fence_remove_callback(rfence->fence, &rfence->cb))
+			llist_add(&rfence->link, &tree->list);
+		rfence = xe_range_fence_tree_iter_next(rfence, 0, U64_MAX);
+	}
+
+	while (retry)
+		retry = __xe_range_fence_tree_cleanup(tree);
+}
+
+/**
+ * xe_range_fence_tree_init() - Init range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree)
+{
+	memset(tree, 0, sizeof(*tree));
+}
+
+/**
+ * xe_range_fence_tree_fini() - Fini range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree)
+{
+	xe_range_fence_tree_remove_all(tree);
+	XE_WARN_ON(!RB_EMPTY_ROOT(&tree->root.rb_root));
+}
+
+/**
+ * xe_range_fence_tree_first() - range fence tree iterator first
+ * @tree: range fence tree
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: first range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last)
+{
+	return xe_range_fence_tree_iter_first(&tree->root, start, last);
+}
+
+/**
+ * xe_range_fence_tree_next() - range fence tree iterator next
+ * @rfence: current range fence
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: next range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
+{
+	return xe_range_fence_tree_iter_next(rfence, start, last);
+}
+
+const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
+	.free = (void (*)(struct xe_range_fence *rfence)) kfree,
+};
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
new file mode 100644
index 000000000000..edd58b34f5c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RANGE_FENCE_H_
+#define _XE_RANGE_FENCE_H_
+
+#include <linux/dma-fence.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct xe_range_fence_tree;
+struct xe_range_fence;
+
+/** struct xe_range_fence_ops - XE range fence ops */
+struct xe_range_fence_ops {
+	/** @free: free range fence op */
+	void (*free)(struct xe_range_fence *rfence);
+};
+
+/** struct xe_range_fence - XE range fence (address conflict tracking) */
+struct xe_range_fence {
+	/** @rb: RB tree node inserted into interval tree */
+	struct rb_node rb;
+	/** @start: start address of range fence in interval tree */
+	u64 start;
+	/** @last: last address (inclusive) of range fence in interval tree */
+	u64 last;
+	/** @__subtree_last: interval tree internal usage */
+	u64 __subtree_last;
+	/**
+	 * @fence: fence whose signaling indicates the address range no
+	 * longer has a conflict
+	 */
+	struct dma_fence *fence;
+	/** @tree: interval tree which range fence belongs to */
+	struct xe_range_fence_tree *tree;
+	/**
+	 * @cb: callback used, upon fence signaling, to remove the range
+	 * fence from the interval tree and free it
+	 */
+	struct dma_fence_cb cb;
+	/** @link: used to defer free of range fence to non-irq context */
+	struct llist_node link;
+	/** @ops: range fence ops */
+	const struct xe_range_fence_ops *ops;
+};
+
+/** struct xe_range_fence_tree - interval tree to store range fences */
+struct xe_range_fence_tree {
+	/** @root: interval tree root */
+	struct rb_root_cached root;
+	/** @list: list of pending range fences to be freed */
+	struct llist_head list;
+};
+
+extern const struct xe_range_fence_ops xe_range_fence_kfree_ops;
+
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last);
+
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last);
+
+void
xe_range_fence_tree_init(struct xe_range_fence_tree *tree); + +void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree); + +int xe_range_fence_insert(struct xe_range_fence_tree *tree, + struct xe_range_fence *rfence, + const struct xe_range_fence_ops *ops, + u64 start, u64 end, + struct dma_fence *fence); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c new file mode 100644 index 000000000000..87adefb56024 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_reg_sr.h" + +#include <kunit/visibility.h> +#include <linux/align.h> +#include <linux/string_helpers.h> +#include <linux/xarray.h> + +#include <drm/drm_managed.h> +#include <drm/drm_print.h> + +#include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_gt_printk.h" +#include "xe_hw_engine_types.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_reg_whitelist.h" +#include "xe_rtp_types.h" + +#define XE_REG_SR_GROW_STEP_DEFAULT 16 + +static void reg_sr_fini(struct drm_device *drm, void *arg) +{ + struct xe_reg_sr *sr = arg; + + xa_destroy(&sr->xa); + kfree(sr->pool.arr); + memset(&sr->pool, 0, sizeof(sr->pool)); +} + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) +{ + xa_init(&sr->xa); + memset(&sr->pool, 0, sizeof(sr->pool)); + sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; + sr->name = name; + + return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); +} +EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); + +static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) +{ + if (sr->pool.used == sr->pool.allocated) { + struct xe_reg_sr_entry *arr; + + arr = krealloc_array(sr->pool.arr, + ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), + sizeof(*arr), GFP_KERNEL); + if (!arr) + return NULL; + + sr->pool.arr = arr; + sr->pool.allocated += sr->pool.grow_step; + } + + return &sr->pool.arr[sr->pool.used++]; +} + +static bool compatible_entries(const struct xe_reg_sr_entry *e1, + const struct xe_reg_sr_entry *e2) +{ + /* + * Don't allow overwriting values: clr_bits/set_bits should be disjoint + * when operating in the same register + */ + if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits || + e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits) + return false; + + if (e1->reg.raw != e2->reg.raw) + return false; + + return true; +} + +static void reg_sr_inc_error(struct xe_reg_sr *sr) +{ +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + sr->errors++; +#endif +} + +int xe_reg_sr_add(struct xe_reg_sr *sr, + const struct xe_reg_sr_entry *e, + struct xe_gt *gt) +{ + unsigned long idx = e->reg.addr; + struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); + int ret; + + if (pentry) { + if (!compatible_entries(pentry, e)) { + ret = -EINVAL; + goto fail; + } + + pentry->clr_bits |= e->clr_bits; + pentry->set_bits |= e->set_bits; + pentry->read_mask |= e->read_mask; + + return 0; + } + + pentry = alloc_entry(sr); + if (!pentry) { + ret = -ENOMEM; + goto fail; + } + + *pentry = *e; + ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); + if (ret) + goto fail; + + return 0; + +fail: + xe_gt_err(gt, + "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n", + idx, e->clr_bits, e->set_bits, + str_yes_no(e->reg.masked), + str_yes_no(e->reg.mcr), + ret); + reg_sr_inc_error(sr); + + 
return ret;
+}
+
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
+{
+	struct xe_reg reg = entry->reg;
+	struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+	u32 val;
+
+	/*
+	 * If this is a masked register, need to set the upper 16 bits.
+	 * Set them to clr_bits since that is always a superset of the bits
+	 * being modified.
+	 *
+	 * When it's not masked, we have to read it from hardware, unless we are
+	 * supposed to set all bits.
+	 */
+	if (reg.masked)
+		val = entry->clr_bits << 16;
+	else if (entry->clr_bits + 1)
+		val = (reg.mcr ?
+		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+	else
+		val = 0;
+
+	/*
+	 * TODO: add selftest to validate all tables, regardless of platform:
+	 * - Masked registers can't have set_bits with upper bits set
+	 * - set_bits must be contained in clr_bits
+	 */
+	val |= entry->set_bits;
+
+	xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
+
+	if (entry->reg.mcr)
+		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
+	else
+		xe_mmio_write32(gt, reg, val);
+}
+
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	xa_for_each(&sr->xa, reg, entry)
+		apply_one_mmio(gt, entry);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+}
+
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
+{
+	struct xe_reg_sr *sr = &hwe->reg_whitelist;
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	struct drm_printer p;
+	u32 mmio_base = hwe->mmio_base;
+	unsigned long reg;
+	unsigned int slot = 0;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	p = drm_debug_printer(KBUILD_MODNAME);
+	xa_for_each(&sr->xa, reg, entry) {
+		if (slot == RING_MAX_NONPRIV_SLOTS) {
+			xe_gt_err(gt,
+				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+				  hwe->name, RING_MAX_NONPRIV_SLOTS);
+			break;
+		}
+
+		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+				reg | entry->set_bits);
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
+		u32 addr = RING_NOPID(mmio_base).addr;
+
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+	}
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+/**
+ * xe_reg_sr_dump - print all save/restore entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n", + reg, entry->clr_bits, entry->set_bits, + str_yes_no(entry->reg.masked), + str_yes_no(entry->reg.mcr)); +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h new file mode 100644 index 000000000000..e3197c33afe2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_ +#define _XE_REG_SR_ + +#include "xe_reg_sr_types.h" + +/* + * Reg save/restore bookkeeping + */ + +struct xe_device; +struct xe_gt; +struct xe_hw_engine; +struct drm_printer; + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); +void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); + +int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, + struct xe_gt *gt); +void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); +void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h new file mode 100644 index 000000000000..ad48a52b824a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_TYPES_ +#define _XE_REG_SR_TYPES_ + +#include <linux/types.h> +#include <linux/xarray.h> + +#include "regs/xe_reg_defs.h" + +struct xe_reg_sr_entry { + struct xe_reg reg; + u32 clr_bits; + u32 set_bits; + /* Mask for bits to consider when reading value back */ + u32 read_mask; +}; + +struct xe_reg_sr { + struct { + struct xe_reg_sr_entry *arr; + unsigned int used; + unsigned int allocated; + unsigned int grow_step; + } pool; + struct xarray xa; + const char *name; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + unsigned int errors; +#endif +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c new file mode 100644 index 000000000000..e66ae1bdaf9c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_reg_whitelist.h" + +#include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" +#include "xe_gt_types.h" +#include "xe_platform_types.h" +#include "xe_rtp.h" + +#undef XE_REG_MCR +#define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1)
+
+static bool match_not_render(const struct xe_gt *gt,
+			     const struct xe_hw_engine *hwe)
+{
+	return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry_sr register_whitelist[] = {
+	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
+				   RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				   RING_FORCE_TO_NONPRIV_RANGE_4))
+	},
+	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(COMMON_SLICE_CHICKEN1, 0))
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(HIZ_CHICKEN, 0))
+	},
+	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+	  XE_RTP_ACTIONS(WHITELIST(RING_CTX_TIMESTAMP(0),
+				   RING_FORCE_TO_NONPRIV_ACCESS_RD,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("16014440446"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64),
+			 WHITELIST(XE_REG(0x4500),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64))
+	},
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY)),
+	  XE_RTP_ACTIONS(WHITELIST(BCS_SWCTRL(0),
+				   RING_FORCE_TO_NONPRIV_DENY,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process the whitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be
+ * accessed by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+}
+
+/**
+ * xe_reg_whitelist_print_entry - print one whitelist entry
+ * @p: DRM printer
+ * @indent: indent level
+ * @reg: register allowed/denied
+ * @entry: save-restore entry
+ *
+ * Print details about the entry added to allow/deny access
+ */
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry)
+{
+	u32 val = entry->set_bits;
+	const char *access_str = "(invalid)";
+	unsigned int range_bit = 2;
+	u32 range_start, range_end;
+	bool deny;
+
+	deny = val & RING_FORCE_TO_NONPRIV_DENY;
+
+	switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) {
+	case RING_FORCE_TO_NONPRIV_RANGE_4:
+		range_bit = 4;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_16:
+		range_bit = 6;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_64:
+		range_bit = 8;
+		break;
+	}
+
+	range_start = reg & REG_GENMASK(25, range_bit);
+	range_end = range_start | REG_GENMASK(range_bit, 0);
+
+	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
+	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
+		access_str = "rw";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RD:
+		access_str = "read";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_WR:
+		access_str = "write";
+		break;
+	}
+
+	drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n",
+			  range_start, range_end,
+			  deny ?
"deny" : "allow", + access_str); +} + +/** + * xe_reg_whitelist_dump - print all whitelist entries + * @sr: Save/restore entries + * @p: DRM printer + */ +void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long reg; + + if (!sr->name || xa_empty(&sr->xa)) + return; + + drm_printf(p, "%s\n", sr->name); + xa_for_each(&sr->xa, reg, entry) + xe_reg_whitelist_print_entry(p, 1, reg, entry); +} diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h new file mode 100644 index 000000000000..69b121d377da --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_REG_WHITELIST_ +#define _XE_REG_WHITELIST_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_hw_engine; +struct xe_reg_sr; +struct xe_reg_sr_entry; + +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); + +void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, + u32 reg, struct xe_reg_sr_entry *entry); + +void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h new file mode 100644 index 000000000000..0a306963aa8e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef _XE_RES_CURSOR_H_
+#define _XE_RES_CURSOR_H_
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* Cursor state for walking over vram_mgr, stolen_mgr, and gtt_mgr allocations */
+struct xe_res_cursor {
+	u64 start;
+	u64 size;
+	u64 remaining;
+	void *node;
+	u32 mem_type;
+	struct scatterlist *sgl;
+	struct drm_buddy *mm;
+};
+
+static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+{
+	struct ttm_resource_manager *mgr;
+
+	mgr = ttm_manager_type(res->bo->bdev, res->mem_type);
+	return &to_xe_ttm_vram_mgr(mgr)->mm;
+}
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations starting at @start and
+ * spanning @size bytes.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+				u64 start, u64 size,
+				struct xe_res_cursor *cur)
+{
+	cur->sgl = NULL;
+	if (!res)
+		goto fallback;
+
+	XE_WARN_ON(start + size > res->size);
+
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1: {
+		struct drm_buddy_block *block;
+		struct list_head *head, *next;
+		struct drm_buddy *mm = xe_res_get_buddy(res);
+
+		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto fallback;
+
+		while (start >= drm_buddy_block_size(mm, block)) {
+			start -= drm_buddy_block_size(mm, block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block,
+						   link);
+		}
+
+		cur->mm = mm;
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(mm, block) - start,
+				size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	}
+	default:
+		goto fallback;
+	}
+
+	return;
+
+fallback:
+	cur->start = start;
+	cur->size = size;
+	cur->remaining = size;
+	cur->node = NULL;
+	cur->mem_type = XE_PL_TT;
+	XE_WARN_ON(res && start + size > res->size);
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+	struct scatterlist *sgl = cur->sgl;
+	u64 start = cur->start;
+
+	while (start >= sg_dma_len(sgl)) {
+		start -= sg_dma_len(sgl);
+		sgl = sg_next(sgl);
+		XE_WARN_ON(!sgl);
+	}
+
+	cur->start = start;
+	cur->size = sg_dma_len(sgl) - start;
+	cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations starting at @start and
+ * spanning @size bytes.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+				   u64 start, u64 size,
+				   struct xe_res_cursor *cur)
+{
+	XE_WARN_ON(!sg);
+	XE_WARN_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		   !IS_ALIGNED(size, PAGE_SIZE));
+	cur->node = NULL;
+	cur->start = start;
+	cur->remaining = size;
+	cur->size = 0;
+	cur->sgl = sg->sgl;
+	cur->mem_type = XE_PL_TT;
+	__xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forward, walking to the next node if necessary.
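+ *
+ * A purely illustrative iteration over a whole resource (a sketch, not part
+ * of this patch) would look like:
+ *
+ *	struct xe_res_cursor cur;
+ *
+ *	xe_res_first(res, 0, res->size, &cur);
+ *	while (cur.remaining) {
+ *		u64 addr = xe_res_dma(&cur);
+ *
+ *		// consume cur.size contiguous bytes at addr, then advance
+ *		xe_res_next(&cur, cur.size);
+ *	}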
+ */ +static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) +{ + struct drm_buddy_block *block; + struct list_head *next; + u64 start; + + XE_WARN_ON(size > cur->remaining); + + cur->remaining -= size; + if (!cur->remaining) + return; + + if (cur->size > size) { + cur->size -= size; + cur->start += size; + return; + } + + if (cur->sgl) { + cur->start += size; + __xe_res_sg_next(cur); + return; + } + + switch (cur->mem_type) { + case XE_PL_STOLEN: + case XE_PL_VRAM0: + case XE_PL_VRAM1: + start = size - cur->size; + block = cur->node; + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + + + while (start >= drm_buddy_block_size(cur->mm, block)) { + start -= drm_buddy_block_size(cur->mm, block); + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = drm_buddy_block_offset(block) + start; + cur->size = min(drm_buddy_block_size(cur->mm, block) - start, + cur->remaining); + cur->node = block; + break; + default: + return; + } +} + +/** + * xe_res_dma - return dma address of cursor at current position + * + * @cur: the cursor to return the dma address from + */ +static inline u64 xe_res_dma(const struct xe_res_cursor *cur) +{ + return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c new file mode 100644 index 000000000000..1e4c06eacd98 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_ring_ops.h" + +#include "generated/xe_wa_oob.h" +#include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" +#include "regs/xe_gpu_commands.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" +#include "xe_exec_queue_types.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" +#include "xe_vm.h" +#include "xe_wa.h" + +/* + * 3D-related flags that can't be set on _engines_ that lack access to the 3D + * pipeline (i.e., CCS engines). 
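+ * These are masked off in emit_render_cache_flush() and
+ * __emit_job_gen12_render_compute() below when the GT lacks a render
+ * engine, or when the job runs on a compute-class queue.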
+ */ +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ + PIPE_CONTROL_3D_ENGINE_FLAGS | \ + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ + PIPE_CONTROL_FLUSH_ENABLE | \ + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_DC_FLUSH_ENABLE) + +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | BIT(8) | state; +} + +static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, + u32 *dw, int i) +{ + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN; + dw[i++] = reg.addr + gt->mmio.adj_offset; + dw[i++] = AUX_INV; + dw[i++] = MI_NOOP; + + return i; +} + +static int emit_user_interrupt(u32 *dw, int i) +{ + dw[i++] = MI_USER_INTERRUPT; + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_ARB_CHECK; + + return i; +} + +static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +{ + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, + u32 *dw, int i) +{ + dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | + (invalidate_tlb ? MI_INVALIDATE_TLB : 0); + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) +{ + dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag | XE_INSTR_NUM_DW(3); + dw[i++] = lower_32_bits(batch_addr); + dw[i++] = upper_32_bits(batch_addr); + + return i; +} + +static int emit_flush_invalidate(u32 flag, u32 *dw, int i) +{ + dw[i] = MI_FLUSH_DW; + dw[i] |= flag; + dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | + MI_FLUSH_DW_STORE_INDEX; + + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = ~0U; + + return i; +} + +static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, + int i) +{ + u32 flags = PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX; + + if (invalidate_tlb) + flags |= PIPE_CONTROL_TLB_INVALIDATE; + + flags &= ~mask_flags; + + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = flags; + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + +static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, + u32 *dw, int i) +{ + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1); + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = lower_32_bits(value); + dw[i++] = upper_32_bits(value); + + return i; +} + +static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) +{ + struct xe_gt *gt = job->q->gt; + bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); + u32 flags; + + flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + 
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (XE_WA(gt, 1409600907)) + flags |= PIPE_CONTROL_DEPTH_STALL; + + if (lacks_render) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + dw[i++] = flags; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + +static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i) +{ + if (hwe->class != XE_ENGINE_CLASS_RENDER) + return i; + + if (XE_WA(hwe->gt, 16020292621)) { + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = PIPE_CONTROL_LRI_POST_SYNC; + dw[i++] = RING_NOPID(hwe->mmio_base).addr; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + } + + return i; +} + +static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, + int i) +{ + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL : + PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) | + PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + dw[i++] = 0; /* We're thrashing one extra dword. */ + + return i; +} + +static u32 get_ppgtt_flag(struct xe_sched_job *job) +{ + return job->q->vm ? BIT(8) : 0; +} + +/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ +static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_vm *vm = job->q->vm; + struct xe_gt *gt = job->q->gt; + + if (vm && vm->batch_invalidate_tlb) { + dw[i++] = preparser_disable(true); + i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, true, dw, i); + dw[i++] = preparser_disable(false); + } else { + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + } + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + + i = emit_user_interrupt(dw, i); + + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static bool has_aux_ccs(struct xe_device *xe) +{ + /* + * PVC is a special case that has no compression of either type + * (FlatCCS or AuxCCS). Also, AuxCCS is no longer used from Xe2 + * onward, so any future platforms with no FlatCCS will not have + * AuxCCS either. 
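+	 *
+	 * In short: PVC and Xe2+ never use AuxCCS; on everything else,
+	 * AuxCCS is in use exactly when FlatCCS is not.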
+ */ + if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) + return false; + + return !xe->info.has_flat_ccs; +} + +static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->q->gt; + struct xe_device *xe = gt_to_xe(gt); + bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + struct xe_vm *vm = job->q->vm; + + dw[i++] = preparser_disable(true); + + /* hsdes: 1809175790 */ + if (has_aux_ccs(xe)) { + if (decode) + i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i); + else + i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); + } + + if (vm && vm->batch_invalidate_tlb) + i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, true, dw, i); + + dw[i++] = preparser_disable(false); + + if (!vm || !vm->batch_invalidate_tlb) + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + + i = emit_user_interrupt(dw, i); + + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void __emit_job_gen12_render_compute(struct xe_sched_job *job, + struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->q->gt; + struct xe_device *xe = gt_to_xe(gt); + bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); + struct xe_vm *vm = job->q->vm; + u32 mask_flags = 0; + + dw[i++] = preparser_disable(true); + if (lacks_render) + mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) + mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; + + /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ + i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); + + /* hsdes: 1809175790 */ + if (has_aux_ccs(xe)) + i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i); + + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + i = emit_render_cache_flush(job, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); + + i = emit_user_interrupt(dw, i); + + i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i); + + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_migration_job_gen12(struct xe_sched_job *job, + struct xe_lrc *lrc, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ + + i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); + + /* XXX: Do we need this? Leaving for now. 
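+	 * The flush + TLB invalidation between the two batch buffers is
+	 * presumably what makes the writes of the first batch visible to
+	 * the second one before it starts.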
*/ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); + + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; + dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = seqno; /* value */ + + i = emit_user_interrupt(dw, i); + + xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_job_gen12_gsc(struct xe_sched_job *job) +{ + struct xe_gt *gt = job->q->gt; + + xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */ + + __emit_job_gen12_simple(job, job->q->lrc, + job->batch_addr[0], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_copy(struct xe_sched_job *job) +{ + int i; + + if (xe_sched_job_is_migration(job->q)) { + emit_migration_job_gen12(job, job->q->lrc, + xe_sched_job_seqno(job)); + return; + } + + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_simple(job, job->q->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_video(struct xe_sched_job *job) +{ + int i; + + /* FIXME: Not doing parallel handshake for now */ + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_video(job, job->q->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_render_compute(struct xe_sched_job *job) +{ + int i; + + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_render_compute(job, job->q->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static const struct xe_ring_ops ring_ops_gen12_gsc = { + .emit_job = emit_job_gen12_gsc, +}; + +static const struct xe_ring_ops ring_ops_gen12_copy = { + .emit_job = emit_job_gen12_copy, +}; + +static const struct xe_ring_ops ring_ops_gen12_video = { + .emit_job = emit_job_gen12_video, +}; + +static const struct xe_ring_ops ring_ops_gen12_render_compute = { + .emit_job = emit_job_gen12_render_compute, +}; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_OTHER: + return &ring_ops_gen12_gsc; + case XE_ENGINE_CLASS_COPY: + return &ring_ops_gen12_copy; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return &ring_ops_gen12_video; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return &ring_ops_gen12_render_compute; + default: + return NULL; + } +} diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h new file mode 100644 index 000000000000..e942735d76a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_H_ +#define _XE_RING_OPS_H_ + +#include "xe_hw_engine_types.h" +#include "xe_ring_ops_types.h" + +struct xe_gt; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h new file mode 100644 index 000000000000..1ae56e2ee7b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_TYPES_H_ +#define _XE_RING_OPS_TYPES_H_ + +struct xe_sched_job; + +#define MAX_JOB_SIZE_DW 48 +#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 
4) + +/** + * struct xe_ring_ops - Ring operations + */ +struct xe_ring_ops { + /** @emit_job: Write job to ring */ + void (*emit_job)(struct xe_sched_job *job); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c new file mode 100644 index 000000000000..fb44cc7521d8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_rtp.h" + +#include <kunit/visibility.h> + +#include <drm/xe_drm.h> + +#include "xe_gt.h" +#include "xe_gt_topology.h" +#include "xe_macros.h" +#include "xe_reg_sr.h" + +/** + * DOC: Register Table Processing + * + * Internal infrastructure to define how registers should be updated based on + * rules and actions. This can be used to define tables with multiple entries + * (one per register) that will be walked over at some point in time to apply + * the values to the registers that have matching rules. + */ + +static bool has_samedia(const struct xe_device *xe) +{ + return xe->info.media_verx100 >= 1300; +} + +static bool rule_matches(const struct xe_device *xe, + struct xe_gt *gt, + struct xe_hw_engine *hwe, + const struct xe_rtp_rule *rules, + unsigned int n_rules) +{ + const struct xe_rtp_rule *r; + unsigned int i; + bool match; + + for (r = rules, i = 0; i < n_rules; r = &rules[++i]) { + switch (r->match_type) { + case XE_RTP_MATCH_PLATFORM: + match = xe->info.platform == r->platform; + break; + case XE_RTP_MATCH_SUBPLATFORM: + match = xe->info.platform == r->platform && + xe->info.subplatform == r->subplatform; + break; + case XE_RTP_MATCH_GRAPHICS_VERSION: + match = xe->info.graphics_verx100 == r->ver_start && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + match = xe->info.graphics_verx100 >= r->ver_start && + xe->info.graphics_verx100 <= r->ver_end && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_GRAPHICS_STEP: + match = xe->info.step.graphics >= r->step_start && + xe->info.step.graphics < r->step_end && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_MEDIA_VERSION: + match = xe->info.media_verx100 == r->ver_start && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + match = xe->info.media_verx100 >= r->ver_start && + xe->info.media_verx100 <= r->ver_end && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_MEDIA_STEP: + match = xe->info.step.media >= r->step_start && + xe->info.step.media < r->step_end && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); + break; + case XE_RTP_MATCH_INTEGRATED: + match = !xe->info.is_dgfx; + break; + case XE_RTP_MATCH_DISCRETE: + match = xe->info.is_dgfx; + break; + case XE_RTP_MATCH_ENGINE_CLASS: + if (drm_WARN_ON(&xe->drm, !hwe)) + return false; + + match = hwe->class == r->engine_class; + break; + case XE_RTP_MATCH_NOT_ENGINE_CLASS: + if (drm_WARN_ON(&xe->drm, !hwe)) + return false; + + match = hwe->class != r->engine_class; + break; + case XE_RTP_MATCH_FUNC: + match = r->match_func(gt, hwe); + break; + default: + drm_warn(&xe->drm, "Invalid RTP match %u\n", + r->match_type); + match = false; + } + + if (!match) + return false; + } + + return true; +} + +static void rtp_add_sr_entry(const struct xe_rtp_action *action, + struct xe_gt *gt, + u32 mmio_base, + struct xe_reg_sr *sr) +{ + struct xe_reg_sr_entry sr_entry = { + .reg = action->reg, + .clr_bits = action->clr_bits, + .set_bits = 
action->set_bits,
+		.read_mask = action->read_mask,
+	};
+
+	sr_entry.reg.addr += mmio_base;
+	xe_reg_sr_add(sr, &sr_entry, gt);
+}
+
+static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
+			       struct xe_device *xe, struct xe_gt *gt,
+			       struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_action *action;
+	u32 mmio_base;
+	unsigned int i;
+
+	if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+		return false;
+
+	for (i = 0, action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
+		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
+		    (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
+			mmio_base = hwe->mmio_base;
+		else
+			mmio_base = 0;
+
+		rtp_add_sr_entry(action, gt, mmio_base, sr);
+	}
+
+	return true;
+}
+
+static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
+			    struct xe_hw_engine **hwe,
+			    struct xe_gt **gt,
+			    struct xe_device **xe)
+{
+	switch (ctx->type) {
+	case XE_RTP_PROCESS_TYPE_GT:
+		*hwe = NULL;
+		*gt = ctx->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	case XE_RTP_PROCESS_TYPE_ENGINE:
+		*hwe = ctx->hwe;
+		*gt = (*hwe)->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	}
+}
+
+/**
+ * xe_rtp_process_ctx_enable_active_tracking - Enable tracking of active entries
+ *
+ * Set additional metadata to track what entries are considered "active", i.e.
+ * their rules match the condition. Bits are never cleared: entries with
+ * matching rules set the corresponding bit in the bitmap.
+ *
+ * @ctx: The context for processing the table
+ * @active_entries: bitmap to store the active entries
+ * @n_entries: number of entries to be processed
+ */
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries)
+{
+	ctx->active_entries = active_entries;
+	ctx->n_entries = n_entries;
+}
+
+static void rtp_mark_active(struct xe_device *xe,
+			    struct xe_rtp_process_ctx *ctx,
+			    unsigned int first, unsigned int last)
+{
+	if (!ctx->active_entries)
+		return;
+
+	if (drm_WARN_ON(&xe->drm, last > ctx->n_entries))
+		return;
+
+	if (first == last)
+		bitmap_set(ctx->active_entries, first, 1);
+	else
+		bitmap_set(ctx->active_entries, first, last - first + 1);
+}
+
+/**
+ * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to
+ *			  the save-restore argument.
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ * @sr: Save-restore struct where matching rules execute the action. This can be
+ *      viewed as the "coalesced view" of multiple tables. The bits for each
+ *      register set are expected not to collide with previously added entries
+ *
+ * Walk the table pointed by @entries (with an empty sentinel) and add all
+ * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
+ * used to calculate the right register offset.
+ */
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_entry_sr *entry;
+	struct xe_hw_engine *hwe = NULL;
+	struct xe_gt *gt = NULL;
+	struct xe_device *xe = NULL;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	for (entry = entries; entry && entry->name; entry++) {
+		bool match = false;
+
+		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
+			struct xe_hw_engine *each_hwe;
+			enum xe_hw_engine_id id;
+
+			for_each_hw_engine(each_hwe, gt, id)
+				match |= rtp_process_one_sr(entry, xe, gt,
+							    each_hwe, sr);
+		} else {
+			match = rtp_process_one_sr(entry, xe, gt, hwe, sr);
+		}
+
+		if (match)
+			rtp_mark_active(xe, ctx, entry - entries,
+					entry - entries);
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
+
+/**
+ * xe_rtp_process - Process all rtp @entries, without running any action
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ *
+ * Walk the table pointed by @entries (with an empty sentinel), executing the
+ * rules. A few differences from xe_rtp_process_to_sr():
+ *
+ * 1. There is no action associated with each entry since this uses
+ *    struct xe_rtp_entry. Its main use is for marking active workarounds via
+ *    xe_rtp_process_ctx_enable_active_tracking().
+ * 2. There is support for OR operations by having entries with no name.
+ */
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries)
+{
+	const struct xe_rtp_entry *entry, *first_entry;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+	struct xe_device *xe;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	first_entry = entries;
+	if (drm_WARN_ON(&xe->drm, !first_entry->name))
+		return;
+
+	for (entry = entries; entry && entry->rules; entry++) {
+		if (entry->name)
+			first_entry = entry;
+
+		if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+			continue;
+
+		/* Fast-forward entry, eliminating the OR'ed entries */
+		for (entry++; entry && entry->rules; entry++)
+			if (entry->name)
+				break;
+		entry--;
+
+		rtp_mark_active(xe, ctx, first_entry - entries,
+				entry - entries);
+	}
+}
+
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe)
+{
+	return hwe->instance % 2 == 0;
+}
+
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe)
+{
+	u64 render_compute_mask = gt->info.engine_mask &
+		(XE_HW_ENGINE_CCS_MASK | XE_HW_ENGINE_RCS_MASK);
+
+	return render_compute_mask &&
+		hwe->engine_id == __ffs(render_compute_mask);
+}
+
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe)
+{
+	unsigned int dss_per_gslice = 4;
+	unsigned int dss;
+
+	if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
+		     "Checking gslice for platform without geometry pipeline\n"))
+		return false;
+
+	dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
+
+	return dss >= dss_per_gslice;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 000000000000..c56fedd126e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#define _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+#include "xe_rtp_helpers.h"
+#include "xe_rtp_types.h" + +#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS + +/* + * Register table poke infrastructure + */ + +struct xe_hw_engine; +struct xe_gt; +struct xe_reg_sr; + +/* + * Macros to encode rules to match against platform, IP version, stepping, etc. + * Shouldn't be used directly - see XE_RTP_RULES() + */ +#define _XE_RTP_RULE_PLATFORM(plat__) \ + { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ } + +#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__) \ + { .match_type = XE_RTP_MATCH_SUBPLATFORM, \ + .platform = plat__, .subplatform = sub__ } + +#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_STEP, \ + .step_start = start__, .step_end = end__ } + +#define _XE_RTP_RULE_MEDIA_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_MEDIA_STEP, \ + .step_start = start__, .step_end = end__ } + +#define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ + { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \ + .engine_class = (cls__) } + +/** + * XE_RTP_RULE_PLATFORM - Create rule matching platform + * @plat_: platform to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_PLATFORM(plat_) \ + _XE_RTP_RULE_PLATFORM(XE_##plat_) + +/** + * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform + * @plat_: platform to match + * @sub_: sub-platform to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_) \ + _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_) + +/** + * XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping + * @start_: First stepping matching the rule + * @end_: First stepping that does not match the rule + * + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive + * on the left, exclusive on the right. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_STEP(start_, end_) \ + _XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_) + +/** + * XE_RTP_RULE_MEDIA_STEP - Create rule matching media stepping + * @start_: First stepping matching the rule + * @end_: First stepping that does not match the rule + * + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive + * on the left, exclusive on the right. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_STEP(start_, end_) \ + _XE_RTP_RULE_MEDIA_STEP(STEP_##start_, STEP_##end_) + +/** + * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class + * @cls_: Engine class to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_ENGINE_CLASS(cls_) \ + _XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_) + +/** + * XE_RTP_RULE_FUNC - Create rule using callback function for match + * @func__: Function to call to decide if rule matches + * + * This allows more complex checks to be performed. The ``XE_RTP`` + * infrastructure will simply call the function @func_ passed to decide if this + * rule matches the device. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_FUNC(func__) \ + { .match_type = XE_RTP_MATCH_FUNC, \ + .match_func = (func__) } + +/** + * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version + * @ver__: Graphics IP version to match + * + * Refer to XE_RTP_RULES() for expected usage. 
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__) \
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION, \
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__) \
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, \
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Media IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__) \
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION, \
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__) \
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED \
+	{ .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE \
+	{ .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
+ *		      the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * The corresponding notation in bspec is:
+ *
+ *	REGNAME = VALUE
+ */
+#define XE_RTP_ACTION_WR(reg_, val_, ...) \
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .clr_bits = ~0u, .set_bits = (val_), \
+	  .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The corresponding bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 1
+ *	REGNAME[5] = 1
+ */
+#define XE_RTP_ACTION_SET(reg_, val_, ...) \
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .clr_bits = val_, .set_bits = val_, \
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_CLR - Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The corresponding bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 0
+ *	REGNAME[5] = 0
+ */
+#define XE_RTP_ACTION_CLR(reg_, val_, ...) \
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .clr_bits = val_, .set_bits = 0, \
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_FIELD_SET - Set a bit range
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The corresponding bspec notation is:
+ *
+ *	REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_ACTION_FIELD_SET(reg_, mask_bits_, val_, ...) \
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .clr_bits = mask_bits_, .set_bits = val_, \
+	  .read_mask = mask_bits_, ##__VA_ARGS__ }
+
+#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .clr_bits = (mask_bits_), .set_bits = (val_), \
+	  .read_mask = 0, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
+ * @reg_: Register
+ * @val_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the register
+ * with regular user privileges.
+ */
+#define XE_RTP_ACTION_WHITELIST(reg_, val_, ...) \
+	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+	{ .reg = XE_RTP_DROP_CAST(reg_), \
+	  .set_bits = val_, \
+	  .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \
+	  ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_)	.name = (s_)
+
+/**
+ * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry_sr
+ * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_entry entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ENTRY_FLAG(...) \
+	.flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
+ * @...: Action flags, without the ``XE_RTP_ACTION_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_action entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ACTION_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTION_FLAG(...) \
+	.flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
+ * @...: Rules
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_RULES(...) \
+	.n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \
+	.rules = (const struct xe_rtp_rule[]) { \
+		XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \
+	}
+
+/**
+ * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
+ * @...: Actions to be taken
+ *
+ * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_*
+ * for the possible actions. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(...),
+ *		  XE_RTP_ACTIONS(SET(..), SET(...), CLR(...)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTIONS(...) \
+	.n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \
+	.actions = (const struct xe_rtp_action[]) { \
+		XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \
+	}
+
+#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \
+	struct xe_hw_engine * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \
+	struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries);
+
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr);
+
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries);
+
+/* Match functions to be used with XE_RTP_MATCH_FUNC */
+
+/**
+ * xe_rtp_match_even_instance - Match if engine instance is even
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if engine instance is even, false otherwise
+ */
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe);
+
+/**
+ * xe_rtp_match_first_render_or_compute - Match if it's first render or compute
+ *					  engine in the GT
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Registers on the render reset domain need to have their values re-applied
+ * when any of those engines are reset. Since the engines reset together, the
+ * programming can be applied to just one of them. For simplicity the first
+ * engine of either render or compute class can be chosen.
+ *
+ * Returns: true if engine id is the first to match the render reset domain,
+ * false otherwise.
+ */
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe);
+
+/**
+ * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if first gslice is fused off, false otherwise.
+ */
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
new file mode 100644
index 000000000000..181b6290fac3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RTP_HELPERS_
+#define _XE_RTP_HELPERS_
+
+#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+#error "This header is supposed to be included by xe_rtp.h only"
+#endif
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+#define _XE_ESC(...) __VA_ARGS__
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,))
+#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_
+
+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
+#define __XE_FIRST(x_, ...) x_
+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
+#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__)
+
+#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
+
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+
+#define __XE_RTP_PASTE_SEP_COMMA ,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR |
+
+/*
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args_, with the end result separated by @sep_. @sep_ must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
+ * prefix.
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ *	expands to:
+ *
+ *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ *	expands to:
+ *
+ *	XE_RTP_TEST2_FOO
+ *
+ * 3) XE_RTP_PASTE_FOREACH(TEST3_, BITWISE_OR, (FOO, BAR))
+ *	expands to:
+ *
+ *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
+ *
+ * 4) #define __XE_RTP_PASTE_SEP_MY_SEP BANANA
+ *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
+ *	expands to:
+ *
+ *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
+ */
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+
+/*
+ * XE_RTP_DROP_CAST - Drop cast to convert a compound literal to an initializer
+ *
+ * Example:
+ *
+ * #define foo(a_) ((struct foo){ .a = a_ })
+ * XE_RTP_DROP_CAST(foo(10))
+ * expands to:
+ *
+ * { .a = 10 }
+ */
+#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 000000000000..637acc7626a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+/**
+ * struct xe_rtp_action - action to take for any matching rule
+ *
+ * This struct records what action should be taken in a register that has a
+ * matching rule. Example of actions: set/clear bits.
+ */
+struct xe_rtp_action {
+	/** @reg: Register */
+	struct xe_reg		reg;
+	/**
+	 * @clr_bits: bits to clear when updating register.
It's always a + * superset of bits being modified + */ + u32 clr_bits; + /** @set_bits: bits to set when updating register */ + u32 set_bits; +#define XE_RTP_NOCHECK .read_mask = 0 + /** @read_mask: mask for bits to consider when reading value back */ + u32 read_mask; +#define XE_RTP_ACTION_FLAG_ENGINE_BASE BIT(0) + /** @flags: flags to apply on rule evaluation or action */ + u8 flags; +}; + +enum { + XE_RTP_MATCH_PLATFORM, + XE_RTP_MATCH_SUBPLATFORM, + XE_RTP_MATCH_GRAPHICS_VERSION, + XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_GRAPHICS_STEP, + XE_RTP_MATCH_MEDIA_VERSION, + XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_STEP, + XE_RTP_MATCH_INTEGRATED, + XE_RTP_MATCH_DISCRETE, + XE_RTP_MATCH_ENGINE_CLASS, + XE_RTP_MATCH_NOT_ENGINE_CLASS, + XE_RTP_MATCH_FUNC, +}; + +/** struct xe_rtp_rule - match rule for processing entry */ +struct xe_rtp_rule { + u8 match_type; + + /* match filters */ + union { + /* MATCH_PLATFORM / MATCH_SUBPLATFORM */ + struct { + u8 platform; + u8 subplatform; + }; + /* + * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE / + * MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE + */ + struct { + u32 ver_start; +#define XE_RTP_END_VERSION_UNDEFINED U32_MAX + u32 ver_end; + }; + /* MATCH_STEP */ + struct { + u8 step_start; + u8 step_end; + }; + /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */ + struct { + u8 engine_class; + }; + /* MATCH_FUNC */ + bool (*match_func)(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + }; +}; + +/** struct xe_rtp_entry_sr - Entry in an rtp table */ +struct xe_rtp_entry_sr { + const char *name; + const struct xe_rtp_action *actions; + const struct xe_rtp_rule *rules; + u8 n_rules; + u8 n_actions; +#define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE BIT(0) + u8 flags; +}; + +/** struct xe_rtp_entry - Entry in an rtp table, with no action associated */ +struct xe_rtp_entry { + const char *name; + const struct xe_rtp_rule *rules; + u8 n_rules; +}; + +enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_GT, + XE_RTP_PROCESS_TYPE_ENGINE, +}; + +struct xe_rtp_process_ctx { + union { + struct xe_gt *gt; + struct xe_hw_engine *hwe; + }; + enum xe_rtp_process_type type; + unsigned long *active_entries; + size_t n_entries; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c new file mode 100644 index 000000000000..2c4632259edd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_sa.h" + +#include <linux/kernel.h> + +#include <drm/drm_managed.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_map.h" + +static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) +{ + struct xe_sa_manager *sa_manager = arg; + struct xe_bo *bo = sa_manager->bo; + + if (!bo) { + drm_err(drm, "no bo for sa manager\n"); + return; + } + + drm_suballoc_manager_fini(&sa_manager->base); + + if (bo->vmap.is_iomem) + kvfree(sa_manager->cpu_ptr); + + xe_bo_unpin_map_no_vm(bo); + sa_manager->bo = NULL; +} + +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) +{ + struct xe_device *xe = tile_to_xe(tile); + u32 managed_size = size - SZ_4K; + struct xe_bo *bo; + int ret; + + struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm, + sizeof(*sa_manager), + GFP_KERNEL); + if (!sa_manager) + return ERR_PTR(-ENOMEM); + + sa_manager->bo = NULL; + + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + 
XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", + PTR_ERR(bo)); + return (struct xe_sa_manager *)bo; + } + sa_manager->bo = bo; + + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); + sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); + + if (bo->vmap.is_iomem) { + sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); + if (!sa_manager->cpu_ptr) { + xe_bo_unpin_map_no_vm(sa_manager->bo); + sa_manager->bo = NULL; + return ERR_PTR(-ENOMEM); + } + } else { + sa_manager->cpu_ptr = bo->vmap.vaddr; + memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); + } + + ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, + sa_manager); + if (ret) + return ERR_PTR(ret); + + return sa_manager; +} + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + unsigned int size) +{ + return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); +} + +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); + + if (!sa_manager->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo), + xe_sa_bo_cpu_addr(sa_bo), + drm_suballoc_size(sa_bo)); +} + +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence) +{ + drm_suballoc_free(sa_bo, fence); +} diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h new file mode 100644 index 000000000000..4e96483057d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_H_ +#define _XE_SA_H_ + +#include "xe_sa_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_tile; + +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align); + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + u32 size); +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence); + +static inline struct xe_sa_manager * +to_xe_sa_manager(struct drm_suballoc_manager *mng) +{ + return container_of(mng, struct xe_sa_manager, base); +} + +static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->gpu_addr + + drm_suballoc_soffset(sa); +} + +static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->cpu_ptr + + drm_suballoc_soffset(sa); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h new file mode 100644 index 000000000000..2ef896aeca1d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_TYPES_H_ +#define _XE_SA_TYPES_H_ + +#include <drm/drm_suballoc.h> + +struct xe_bo; + +struct xe_sa_manager { + struct drm_suballoc_manager base; + struct xe_bo *bo; + u64 gpu_addr; + void *cpu_ptr; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c new file mode 100644 index 000000000000..01106a1156ad --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sched_job.h" + +#include <linux/dma-fence-array.h> +#include <linux/slab.h> + +#include "xe_device.h" +#include "xe_exec_queue.h" 
+#include "xe_gt.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct kmem_cache *xe_sched_job_slab; +static struct kmem_cache *xe_sched_job_parallel_slab; + +int __init xe_sched_job_module_init(void) +{ + xe_sched_job_slab = + kmem_cache_create("xe_sched_job", + sizeof(struct xe_sched_job) + + sizeof(u64), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_slab) + return -ENOMEM; + + xe_sched_job_parallel_slab = + kmem_cache_create("xe_sched_job_parallel", + sizeof(struct xe_sched_job) + + sizeof(u64) * + XE_HW_ENGINE_MAX_INSTANCE, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_parallel_slab) { + kmem_cache_destroy(xe_sched_job_slab); + return -ENOMEM; + } + + return 0; +} + +void xe_sched_job_module_exit(void) +{ + kmem_cache_destroy(xe_sched_job_slab); + kmem_cache_destroy(xe_sched_job_parallel_slab); +} + +static struct xe_sched_job *job_alloc(bool parallel) +{ + return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab : + xe_sched_job_slab, GFP_KERNEL); +} + +bool xe_sched_job_is_migration(struct xe_exec_queue *q) +{ + return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION); +} + +static void job_free(struct xe_sched_job *job) +{ + struct xe_exec_queue *q = job->q; + bool is_migration = xe_sched_job_is_migration(q); + + kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ? + xe_sched_job_parallel_slab : xe_sched_job_slab, job); +} + +static struct xe_device *job_to_xe(struct xe_sched_job *job) +{ + return gt_to_xe(job->q->gt); +} + +struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, + u64 *batch_addr) +{ + struct xe_sched_job *job; + struct dma_fence **fences; + bool is_migration = xe_sched_job_is_migration(q); + int err; + int i, j; + u32 width; + + /* only a kernel context can submit a vm-less job */ + XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); + + /* Migration and kernel engines have their own locking */ + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { + lockdep_assert_held(&q->vm->lock); + if (!xe_vm_in_lr_mode(q->vm)) + xe_vm_assert_held(q->vm); + } + + job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration); + if (!job) + return ERR_PTR(-ENOMEM); + + job->q = q; + kref_init(&job->refcount); + xe_exec_queue_get(job->q); + + err = drm_sched_job_init(&job->drm, q->entity, 1, NULL); + if (err) + goto err_free; + + if (!xe_exec_queue_is_parallel(q)) { + job->fence = xe_lrc_create_seqno_fence(q->lrc); + if (IS_ERR(job->fence)) { + err = PTR_ERR(job->fence); + goto err_sched_job; + } + } else { + struct dma_fence_array *cf; + + fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL); + if (!fences) { + err = -ENOMEM; + goto err_sched_job; + } + + for (j = 0; j < q->width; ++j) { + fences[j] = xe_lrc_create_seqno_fence(q->lrc + j); + if (IS_ERR(fences[j])) { + err = PTR_ERR(fences[j]); + goto err_fences; + } + } + + cf = dma_fence_array_create(q->width, fences, + q->parallel.composite_fence_ctx, + q->parallel.composite_fence_seqno++, + false); + if (!cf) { + --q->parallel.composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + + /* Sanity check */ + for (j = 0; j < q->width; ++j) + xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno); + + job->fence = &cf->base; + } + + width = q->width; + if (is_migration) + width = 2; + + for (i = 0; i < width; ++i) + job->batch_addr[i] = batch_addr[i]; + + /* All other jobs require a VM to be open which has a ref */ + if 
(unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_get(job_to_xe(job));
+	xe_device_assert_mem_access(job_to_xe(job));
+
+	trace_xe_sched_job_create(job);
+	return job;
+
+err_fences:
+	for (j = j - 1; j >= 0; --j) {
+		--q->lrc[j].fence_ctx.next_seqno;
+		dma_fence_put(fences[j]);
+	}
+	kfree(fences);
+err_sched_job:
+	drm_sched_job_cleanup(&job->drm);
+err_free:
+	xe_exec_queue_put(q);
+	job_free(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when ref == 0, drop a reference to job's exec queue + fence, cleanup
+ * base DRM schedule job, and free memory for XE schedule job.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+	struct xe_sched_job *job =
+		container_of(ref, struct xe_sched_job, refcount);
+
+	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_put(job_to_xe(job));
+	xe_exec_queue_put(job->q);
+	dma_fence_put(job->fence);
+	drm_sched_job_cleanup(&job->drm);
+	job_free(job);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+		return;
+
+	dma_fence_set_error(job->fence, error);
+
+	if (dma_fence_is_array(job->fence)) {
+		struct dma_fence_array *array =
+			to_dma_fence_array(job->fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &current_fence->flags))
+				continue;
+			dma_fence_set_error(current_fence, error);
+		} while (--nchild);
+	}
+
+	trace_xe_sched_job_set_error(job);
+
+	dma_fence_enable_sw_signaling(job->fence);
+	xe_hw_fence_irq_run(job->q->fence_irq);
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+				     xe_lrc_start_seqno(lrc),
+				     job->fence->ops);
+}
+
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	/*
+	 * Can safely check just LRC[0] seqno as that is last seqno written when
+	 * parallel handshake is done.
+	 */
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
+				     job->fence->ops);
+}
+
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+	drm_sched_job_arm(&job->drm);
+}
+
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+	xe_sched_job_get(job);
+	trace_xe_sched_job_exec(job);
+	drm_sched_entity_push_job(&job->drm);
+	xe_sched_job_put(job);
+}
+
+/**
+ * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
+ * @job: job to add the last fence dependency to
+ * @vm: virtual memory job belongs to
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */ +int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) +{ + struct dma_fence *fence; + + fence = xe_exec_queue_last_fence_get(job->q, vm); + dma_fence_get(fence); + + return drm_sched_job_add_dependency(&job->drm, fence); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h new file mode 100644 index 000000000000..34f475ba7f50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_H_ +#define _XE_SCHED_JOB_H_ + +#include "xe_sched_job_types.h" + +struct xe_vm; + +#define XE_SCHED_HANG_LIMIT 1 +#define XE_SCHED_JOB_TIMEOUT LONG_MAX + +int xe_sched_job_module_init(void); +void xe_sched_job_module_exit(void); + +struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, + u64 *batch_addr); +void xe_sched_job_destroy(struct kref *ref); + +/** + * xe_sched_job_get - get reference to XE schedule job + * @job: XE schedule job object + * + * Increment XE schedule job's reference count + */ +static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) +{ + kref_get(&job->refcount); + return job; +} + +/** + * xe_sched_job_put - put reference to XE schedule job + * @job: XE schedule job object + * + * Decrement XE schedule job's reference count, call xe_sched_job_destroy when + * reference count == 0. + */ +static inline void xe_sched_job_put(struct xe_sched_job *job) +{ + kref_put(&job->refcount, xe_sched_job_destroy); +} + +void xe_sched_job_set_error(struct xe_sched_job *job, int error); +static inline bool xe_sched_job_is_error(struct xe_sched_job *job) +{ + return job->fence->error < 0; +} + +bool xe_sched_job_started(struct xe_sched_job *job); +bool xe_sched_job_completed(struct xe_sched_job *job); + +void xe_sched_job_arm(struct xe_sched_job *job); +void xe_sched_job_push(struct xe_sched_job *job); + +int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); + +static inline struct xe_sched_job * +to_xe_sched_job(struct drm_sched_job *drm) +{ + return container_of(drm, struct xe_sched_job, drm); +} + +static inline u32 xe_sched_job_seqno(struct xe_sched_job *job) +{ + return job->fence->seqno; +} + +static inline void +xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) +{ + job->migrate_flush_flags = flags; +} + +bool xe_sched_job_is_migration(struct xe_exec_queue *q); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h new file mode 100644 index 000000000000..71213ba9735b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_TYPES_H_ +#define _XE_SCHED_JOB_TYPES_H_ + +#include <linux/kref.h> + +#include <drm/gpu_scheduler.h> + +struct xe_exec_queue; + +/** + * struct xe_sched_job - XE schedule job (batch buffer tracking) + */ +struct xe_sched_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @q: Exec queue */ + struct xe_exec_queue *q; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job. 
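+	 * (the job holds a reference on the fence and drops it in
+	 * xe_sched_job_destroy(); nothing on the fence side points back
+	 * at the job)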
+ */ +#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS + struct dma_fence *fence; + /** @user_fence: write back value when BB is complete */ + struct { + /** @used: user fence is used */ + bool used; + /** @addr: address to write to */ + u64 addr; + /** @value: write back value */ + u64 value; + } user_fence; + /** @migrate_flush_flags: Additional flush flags for migration jobs */ + u32 migrate_flush_flags; + /** @batch_addr: batch buffer address of job */ + u64 batch_addr[]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c new file mode 100644 index 000000000000..42a0e0c917a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_sriov.h" + +/** + * xe_sriov_mode_to_string - Convert enum value to string. + * @mode: the &xe_sriov_mode to convert + * + * Returns: SR-IOV mode as a user friendly string. + */ +const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) +{ + switch (mode) { + case XE_SRIOV_MODE_NONE: + return "none"; + case XE_SRIOV_MODE_PF: + return "SR-IOV PF"; + case XE_SRIOV_MODE_VF: + return "SR-IOV VF"; + default: + return "<invalid>"; + } +} + +/** + * xe_sriov_probe_early - Probe a SR-IOV mode. + * @xe: the &xe_device to probe mode on + * @has_sriov: flag indicating hardware support for SR-IOV + * + * This function should be called only once and as soon as possible during + * driver probe to detect whether we are running a SR-IOV Physical Function + * (PF) or a Virtual Function (VF) device. + * + * SR-IOV PF mode detection is based on PCI @dev_is_pf() function. + * SR-IOV VF mode detection is based on dedicated MMIO register read. + */ +void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) +{ + enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; + + /* TODO: replace with proper mode detection */ + xe_assert(xe, !has_sriov); + + xe_assert(xe, !xe->sriov.__mode); + xe->sriov.__mode = mode; + xe_assert(xe, xe->sriov.__mode); + + if (has_sriov) + drm_info(&xe->drm, "Running in %s mode\n", + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h new file mode 100644 index 000000000000..5af73a3172b0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_SRIOV_H_ +#define _XE_SRIOV_H_ + +#include "xe_assert.h" +#include "xe_device_types.h" +#include "xe_sriov_types.h" + +const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); + +void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); + +static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) +{ + xe_assert(xe, xe->sriov.__mode); + return xe->sriov.__mode; +} + +static inline bool xe_device_is_sriov_pf(struct xe_device *xe) +{ + return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; +} + +static inline bool xe_device_is_sriov_vf(struct xe_device *xe) +{ + return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; +} + +#ifdef CONFIG_PCI_IOV +#define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe) +#else +#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false) +#endif +#define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe) + +#define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe)) + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h new file mode 100644 index 000000000000..117e1d541692 --- 
/dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PRINTK_H_
+#define _XE_SRIOV_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+#define xe_sriov_printk_prefix(xe) \
+	((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \
+	 (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "")
+
+#define xe_sriov_printk(xe, _level, fmt, ...) \
+	drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__)
+
+#define xe_sriov_err(xe, fmt, ...) \
+	xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_err_ratelimited(xe, fmt, ...) \
+	xe_sriov_printk((xe), err_ratelimited, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_warn(xe, fmt, ...) \
+	xe_sriov_printk((xe), warn, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_notice(xe, fmt, ...) \
+	xe_sriov_printk((xe), notice, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_info(xe, fmt, ...) \
+	xe_sriov_printk((xe), info, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_dbg(xe, fmt, ...) \
+	xe_sriov_printk((xe), dbg, fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_sriov_dbg_verbose(xe, fmt, ...) xe_sriov_dbg(xe, fmt, ##__VA_ARGS__)
+#else
+#define xe_sriov_dbg_verbose(xe, fmt, ...) typecheck(struct xe_device *, (xe))
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
new file mode 100644
index 000000000000..999a4311b98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_TYPES_H_
+#define _XE_SRIOV_TYPES_H_
+
+#include <linux/build_bug.h>
+
+/**
+ * enum xe_sriov_mode - SR-IOV mode
+ * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized)
+ * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode
+ * @XE_SRIOV_MODE_VF: SR-IOV Virtual Function (VF) mode
+ */
+enum xe_sriov_mode {
+	/*
+	 * Note: We don't use the default enum value 0, so that any too-early
+	 * attempt to check the SR-IOV mode, before the actual mode probe has
+	 * run, can be caught.
+	 */
+	XE_SRIOV_MODE_NONE = 1,
+	XE_SRIOV_MODE_PF,
+	XE_SRIOV_MODE_VF,
+};
+static_assert(XE_SRIOV_MODE_NONE);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 000000000000..eaf1b718f26c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include <linux/bitfield.h>
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide a mapping from PCI's revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values that can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings. E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
+
+/*
+ * Some platforms always have the same stepping value for GT and display;
+ * use a macro to define these to make it easier to identify the platforms
+ * where the two steppings can deviate.
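+ *
+ * In the tables below, for example, DG1 uses COMMON_STEP() for all revisions,
+ * while TGL needs a separate .display value in each entry.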
+ */ +#define COMMON_GT_MEDIA_STEP(x_) \ + .graphics = STEP_##x_, \ + .media = STEP_##x_ + +#define COMMON_STEP(x_) \ + COMMON_GT_MEDIA_STEP(x_), \ + .graphics = STEP_##x_, \ + .media = STEP_##x_, \ + .display = STEP_##x_ + +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + +/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ +static const struct xe_step_info tgl_revids[] = { + [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, +}; + +static const struct xe_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct xe_step_info adls_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, +}; + +static const struct xe_step_info adls_rpls_revids[] = { + [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, +}; + +static const struct xe_step_info adlp_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 }, +}; + +static const struct xe_step_info adlp_rpl_revids[] = { + [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, +}; + +static const struct xe_step_info adln_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 }, +}; + +static const struct xe_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g11_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g12_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, +}; + +static const struct xe_step_info pvc_revid_step_tbl[] = { + [0x5] = { .graphics = STEP_B0 }, + [0x6] = { .graphics = STEP_B1 }, + [0x7] = { .graphics = STEP_C0 }, +}; + +static const int pvc_basedie_subids[] = { + [0x3] = STEP_B0, + [0x4] = STEP_B1, + [0x5] = STEP_B3, +}; + +__diag_pop(); + +/** + * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid + * @xe: Xe device + * + * Convert the PCI revid into proper IP steppings. This should only be + * used on platforms that do not have GMD_ID support. 
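+ * + * A minimal usage sketch (hypothetical caller, for illustration only): + * + *	struct xe_step_info step = xe_step_pre_gmdid_get(xe); + * + *	drm_dbg(&xe->drm, "graphics stepping: %s\n", xe_step_name(step.graphics));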
+ */ +struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) +{ + const struct xe_step_info *revids = NULL; + struct xe_step_info step = {}; + u16 revid = xe->info.revid; + int size = 0; + const int *basedie_info = NULL; + int basedie_size = 0; + int baseid = 0; + + if (xe->info.platform == XE_PVC) { + baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid); + revid = FIELD_GET(GENMASK(2, 0), xe->info.revid); + revids = pvc_revid_step_tbl; + size = ARRAY_SIZE(pvc_revid_step_tbl); + basedie_info = pvc_basedie_subids; + basedie_size = ARRAY_SIZE(pvc_basedie_subids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { + revids = dg2_g12_revid_step_tbl; + size = ARRAY_SIZE(dg2_g12_revid_step_tbl); + } else if (xe->info.platform == XE_ALDERLAKE_N) { + revids = adln_revids; + size = ARRAY_SIZE(adln_revids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_S_RPLS) { + revids = adls_rpls_revids; + size = ARRAY_SIZE(adls_rpls_revids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) { + revids = adlp_rpl_revids; + size = ARRAY_SIZE(adlp_rpl_revids); + } else if (xe->info.platform == XE_ALDERLAKE_P) { + revids = adlp_revids; + size = ARRAY_SIZE(adlp_revids); + } else if (xe->info.platform == XE_ALDERLAKE_S) { + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); + } else if (xe->info.platform == XE_DG1) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); + } else if (xe->info.platform == XE_TIGERLAKE) { + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } + + /* Not using the stepping scheme for the platform yet. */ + if (!revids) + return step; + + if (revid < size && revids[revid].graphics != STEP_NONE) { + step = revids[revid]; + } else { + drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid); + + /* + * If we hit a gap in the revid array, use the information for + * the next revid. + * + * This may be wrong in all sorts of ways, especially if the + * steppings in the array are not monotonically increasing, but + * it's better than defaulting to 0. + */ + while (revid < size && revids[revid].graphics == STEP_NONE) + revid++; + + if (revid < size) { + drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n", + revid); + step = revids[revid]; + } else { + drm_dbg(&xe->drm, "Using future steppings\n"); + step.graphics = STEP_FUTURE; + step.display = STEP_FUTURE; + } + } + + drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE); + + if (basedie_info && basedie_size) { + if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) { + step.basedie = basedie_info[baseid]; + } else { + drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid); + step.basedie = STEP_FUTURE; + } + } + + return step; +} + +/** + * xe_step_gmdid_get - Determine IP steppings from GMD_ID revid fields + * @xe: Xe device + * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field + * @media_gmdid_revid: value of media GMD_ID register's revid field + * + * Convert the revid fields of the GMD_ID registers into proper IP steppings. + * + * GMD_ID revid values are currently expected to have consistent meanings on + * all platforms: major steppings (A0, B0, etc.) are 4 apart, with minor + * steppings (A1, A2, etc.) taking the values in between. 
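+ * + * For example, under this scheme a graphics GMD_ID revid of 0 decodes to + * STEP_A0, 1 to STEP_A1 and 4 to STEP_B0, matching the "4 apart" rule for + * major steppings described above.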
+ */ +struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, + u32 graphics_gmdid_revid, + u32 media_gmdid_revid) +{ + struct xe_step_info step = { + .graphics = STEP_A0 + graphics_gmdid_revid, + .media = STEP_A0 + media_gmdid_revid, + }; + + if (step.graphics >= STEP_FUTURE) { + step.graphics = STEP_FUTURE; + drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n", + graphics_gmdid_revid); + } + + if (step.media >= STEP_FUTURE) { + step.media = STEP_FUTURE; + drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n", + media_gmdid_revid); + } + + return step; +} + +#define STEP_NAME_CASE(name) \ + case STEP_##name: \ + return #name; + +const char *xe_step_name(enum xe_step step) +{ + switch (step) { + STEP_NAME_LIST(STEP_NAME_CASE); + + default: + return "**"; + } +} diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h new file mode 100644 index 000000000000..686cb59200c2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_H_ +#define _XE_STEP_H_ + +#include <linux/types.h> + +#include "xe_step_types.h" + +struct xe_device; + +struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe); +struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, + u32 graphics_gmdid_revid, + u32 media_gmdid_revid); +static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; } + +const char *xe_step_name(enum xe_step step); + +#endif diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h new file mode 100644 index 000000000000..ccc9b4795e95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_TYPES_H_ +#define _XE_STEP_TYPES_H_ + +#include <linux/types.h> + +struct xe_step_info { + u8 graphics; + u8 media; + u8 display; + u8 basedie; +}; + +#define STEP_ENUM_VAL(name) STEP_##name, + +#define STEP_NAME_LIST(func) \ + func(A0) \ + func(A1) \ + func(A2) \ + func(A3) \ + func(B0) \ + func(B1) \ + func(B2) \ + func(B3) \ + func(C0) \ + func(C1) \ + func(C2) \ + func(C3) \ + func(D0) \ + func(D1) \ + func(D2) \ + func(D3) \ + func(E0) + +/* + * Symbolic steppings that do not match the hardware. These are valid both as gt + * and display steppings as symbolic names. 
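+ * + * For example, a range check such as (step >= STEP_B0 && step < STEP_FOREVER) + * (illustrative only) matches B0 and anything newer, including the symbolic + * STEP_FUTURE value that sorts after all named steppings.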
+ */ +enum xe_step { + STEP_NONE = 0, + STEP_NAME_LIST(STEP_ENUM_VAL) + STEP_FUTURE, + STEP_FOREVER, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c new file mode 100644 index 000000000000..e4c220cf9115 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sync.h" + +#include <linux/dma-fence-array.h> +#include <linux/kthread.h> +#include <linux/sched/mm.h> +#include <linux/uaccess.h> + +#include <drm/drm_print.h> +#include <drm/drm_syncobj.h> +#include <drm/xe_drm.h> + +#include "xe_device_types.h" +#include "xe_exec_queue.h" +#include "xe_macros.h" +#include "xe_sched_job_types.h" + +struct user_fence { + struct xe_device *xe; + struct kref refcount; + struct dma_fence_cb cb; + struct work_struct worker; + struct mm_struct *mm; + u64 __user *addr; + u64 value; +}; + +static void user_fence_destroy(struct kref *kref) +{ + struct user_fence *ufence = container_of(kref, struct user_fence, + refcount); + + mmdrop(ufence->mm); + kfree(ufence); +} + +static void user_fence_get(struct user_fence *ufence) +{ + kref_get(&ufence->refcount); +} + +static void user_fence_put(struct user_fence *ufence) +{ + kref_put(&ufence->refcount, user_fence_destroy); +} + +static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) +{ + struct user_fence *ufence; + + ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + if (!ufence) + return NULL; + + ufence->xe = xe; + kref_init(&ufence->refcount); + ufence->addr = u64_to_user_ptr(addr); + ufence->value = value; + ufence->mm = current->mm; + mmgrab(ufence->mm); + + return ufence; +} + +static void user_fence_worker(struct work_struct *w) +{ + struct user_fence *ufence = container_of(w, struct user_fence, worker); + + if (mmget_not_zero(ufence->mm)) { + kthread_use_mm(ufence->mm); + if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) + XE_WARN_ON("Copy to user failed"); + kthread_unuse_mm(ufence->mm); + mmput(ufence->mm); + } + + wake_up_all(&ufence->xe->ufence_wq); + user_fence_put(ufence); +} + +static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +{ + INIT_WORK(&ufence->worker, user_fence_worker); + queue_work(ufence->xe->ordered_wq, &ufence->worker); + dma_fence_put(fence); +} + +static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct user_fence *ufence = container_of(cb, struct user_fence, cb); + + kick_ufence(ufence, fence); +} + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + unsigned int flags) +{ + struct drm_xe_sync sync_in; + int err; + bool exec = flags & SYNC_PARSE_FLAG_EXEC; + bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE; + bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE; + bool signal; + + if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) || + XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1])) + return -EINVAL; + + signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL; + switch (sync_in.type) { + case DRM_XE_SYNC_TYPE_SYNCOBJ: + if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_DBG(xe, !sync->syncobj)) + return -ENOENT; + 
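+ /* An input (wait) syncobj: snapshot the fence currently attached to it */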
+ if (!signal) { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_DBG(xe, !sync->fence)) + return -EINVAL; + } + break; + + case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ: + if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0)) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_DBG(xe, !sync->syncobj)) + return -ENOENT; + + if (signal) { + sync->chain_fence = dma_fence_chain_alloc(); + if (!sync->chain_fence) + return -ENOMEM; + } else { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_DBG(xe, !sync->fence)) + return -EINVAL; + + err = dma_fence_chain_find_seqno(&sync->fence, + sync_in.timeline_value); + if (err) + return err; + } + break; + + case DRM_XE_SYNC_TYPE_USER_FENCE: + if (XE_IOCTL_DBG(xe, disallow_user_fence)) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !signal)) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7)) + return -EINVAL; + + if (exec) { + sync->addr = sync_in.addr; + } else { + sync->ufence = user_fence_create(xe, sync_in.addr, + sync_in.timeline_value); + if (XE_IOCTL_DBG(xe, !sync->ufence)) + return -ENOMEM; + } + + break; + + default: + return -EINVAL; + } + + sync->type = sync_in.type; + sync->flags = sync_in.flags; + sync->timeline_value = sync_in.timeline_value; + + return 0; +} + +int xe_sync_entry_wait(struct xe_sync_entry *sync) +{ + if (sync->fence) + dma_fence_wait(sync->fence, true); + + return 0; +} + +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) +{ + int err; + + if (sync->fence) { + err = drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); + if (err) { + dma_fence_put(sync->fence); + return err; + } + } + + return 0; +} + +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, + struct dma_fence *fence) +{ + if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)) + return; + + if (sync->chain_fence) { + drm_syncobj_add_point(sync->syncobj, sync->chain_fence, + fence, sync->timeline_value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + sync->chain_fence = NULL; + } else if (sync->syncobj) { + drm_syncobj_replace_fence(sync->syncobj, fence); + } else if (sync->ufence) { + int err; + + dma_fence_get(fence); + user_fence_get(sync->ufence); + err = dma_fence_add_callback(fence, &sync->ufence->cb, + user_fence_cb); + if (err == -ENOENT) { + kick_ufence(sync->ufence, fence); + } else if (err) { + XE_WARN_ON("failed to add user fence"); + user_fence_put(sync->ufence); + dma_fence_put(fence); + } + } else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) { + job->user_fence.used = true; + job->user_fence.addr = sync->addr; + job->user_fence.value = sync->timeline_value; + } +} + +void xe_sync_entry_cleanup(struct xe_sync_entry *sync) +{ + if (sync->syncobj) + drm_syncobj_put(sync->syncobj); + if (sync->fence) + dma_fence_put(sync->fence); + if (sync->chain_fence) + dma_fence_put(&sync->chain_fence->base); + if (sync->ufence) + user_fence_put(sync->ufence); +} + +/** + * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM + * @sync: input syncs + * @num_sync: number of syncs + * @q: exec queue + * @vm: VM + * + * Get a fence from syncs, exec queue, and VM. If syncs contain in-fences create + * and return a composite fence of all in-fences + last fence. If no in-fences + * return last fence on input exec queue. 
Caller must drop reference to + returned fence. + * + * Return: fence on success, ERR_PTR(-ENOMEM) on failure + */ +struct dma_fence * +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, + struct xe_exec_queue *q, struct xe_vm *vm) +{ + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct dma_fence *fence; + int i, num_in_fence = 0, current_fence = 0; + + lockdep_assert_held(&vm->lock); + + /* Count in-fences */ + for (i = 0; i < num_sync; ++i) { + if (sync[i].fence) { + ++num_in_fence; + fence = sync[i].fence; + } + } + + /* Easy case... */ + if (!num_in_fence) { + fence = xe_exec_queue_last_fence_get(q, vm); + dma_fence_get(fence); + return fence; + } + + /* Create composite fence */ + fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + for (i = 0; i < num_sync; ++i) { + if (sync[i].fence) { + dma_fence_get(sync[i].fence); + fences[current_fence++] = sync[i].fence; + } + } + fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); + dma_fence_get(fences[current_fence - 1]); + /* The composite must cover the in-fences plus the appended last fence */ + cf = dma_fence_array_create(num_in_fence + 1, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + goto err_out; + } + + return &cf->base; + +err_out: + while (current_fence) + dma_fence_put(fences[--current_fence]); + kfree(fences); + kfree(cf); + + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h new file mode 100644 index 000000000000..d284afbe917c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SYNC_H_ +#define _XE_SYNC_H_ + +#include "xe_sync_types.h" + +struct xe_device; +struct xe_exec_queue; +struct xe_file; +struct xe_sched_job; +struct xe_vm; + +#define SYNC_PARSE_FLAG_EXEC BIT(0) +#define SYNC_PARSE_FLAG_LR_MODE BIT(1) +#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE BIT(2) + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + unsigned int flags); +int xe_sync_entry_wait(struct xe_sync_entry *sync); +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, + struct xe_sched_job *job); +void xe_sync_entry_signal(struct xe_sync_entry *sync, + struct xe_sched_job *job, + struct dma_fence *fence); +void xe_sync_entry_cleanup(struct xe_sync_entry *sync); +struct dma_fence * +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, + struct xe_exec_queue *q, struct xe_vm *vm); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h new file mode 100644 index 000000000000..852db5e7884f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SYNC_TYPES_H_ +#define _XE_SYNC_TYPES_H_ + +#include <linux/types.h> + +struct drm_syncobj; +struct dma_fence; +struct dma_fence_chain; +struct drm_xe_sync; +struct user_fence; + +struct xe_sync_entry { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + struct dma_fence_chain *chain_fence; + struct user_fence *ufence; + u64 addr; + u64 timeline_value; + u32 type; + u32 flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c new file mode 100644 index 000000000000..044c20881de7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -0,0 +1,185 @@ +// 
SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_migrate.h" +#include "xe_sa.h" +#include "xe_tile.h" +#include "xe_tile_sysfs.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_wa.h" + +/** + * DOC: Multi-tile Design + * + * Different vendors use the term "tile" a bit differently, but in the Intel + * world, a 'tile' is pretty close to what most people would think of as being + * a complete GPU. When multiple GPUs are placed behind a single PCI device, + * that's what is referred to as a "multi-tile device." In such cases, pretty + * much all hardware is replicated per-tile, although certain responsibilities + * like PCI communication, reporting of interrupts to the OS, etc. are handled + * solely by the "root tile." A multi-tile platform takes care of tying the + * tiles together in a way such that interrupt notifications from remote tiles + * are forwarded to the root tile, the per-tile vram is combined into a single + * address space, etc. + * + * In contrast, a "GT" (which officially stands for "Graphics Technology") is + * the subset of a GPU/tile that is responsible for implementing graphics + * and/or media operations. The GT is where a lot of the driver implementation + * happens since it's where the hardware engines, the execution units, and the + * GuC all reside. + * + * Historically most Intel devices were single-tile devices that contained a + * single GT. PVC is an example of an Intel platform built on a multi-tile + * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only + * has a single GT. In contrast, platforms like MTL that have separate chips + * for render and media IP are still only a single logical GPU, but the + * graphics and media IP blocks are each exposed as a separate GT within that + * single GPU. This is important from a software perspective because multi-GT + * platforms like MTL only replicate a subset of the GPU hardware and behave + * differently than multi-tile platforms like PVC where nearly everything is + * replicated. + * + * Per-tile functionality (shared by all GTs within the tile): + * - Complete 4MB MMIO space (containing SGunit/SoC registers, GT + * registers, display registers, etc.) + * - Global GTT + * - VRAM (if discrete) + * - Interrupt flows + * - Migration context + * - kernel batchbuffer pool + * - Primary GT + * - Media GT (if media version >= 13) + * + * Per-GT functionality: + * - GuC + * - Hardware engines + * - Programmable hardware units (subslices, EUs) + * - GSI subset of registers (multiple copies of these registers reside + * within the complete MMIO space provided by the tile, but at different + * offsets --- 0 for render, 0x380000 for media) + * - Multicast register steering + * - TLBs to cache page table translations + * - Reset capability + * - Low-level power management (e.g., C6) + * - Clock frequency + * - MOCS and PAT programming + */ + +/** + * xe_tile_alloc - Perform per-tile memory allocation + * @tile: Tile to perform allocations for + * + * Allocates various per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. 
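+ * + * Everything here is allocated with drmm_kzalloc(), i.e. tied to the lifetime + * of the underlying struct drm_device, so no explicit teardown path for these + * structures is required.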
+ */ +static int xe_tile_alloc(struct xe_tile *tile) +{ + struct drm_device *drm = &tile_to_xe(tile)->drm; + + tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), + GFP_KERNEL); + if (!tile->mem.ggtt) + return -ENOMEM; + tile->mem.ggtt->tile = tile; + + tile->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL); + if (!tile->mem.vram_mgr) + return -ENOMEM; + + return 0; +} + +/** + * xe_tile_init_early - Initialize the tile and primary GT + * @tile: Tile to initialize + * @xe: Parent Xe device + * @id: Tile ID + * + * Initializes per-tile resources that don't require any interactions with the + * hardware or any knowledge about the Graphics/Media IP version. + * + * Returns: 0 on success, negative error code on error. + */ +int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) +{ + int err; + + tile->xe = xe; + tile->id = id; + + err = xe_tile_alloc(tile); + if (err) + return err; + + tile->primary_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); + + return 0; +} + +static int tile_ttm_mgr_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + int err; + + if (tile->mem.vram.usable_size) { + err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr); + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->id) << 1; + } + + return 0; +} + +/** + * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. + * @tile: The tile to initialize. + * + * This function prepares the tile to allow memory allocations to VRAM, but is + * not allowed to allocate memory itself. This state is useful for display + * readout, because the inherited display framebuffer will otherwise be + * overwritten as it is usually put at the start of VRAM. + * + * Note that since this is tile initialization, it should not perform any + * GT-specific operations, and thus does not need to hold GT forcewake. + * + * Returns: 0 on success, negative error code on error. 
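+ * + * A sketch of the expected probe-time ordering (illustrative only; the real + * sequence has more steps in between): + * + *	err = xe_tile_init_early(tile, xe, id); + *	if (err) + *		return err; + *	... + *	err = xe_tile_init_noalloc(tile);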
+ */ +int xe_tile_init_noalloc(struct xe_tile *tile) +{ + int err; + + xe_device_mem_access_get(tile_to_xe(tile)); + + err = tile_ttm_mgr_init(tile); + if (err) + goto err_mem_access; + + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); + if (IS_ERR(tile->mem.kernel_bb_pool)) + err = PTR_ERR(tile->mem.kernel_bb_pool); + + xe_wa_apply_tile_workarounds(tile); + + xe_tile_sysfs_init(tile); + +err_mem_access: + xe_device_mem_access_put(tile_to_xe(tile)); + return err; +} + +void xe_tile_migrate_wait(struct xe_tile *tile) +{ + xe_migrate_wait(tile->migrate); +} diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h new file mode 100644 index 000000000000..1c9e42ade6b0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TILE_H_ +#define _XE_TILE_H_ + +#include "xe_device_types.h" + +struct xe_tile; + +int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); +int xe_tile_init_noalloc(struct xe_tile *tile); + +void xe_tile_migrate_wait(struct xe_tile *tile); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c new file mode 100644 index 000000000000..0f8d3e7fce46 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <drm/drm_managed.h> + +#include "xe_tile.h" +#include "xe_tile_sysfs.h" + +static void xe_tile_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_type xe_tile_sysfs_kobj_type = { + .release = xe_tile_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void tile_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_tile *tile = arg; + + kobject_put(tile->sysfs); +} + +void xe_tile_sysfs_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; + struct kobj_tile *kt; + int err; + + kt = kzalloc(sizeof(*kt), GFP_KERNEL); + if (!kt) + return; + + kobject_init(&kt->base, &xe_tile_sysfs_kobj_type); + kt->tile = tile; + + err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); + if (err) { + kobject_put(&kt->base); + drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err); + return; + } + + tile->sysfs = &kt->base; + + err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h new file mode 100644 index 000000000000..e4f065039eba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TILE_SYSFS_H_ +#define _XE_TILE_SYSFS_H_ + +#include "xe_tile_sysfs_types.h" + +void xe_tile_sysfs_init(struct xe_tile *tile); + +static inline struct xe_tile * +kobj_to_tile(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_tile, base)->tile; +} + +#endif /* _XE_TILE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs_types.h b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h new file mode 100644 index 000000000000..75906ba11a9e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + 
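+/* Types backing the per-device "tile%d" sysfs directories created in xe_tile_sysfs.c */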
+#ifndef _XE_TILE_SYSFS_TYPES_H_ +#define _XE_TILE_SYSFS_TYPES_H_ + +#include <linux/kobject.h> + +struct xe_tile; + +/** + * struct kobj_tile - A tile's kobject struct that connects the kobject + * and the TILE + * + * When dealing with multiple TILEs, this struct helps to understand which + * TILE needs to be addressed on a given sysfs call. + */ +struct kobj_tile { + /** @base: The actual kobject */ + struct kobject base; + /** @tile: A pointer to the tile itself */ + struct xe_tile *tile; +}; + +#endif /* _XE_TILE_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c new file mode 100644 index 000000000000..2527c556bff1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h new file mode 100644 index 000000000000..95163c303f3e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -0,0 +1,608 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2022 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_H_ + +#include <linux/tracepoint.h> +#include <linux/types.h> + +#include "xe_bo_types.h" +#include "xe_exec_queue_types.h" +#include "xe_gpu_scheduler_types.h" +#include "xe_gt_tlb_invalidation_types.h" +#include "xe_gt_types.h" +#include "xe_guc_exec_queue_types.h" +#include "xe_sched_job.h" +#include "xe_vm.h" + +DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence), + + TP_STRUCT__entry( + __field(u64, fence) + __field(int, seqno) + ), + + TP_fast_assign( + __entry->fence = (u64)fence; + __entry->seqno = fence->seqno; + ), + + TP_printk("fence=0x%016llx, seqno=%d", + __entry->fence, __entry->seqno) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, + xe_gt_tlb_invalidation_fence_work_func, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DECLARE_EVENT_CLASS(xe_bo, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo), + + TP_STRUCT__entry( + __field(size_t, size) + __field(u32, flags) + __field(u64, vm) + ), + + TP_fast_assign( + __entry->size = bo->size; + __entry->flags = bo->flags; + __entry->vm = (unsigned long)bo->vm; + ), + + TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + __entry->size, __entry->flags, __entry->vm) +); + +DEFINE_EVENT(xe_bo, 
xe_bo_cpu_fault, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DEFINE_EVENT(xe_bo, xe_bo_move, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DECLARE_EVENT_CLASS(xe_exec_queue, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q), + + TP_STRUCT__entry( + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u8, gt_id) + __field(u16, width) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->gt_id = q->gt->info.id; + __entry->width = q->width; + __entry->guc_id = q->guc->id; + __entry->guc_state = atomic_read(&q->guc->state); + __entry->flags = q->flags; + ), + + TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", + __entry->class, __entry->logical_mask, + __entry->gt_id, __entry->width, __entry->guc_id, + __entry->guc_state, __entry->flags) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_submit, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_enable, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_disable, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_done, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_register, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister_done, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_close, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_kill, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cleanup_entity, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_destroy, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_reset, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_resubmit, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_lr_cleanup, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + +DECLARE_EVENT_CLASS(xe_sched_job, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job), + + TP_STRUCT__entry( + __field(u32, seqno) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + __field(int, error) + __field(u64, fence) + __field(u64, batch_addr) + ), + + TP_fast_assign( + __entry->seqno = xe_sched_job_seqno(job); + __entry->guc_id = job->q->guc->id; + __entry->guc_state = + atomic_read(&job->q->guc->state); + __entry->flags = job->q->flags; + __entry->error = job->fence->error; + __entry->fence = (unsigned long)job->fence; + __entry->batch_addr = (u64)job->batch_addr[0]; + ), + + 
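+ /* note: only batch_addr[0], the job's first batch buffer address, is traced */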
TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + __entry->fence, __entry->seqno, __entry->guc_id, + __entry->batch_addr, __entry->guc_state, + __entry->flags, __entry->error) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_create, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_exec, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_run, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_free, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_ban, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DECLARE_EVENT_CLASS(xe_sched_msg, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg), + + TP_STRUCT__entry( + __field(u32, opcode) + __field(u16, guc_id) + ), + + TP_fast_assign( + __entry->opcode = msg->opcode; + __entry->guc_id = + ((struct xe_exec_queue *)msg->private_data)->guc->id; + ), + + TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, + __entry->opcode) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DECLARE_EVENT_CLASS(xe_hw_fence, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence), + + TP_STRUCT__entry( + __field(u64, ctx) + __field(u32, seqno) + __field(u64, fence) + ), + + TP_fast_assign( + __entry->ctx = fence->dma.context; + __entry->seqno = fence->dma.seqno; + __entry->fence = (unsigned long)fence; + ), + + TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + __entry->ctx, __entry->fence, __entry->seqno) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DECLARE_EVENT_CLASS(xe_vma, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma), + + TP_STRUCT__entry( + __field(u64, vma) + __field(u32, asid) + __field(u64, start) + __field(u64, end) + __field(u64, ptr) + ), + + TP_fast_assign( + __entry->vma = (unsigned long)vma; + __entry->asid = xe_vma_vm(vma)->usm.asid; + __entry->start = xe_vma_start(vma); + __entry->end = xe_vma_end(vma) - 1; + __entry->ptr = xe_vma_userptr(vma); + ), + + TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + __entry->vma, __entry->asid, __entry->start, + __entry->end, __entry->ptr) +) + +DEFINE_EVENT(xe_vma, xe_vma_flush, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pagefault, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_acc, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_fail, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pf_bind, + 
TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_unbind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_evict, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DECLARE_EVENT_CLASS(xe_vm, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u64, vm) + __field(u32, asid) + ), + + TP_fast_assign( + __entry->vm = (unsigned long)vm; + __entry->asid = vm->usm.asid; + ), + + TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + __entry->asid) +); + +DEFINE_EVENT(xe_vm, xe_vm_kill, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_create, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_free, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_restart, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +/* GuC */ +DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len) +); + +DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DECLARE_EVENT_CLASS(xe_guc_ctb, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail), + + TP_STRUCT__entry( + __field(u8, gt_id) + __field(u32, action) + __field(u32, len) + __field(u32, tail) + __field(u32, _head) + ), + + TP_fast_assign( + __entry->gt_id = gt_id; + __entry->action = action; + __entry->len = len; + __entry->tail = tail; + __entry->_head = _head; + ), + + TP_printk("gt%d: H2G 
CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) +); + +DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail) +); + +DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail), + + TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) + +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c new file mode 100644 index 000000000000..d2b00d0bf1e2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2023 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include <drm/drm_managed.h> +#include <drm/drm_mm.h> + +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_range_manager.h> + +#include "generated/xe_wa_oob.h" +#include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "xe_res_cursor.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_wa.h" + +struct xe_ttm_stolen_mgr { + struct xe_ttm_vram_mgr base; + + /* PCI base offset */ + resource_size_t io_base; + /* GPU base offset */ + resource_size_t stolen_base; + + void *__iomem mapping; +}; + +static inline struct xe_ttm_stolen_mgr * +to_stolen_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_stolen_mgr, base.manager); +} + +/** + * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access + * stolen, can we then fallback to mapping through the GGTT. + * @xe: xe device + * + * Some older integrated platforms don't support reliable CPU access for stolen, + * however on such hardware we can always use the mappable part of the GGTT for + * CPU access. Check if that's the case for this device. + */ +bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); +} + +static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u64 stolen_size; + u64 tile_offset; + u64 tile_size; + + tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; + tile_size = tile->mem.vram.actual_physical_size; + + /* Use DSM base address instead for stolen memory */ + mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; + if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) + return 0; + + stolen_size = tile_size - mgr->stolen_base; + + /* Verify usage fits in the actual resource available */ + if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) + mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; + + /* + * There may be few KB of platform dependent reserved memory at the end + * of vram which is not part of the DSM. 
Such a reserved memory portion is + always less than the DSM granularity, so align down the stolen_size to the + DSM granularity to accommodate such a reserved vram portion. + */ + return ALIGN_DOWN(stolen_size, SZ_1M); +} + +static u32 get_wopcm_size(struct xe_device *xe) +{ + u32 wopcm_size; + u64 val; + + val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); + val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); + + switch (val) { + case 0x5 ... 0x6: + val--; + fallthrough; + case 0x0 ... 0x3: + wopcm_size = (1U << val) * SZ_1M; + break; + default: + WARN(1, "Missing case wopcm_size=%llx\n", val); + wopcm_size = 0; + } + + return wopcm_size; +} + +static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt; + u32 stolen_size, wopcm_size; + u32 ggc, gms; + + ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); + + /* + * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the + * GTT size + */ + if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) + return 0; + + /* + * Graphics >= 1270 uses the offset to the GSMBASE as address in the + * PTEs, together with the DM flag being set. Previously there was no + * such flag so the address was the io_base. + * + * DSMBASE = GSMBASE + 8MB + */ + mgr->stolen_base = SZ_8M; + mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; + + /* decode the GMS field; an unknown encoding means no usable stolen memory */ + gms = REG_FIELD_GET(GMS_MASK, ggc); + switch (gms) { + case 0x0 ... 0x04: + stolen_size = gms * 32 * SZ_1M; + break; + case 0xf0 ... 0xfe: + stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M; + break; + default: + return 0; + } + + /* Carve out the top of DSM as it contains the reserved WOPCM region */ + wopcm_size = get_wopcm_size(xe); + if (drm_WARN_ON(&xe->drm, !wopcm_size)) + return 0; + + stolen_size -= wopcm_size; + + if (media_gt && XE_WA(media_gt, 14019821291)) { + u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE) + & ~GENMASK_ULL(5, 0); + + /* + * This workaround is primarily implemented by the BIOS. We + * just need to figure out whether the BIOS has applied the + * workaround (meaning the programmed address falls within + * the DSM) and, if so, reserve that part of the DSM to + * prevent accidental reuse. The DSM location should be just + * below the WOPCM.
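+ * + * E.g. (illustrative numbers only): with io_base at 0x180000000 and a 64M + * stolen_size, a programmed GSCPSMI base of 0x183000000 falls inside the DSM, + * so stolen_size is clamped to 48M and the top 16M is left untouched.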
+ */ + if (gscpsmi_base >= mgr->io_base && + gscpsmi_base < mgr->io_base + stolen_size) { + xe_gt_dbg(media_gt, + "Reserving %llu bytes of DSM for Wa_14019821291\n", + mgr->io_base + stolen_size - gscpsmi_base); + stolen_size = gscpsmi_base - mgr->io_base; + } + } + + if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) + return 0; + + return stolen_size; +} + +extern struct resource intel_graphics_stolen_res; + +static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ +#ifdef CONFIG_X86 + /* Map into GGTT */ + mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2); + + /* Stolen memory is x86 only */ + mgr->stolen_base = intel_graphics_stolen_res.start; + return resource_size(&intel_graphics_stolen_res); +#else + return 0; +#endif +} + +void xe_ttm_stolen_mgr_init(struct xe_device *xe) +{ + struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u64 stolen_size, io_size, pgsize; + int err; + + if (IS_DGFX(xe)) + stolen_size = detect_bar2_dgfx(xe, mgr); + else if (GRAPHICS_VERx100(xe) >= 1270) + stolen_size = detect_bar2_integrated(xe, mgr); + else + stolen_size = detect_stolen(xe, mgr); + + if (!stolen_size) { + drm_dbg_kms(&xe->drm, "No stolen memory support\n"); + return; + } + + pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; + if (pgsize < PAGE_SIZE) + pgsize = PAGE_SIZE; + + /* + * We don't try to attempt partial visible support for stolen vram, + * since stolen is always at the end of vram, and the BAR size is pretty + * much always 256M, with small-bar. + */ + io_size = 0; + if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) + io_size = stolen_size; + + err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, + io_size, pgsize); + if (err) { + drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); + return; + } + + drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", + stolen_size); + + if (io_size) + mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); +} + +u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) +{ + struct xe_device *xe = xe_bo_device(bo); + struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr); + struct xe_res_cursor cur; + + XE_WARN_ON(!mgr->io_base); + + if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) + return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; + + xe_res_first(bo->ttm.resource, offset, 4096, &cur); + return mgr->io_base + cur.start; +} + +static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, + struct xe_ttm_stolen_mgr *mgr, + struct ttm_resource *mem) +{ + struct xe_res_cursor cur; + + if (!mgr->io_base) + return -EIO; + + xe_res_first(mem, 0, 4096, &cur); + mem->bus.offset = cur.start; + + drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); + + if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) + mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; + + mem->bus.offset += mgr->io_base; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; + + return 0; +} + +static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, + struct xe_ttm_stolen_mgr *mgr, + struct ttm_resource *mem) +{ +#ifdef CONFIG_X86 + struct xe_bo *bo = ttm_to_xe_bo(mem->bo); + + XE_WARN_ON(IS_DGFX(xe)); + + /* XXX: Require BO to be mapped to GGTT? 
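Currently it effectively is required: BOs without XE_BO_CREATE_GGTT_BIT are rejected with -EIO just below.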
*/ + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) + return -EIO; + + /* GGTT is always contiguously mapped */ + mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base; + + mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; + + return 0; +#else + /* How is it even possible to get here without gen12 stolen? */ + drm_WARN_ON(&xe->drm, 1); + return -EIO; +#endif +} + +int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) +{ + struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL; + + if (!mgr || !mgr->io_base) + return -EIO; + + if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) + return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem); + else + return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); +} + +u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe) +{ + struct xe_ttm_stolen_mgr *mgr = + to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN)); + + return mgr->stolen_base; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h new file mode 100644 index 000000000000..1777245ff810 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_STOLEN_MGR_H_ +#define _XE_TTM_STOLEN_MGR_H_ + +#include <linux/types.h> + +struct ttm_resource; +struct xe_bo; +struct xe_device; + +void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); +bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); +u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); +u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c new file mode 100644 index 000000000000..3e1fa0c832ca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include "xe_ttm_sys_mgr.h" + +#include <drm/drm_managed.h> + +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_range_manager.h> +#include <drm/ttm/ttm_tt.h> + +#include "xe_bo.h" +#include "xe_gt.h" + +struct xe_ttm_sys_node { + struct ttm_buffer_object *tbo; + struct ttm_range_mgr_node base; +}; + +static inline struct xe_ttm_sys_node * +to_xe_ttm_sys_node(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_sys_node, base.base); +} + +static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct xe_ttm_sys_node *node; + int r; + + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->tbo = tbo; + ttm_resource_init(tbo, place, &node->base.base); + + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { + r = -ENOSPC; + goto err_fini; + } + + node->base.mm_nodes[0].start = 0; + node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); + node->base.base.start = XE_BO_INVALID_OFFSET; + + *res = &node->base.base; + + return 0; + +err_fini: + ttm_resource_fini(man, &node->base.base); + kfree(node); + return r; +} + +static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct 
xe_ttm_sys_node *node = to_xe_ttm_sys_node(res); + + ttm_resource_fini(man, res); + kfree(node); +} + +static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + +} + +static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { + .alloc = xe_ttm_sys_mgr_new, + .free = xe_ttm_sys_mgr_del, + .debug = xe_ttm_sys_mgr_debug +}; + +static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = (struct xe_device *)arg; + struct ttm_resource_manager *man = &xe->mem.sys_mgr; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); +} + +int xe_ttm_sys_mgr_init(struct xe_device *xe) +{ + struct ttm_resource_manager *man = &xe->mem.sys_mgr; + struct sysinfo si; + u64 gtt_size; + + si_meminfo(&si); + gtt_size = (u64)si.totalram * si.mem_unit; + /* TTM limits allocation of all TTM devices by 50% of system memory */ + gtt_size /= 2; + + man->use_tt = true; + man->func = &xe_ttm_sys_mgr_func; + ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); + ttm_resource_manager_set_used(man, true); + return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h new file mode 100644 index 000000000000..e8f5cd395b28 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TTM_SYS_MGR_H_ +#define _XE_TTM_SYS_MGR_H_ + +struct xe_device; + +int xe_ttm_sys_mgr_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c new file mode 100644 index 000000000000..115ec745e502 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include <drm/drm_managed.h> + +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_range_manager.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_res_cursor.h" +#include "xe_ttm_vram_mgr.h" + +static inline struct drm_buddy_block * +xe_ttm_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm, + struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = xe_ttm_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = drm_buddy_block_offset(block); + size = drm_buddy_block_size(mm, block); + + block = list_entry(block->link.next, struct drm_buddy_block, + link); + if (start + size != drm_buddy_block_offset(block)) + return false; + } + + return true; +} + +static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres; + struct drm_buddy *mm = &mgr->mm; + u64 size, remaining_size, min_page_size; + unsigned long lpfn; + int err; + + lpfn = place->lpfn; + if (!lpfn || lpfn > man->size >> PAGE_SHIFT) + lpfn = man->size >> 
PAGE_SHIFT; + + if (tbo->base.size >> PAGE_SHIFT > (lpfn - place->fpfn)) + return -E2BIG; /* don't trigger eviction for the impossible */ + + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) + return -ENOMEM; + + ttm_resource_init(tbo, place, &vres->base); + + /* bail out quickly if there's likely not enough VRAM for this BO */ + if (ttm_resource_manager_usage(man) > man->size) { + err = -ENOSPC; + goto error_fini; + } + + INIT_LIST_HEAD(&vres->blocks); + + if (place->flags & TTM_PL_FLAG_TOPDOWN) + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + + if (place->fpfn || lpfn != man->size >> PAGE_SHIFT) + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + + if (WARN_ON(!vres->base.size)) { + err = -EINVAL; + goto error_fini; + } + size = vres->base.size; + + min_page_size = mgr->default_page_size; + if (tbo->page_alignment) + min_page_size = tbo->page_alignment << PAGE_SHIFT; + + if (WARN_ON(min_page_size < mm->chunk_size)) { + err = -EINVAL; + goto error_fini; + } + + if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */ + err = -EINVAL; + goto error_fini; + } + + if (WARN_ON((size > SZ_2G && + (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) { + err = -EINVAL; + goto error_fini; + } + + if (WARN_ON(!IS_ALIGNED(size, min_page_size))) { + err = -EINVAL; + goto error_fini; + } + + mutex_lock(&mgr->lock); + if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) { + mutex_unlock(&mgr->lock); + err = -ENOSPC; + goto error_fini; + } + + if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn && + place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_page_size = size; + + lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); + } + + remaining_size = size; + do { + /* + * Limit maximum size to 2GiB due to SG table limitations. + * FIXME: Should maybe be handled as part of sg construction. + */ + u64 alloc_size = min_t(u64, remaining_size, SZ_2G); + + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + (u64)lpfn << PAGE_SHIFT, + alloc_size, + min_page_size, + &vres->blocks, + vres->flags); + if (err) + goto error_free_blocks; + + remaining_size -= alloc_size; + } while (remaining_size); + + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + size = vres->base.size; + } + + if (lpfn <= mgr->visible_size >> PAGE_SHIFT) { + vres->used_visible_size = size; + } else { + struct drm_buddy_block *block; + + list_for_each_entry(block, &vres->blocks, link) { + u64 start = drm_buddy_block_offset(block); + + if (start < mgr->visible_size) { + u64 end = start + drm_buddy_block_size(mm, block); + + vres->used_visible_size += + min(end, mgr->visible_size) - start; + } + } + } + + mgr->visible_avail -= vres->used_visible_size; + mutex_unlock(&mgr->lock); + + if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) && + xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + + /* + * For some kernel objects we still rely on the start when io mapping + * the object. 
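+ * Hence only contiguous placements get a real start offset below;
+ * everything else is tagged with XE_BO_INVALID_OFFSET.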
+ */ + if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) { + struct drm_buddy_block *block = list_first_entry(&vres->blocks, + typeof(*block), + link); + + vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT; + } else { + vres->base.start = XE_BO_INVALID_OFFSET; + } + + *res = &vres->base; + return 0; + +error_free_blocks: + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); +error_fini: + ttm_resource_fini(man, &vres->base); + kfree(vres); + + return err; +} + +static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_free_list(mm, &vres->blocks); + mgr->visible_avail += vres->used_visible_size; + mutex_unlock(&mgr->lock); + + ttm_resource_fini(man, res); + + kfree(vres); +} + +static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + mgr->default_page_size >> 10); + drm_printf(printer, "visible_avail: %lluMiB\n", + (u64)mgr->visible_avail >> 20); + drm_printf(printer, "visible_size: %lluMiB\n", + (u64)mgr->visible_size >> 20); + + drm_buddy_print(mm, printer); + mutex_unlock(&mgr->lock); + drm_printf(printer, "man size:%llu\n", man->size); +} + +static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + + if (!place->fpfn && !place->lpfn) + return true; + + if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT) + return vres->used_visible_size > 0; + + list_for_each_entry(block, &vres->blocks, link) { + unsigned long fpfn = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + if (place->fpfn < lpfn && place->lpfn > fpfn) + return true; + } + + return false; +} + +static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + + if (!place->fpfn && !place->lpfn) + return true; + + if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT) + return vres->used_visible_size == size; + + list_for_each_entry(block, &vres->blocks, link) { + unsigned long fpfn = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + if (fpfn < place->fpfn || lpfn > place->lpfn) + return false; + } + + return true; +} + +static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { + .alloc = xe_ttm_vram_mgr_new, + .free = xe_ttm_vram_mgr_del, + .intersects = xe_ttm_vram_mgr_intersects, + .compatible = xe_ttm_vram_mgr_compatible, + .debug = xe_ttm_vram_mgr_debug +}; + +static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) +{ + struct xe_device *xe = 
to_xe_device(dev); + struct xe_ttm_vram_mgr *mgr = arg; + struct ttm_resource_manager *man = &mgr->manager; + + ttm_resource_manager_set_used(man, false); + + if (ttm_resource_manager_evict_all(&xe->ttm, man)) + return; + + WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size); + + drm_buddy_fini(&mgr->mm); + + ttm_resource_manager_cleanup(&mgr->manager); + + ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL); + + mutex_destroy(&mgr->lock); +} + +int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, + u32 mem_type, u64 size, u64 io_size, + u64 default_page_size) +{ + struct ttm_resource_manager *man = &mgr->manager; + int err; + + man->func = &xe_ttm_vram_mgr_func; + mgr->mem_type = mem_type; + mutex_init(&mgr->lock); + mgr->default_page_size = default_page_size; + mgr->visible_size = io_size; + mgr->visible_avail = io_size; + + ttm_resource_manager_init(man, &xe->ttm, size); + err = drm_buddy_init(&mgr->mm, man->size, default_page_size); + if (err) + return err; + + ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager); + ttm_resource_manager_set_used(&mgr->manager, true); + + return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); +} + +int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_mem_region *vram = &tile->mem.vram; + + mgr->vram = vram; + return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, + vram->usable_size, vram->io_size, + PAGE_SIZE); +} + +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt) +{ + struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0]; + struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); + struct xe_res_cursor cursor; + struct scatterlist *sg; + int num_entries = 0; + int i, r; + + if (vres->used_visible_size < res->size) + return -EOPNOTSUPP; + + *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); + if (!*sgt) + return -ENOMEM; + + /* Determine the number of DRM_BUDDY blocks to export */ + xe_res_first(res, offset, length, &cursor); + while (cursor.remaining) { + num_entries++; + xe_res_next(&cursor, cursor.size); + } + + r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); + if (r) + goto error_free; + + /* Initialize scatterlist nodes of sg_table */ + for_each_sgtable_sg((*sgt), sg, i) + sg->length = 0; + + /* + * Walk down DRM_BUDDY blocks to populate scatterlist nodes + * @note: Use the iterator API to first get the DRM_BUDDY block + * and the number of bytes from it.
Access the following + * DRM_BUDDY block(s) if more of the buffer needs to be exported + */ + xe_res_first(res, offset, length, &cursor); + for_each_sgtable_sg((*sgt), sg, i) { + phys_addr_t phys = cursor.start + tile->mem.vram.io_start; + size_t size = cursor.size; + dma_addr_t addr; + + addr = dma_map_resource(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + r = dma_mapping_error(dev, addr); + if (r) + goto error_unmap; + + sg_set_page(sg, NULL, size, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = size; + + xe_res_next(&cursor, cursor.size); + } + + return 0; + +error_unmap: + for_each_sgtable_sg((*sgt), sg, i) { + if (!sg->length) + continue; + + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + } + sg_free_table(*sgt); + +error_free: + kfree(*sgt); + return r; +} + +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt) +{ + struct scatterlist *sg; + int i; + + for_each_sgtable_sg(sgt, sg, i) + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + kfree(sgt); +} + +u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + + return mgr->visible_size; +} + +void xe_ttm_vram_get_used(struct ttm_resource_manager *man, + u64 *used, u64 *used_visible) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + + mutex_lock(&mgr->lock); + *used = mgr->mm.size - mgr->mm.avail; + *used_visible = mgr->visible_size - mgr->visible_avail; + mutex_unlock(&mgr->lock); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h new file mode 100644 index 000000000000..d184e19a9230 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_H_ +#define _XE_TTM_VRAM_MGR_H_ + +#include "xe_ttm_vram_mgr_types.h" + +enum dma_data_direction; +struct xe_device; +struct xe_tile; + +int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, + u32 mem_type, u64 size, u64 io_size, + u64 default_page_size); +int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt); +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt); + +u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man); +void xe_ttm_vram_get_used(struct ttm_resource_manager *man, + u64 *used, u64 *used_visible); + +static inline struct xe_ttm_vram_mgr_resource * +to_xe_ttm_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_vram_mgr_resource, base); +} + +static inline struct xe_ttm_vram_mgr * +to_xe_ttm_vram_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_vram_mgr, manager); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h new file mode 100644 index 000000000000..2d75cf126289 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_TYPES_H_ +#define _XE_TTM_VRAM_MGR_TYPES_H_ + +#include <drm/drm_buddy.h> +#include <drm/ttm/ttm_device.h>
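+ +/* + * Note on the CPU-visible accounting (numbers here are illustrative, not + * from this patch): a resource may straddle the visible boundary, in which + * case only the bytes below visible_size count against visible_avail. With + * a 256 MiB visible window, a 64 MiB buffer placed at offset 224 MiB adds + * just 32 MiB to used_visible_size. + */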
+ +struct xe_mem_region; + +/** + * struct xe_ttm_vram_mgr - XE TTM VRAM manager + * + * Manages placement of TTM resource in VRAM. + */ +struct xe_ttm_vram_mgr { + /** @manager: Base TTM resource manager */ + struct ttm_resource_manager manager; + /** @mm: DRM buddy allocator which manages the VRAM */ + struct drm_buddy mm; + /** @vram: ptr to details of associated VRAM region */ + struct xe_mem_region *vram; + /** @visible_size: Probed size of the CPU visible portion */ + u64 visible_size; + /** @visible_avail: CPU visible portion still unallocated */ + u64 visible_avail; + /** @default_page_size: default page size */ + u64 default_page_size; + /** @lock: protects allocations of VRAM */ + struct mutex lock; + /** @mem_type: The TTM memory type */ + u32 mem_type; +}; + +/** + * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource + */ +struct xe_ttm_vram_mgr_resource { + /** @base: Base TTM resource */ + struct ttm_resource base; + /** @blocks: list of DRM buddy blocks */ + struct list_head blocks; + /** @used_visible_size: How many CPU visible bytes this resource is using */ + u64 used_visible_size; + /** @flags: flags associated with the resource */ + unsigned long flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c new file mode 100644 index 000000000000..53ccd338fd8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_tuning.h" + +#include <kunit/visibility.h> + +#include "regs/xe_gt_regs.h" +#include "xe_gt_types.h" +#include "xe_platform_types.h" +#include "xe_rtp.h" + +#undef XE_REG_MCR +#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) + +static const struct xe_rtp_entry_sr gt_tunings[] = { + { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) + }, + { XE_RTP_NAME("Tuning: 32B Access Enable"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) + }, + + /* Xe2 */ + + { XE_RTP_NAME("Tuning: L3 cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + { XE_RTP_NAME("Tuning: L3 cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + + {} +}; + +static const struct xe_rtp_entry_sr engine_tunings[] = { + { XE_RTP_NAME("Tuning: Set Indirect State Override"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) + }, + {} +}; + +static const struct xe_rtp_entry_sr lrc_tunings[] = { + { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + /* read verification is ignored due to 1608008084.
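Hence the FIELD_SET_NO_READ_MASK action below.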
*/ + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2, + FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224)) + }, + + /* DG2 */ + + { XE_RTP_NAME("Tuning: L3 cache"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + { XE_RTP_NAME("Tuning: TDS gang timer"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + /* read verification is ignored as in i915 - need to check enabling */ + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128)) + }, + { XE_RTP_NAME("Tuning: TBIMR fast clip"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) + }, + + /* Xe_LPG */ + + { XE_RTP_NAME("Tuning: L3 cache"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + + {} +}; + +void xe_tuning_process_gt(struct xe_gt *gt) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + + xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); +} +EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); + +void xe_tuning_process_engine(struct xe_hw_engine *hwe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); +} +EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); + +/** + * xe_tuning_process_lrc - process lrc tunings + * @hwe: engine instance to process tunings for + * + * Process LRC table for this platform, saving in @hwe all the tunings that need + * to be applied on context restore. These are tunings touching registers that + * are part of the HW context image. + */ +void xe_tuning_process_lrc(struct xe_hw_engine *hwe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); +} diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h new file mode 100644 index 000000000000..4f9c3ac3b516 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TUNING_ +#define _XE_TUNING_ + +struct xe_gt; +struct xe_hw_engine; + +void xe_tuning_process_gt(struct xe_gt *gt); +void xe_tuning_process_engine(struct xe_hw_engine *hwe); +void xe_tuning_process_lrc(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c new file mode 100644 index 000000000000..25e1ddfd2f86 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_uc.h" + +#include "xe_device.h" +#include "xe_gsc.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_pc.h" +#include "xe_guc_submit.h" +#include "xe_huc.h" +#include "xe_uc_fw.h" +#include "xe_wopcm.h" + +static struct xe_gt * +uc_to_gt(struct xe_uc *uc) +{ + return container_of(uc, struct xe_gt, uc); +} + +static struct xe_device * +uc_to_xe(struct xe_uc *uc) +{ + return gt_to_xe(uc_to_gt(uc)); +} + +/* Should be called once at driver load only */ +int xe_uc_init(struct xe_uc *uc) +{ + int ret; + + /* + * We call the GuC/HuC/GSC init functions even if GuC submission is off + * to correctly move our tracking of the FW state to "disabled". 
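+ * Only after these calls do we bail out early when uC support is + * disabled, skipping the WOPCM and GuC submission setup below.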
+ */ + + ret = xe_guc_init(&uc->guc); + if (ret) + goto err; + + ret = xe_huc_init(&uc->huc); + if (ret) + goto err; + + ret = xe_gsc_init(&uc->gsc); + if (ret) + goto err; + + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + + ret = xe_guc_submit_init(&uc->guc); + if (ret) + goto err; + + return 0; + +err: + return ret; +} + +/** + * xe_uc_init_post_hwconfig - init Uc post hwconfig load + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_post_hwconfig(struct xe_uc *uc) +{ + int err; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + err = xe_uc_sanitize_reset(uc); + if (err) + return err; + + err = xe_guc_init_post_hwconfig(&uc->guc); + if (err) + return err; + + return xe_gsc_init_post_hwconfig(&uc->gsc); +} + +static int uc_reset(struct xe_uc *uc) +{ + struct xe_device *xe = uc_to_xe(uc); + int ret; + + ret = xe_guc_reset(&uc->guc); + if (ret) { + drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret); + return ret; + } + + return 0; +} + +static void xe_uc_sanitize(struct xe_uc *uc) +{ + xe_huc_sanitize(&uc->huc); + xe_guc_sanitize(&uc->guc); +} + +int xe_uc_sanitize_reset(struct xe_uc *uc) +{ + xe_uc_sanitize(uc); + + return uc_reset(uc); +} + +/** + * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_hwconfig(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_guc_min_load_for_hwconfig(&uc->guc); + if (ret) + return ret; + + return 0; +} + +/* + * Should be called during driver load, after every GT reset, and after every + * suspend to reload / auth the firmwares. 
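+ * The sequence below uploads HuC then GuC, enables GuC communication and + * GuC PC, and then triggers HuC authentication via GuC and the + * asynchronous GSC load.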
+ */ +int xe_uc_init_hw(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_huc_upload(&uc->huc); + if (ret) + return ret; + + ret = xe_guc_upload(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(&uc->guc); + if (ret) + return ret; + + ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); + if (ret) + return ret; + + ret = xe_guc_post_load_init(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_pc_start(&uc->guc.pc); + if (ret) + return ret; + + /* We don't fail the driver load if HuC fails to auth, but let's warn */ + ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC); + xe_gt_assert(uc_to_gt(uc), !ret); + + /* GSC load is async */ + xe_gsc_load_start(&uc->gsc); + + return 0; +} + +int xe_uc_fini_hw(struct xe_uc *uc) +{ + return xe_uc_sanitize_reset(uc); +} + +int xe_uc_reset_prepare(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_reset_prepare(&uc->guc); +} + +void xe_uc_gucrc_disable(struct xe_uc *uc) +{ + XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc)); +} + +void xe_uc_stop_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + +int xe_uc_stop(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_stop(&uc->guc); +} + +int xe_uc_start(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_start(&uc->guc); +} + +static void uc_reset_wait(struct xe_uc *uc) +{ + int ret; + +again: + xe_guc_reset_wait(&uc->guc); + + ret = xe_uc_reset_prepare(uc); + if (ret) + goto again; +} + +int xe_uc_suspend(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + + uc_reset_wait(uc); + + ret = xe_uc_stop(uc); + if (ret) + return ret; + + return xe_guc_suspend(&uc->guc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h new file mode 100644 index 000000000000..5d5110c0c834 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_H_ +#define _XE_UC_H_ + +#include "xe_uc_types.h" + +int xe_uc_init(struct xe_uc *uc); +int xe_uc_init_hwconfig(struct xe_uc *uc); +int xe_uc_init_post_hwconfig(struct xe_uc *uc); +int xe_uc_init_hw(struct xe_uc *uc); +int xe_uc_fini_hw(struct xe_uc *uc); +void xe_uc_gucrc_disable(struct xe_uc *uc); +int xe_uc_reset_prepare(struct xe_uc *uc); +void xe_uc_stop_prepare(struct xe_uc *uc); +int xe_uc_stop(struct xe_uc *uc); +int xe_uc_start(struct xe_uc *uc); +int xe_uc_suspend(struct xe_uc *uc); +int xe_uc_sanitize_reset(struct xe_uc *uc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c new file mode 100644 index 000000000000..0a39ec5a6e99 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <drm/drm_debugfs.h> + +#include "xe_gt.h" +#include "xe_guc_debugfs.h" +#include "xe_huc_debugfs.h" +#include "xe_macros.h" +#include "xe_uc_debugfs.h" + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent) +{ + struct dentry *root; + + root = 
debugfs_create_dir("uc", parent); + if (IS_ERR(root)) { + XE_WARN_ON("Create UC directory failed"); + return; + } + + xe_guc_debugfs_register(&uc->guc, root); + xe_huc_debugfs_register(&uc->huc, root); +} diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h new file mode 100644 index 000000000000..a13382df2bd7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_DEBUGFS_H_ +#define _XE_UC_DEBUGFS_H_ + +struct dentry; +struct xe_uc; + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c new file mode 100644 index 000000000000..9dff96dfe455 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <linux/firmware.h> + +#include <drm/drm_managed.h> + +#include "regs/xe_guc_regs.h" +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gsc.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_module.h" +#include "xe_uc_fw.h" + +/* + * List of required GuC and HuC binaries per-platform. They must be ordered + * based on platform, from newer to older. + * + * Versioning follows the guidelines from + * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. There is a + * distinction for platforms being officially supported by the driver or not. + * Platforms not available publicly or not yet officially supported by the + * driver (under force-probe), use the mmp_ver(): the firmware autoselect logic + * will select the firmware from disk with filename that matches the full + * "mpp version", i.e. major.minor.patch. mmp_ver() should only be used for + * this case. + * + * For platforms officially supported by the driver, the filename always only + * ever contains the major version (GuC) or no version at all (HuC). + * + * After loading the file, the driver parses the versions embedded in the blob. + * The major version needs to match a major version supported by the driver (if + * any). The minor version is also checked and a notice emitted to the log if + * the version found is smaller than the version wanted. This is done only for + * informational purposes so users may have a chance to upgrade, but the driver + * still loads and use the older firmware. + * + * Examples: + * + * 1) Platform officially supported by i915 - using Tigerlake as example. + * Driver loads the following firmware blobs from disk: + * + * - i915/tgl_guc_<major>.bin + * - i915/tgl_huc.bin + * + * <major> number for GuC is checked that it matches the version inside + * the blob. <minor> version is checked and if smaller than the expected + * an info message is emitted about that. + * + * 1) XE_<FUTUREINTELPLATFORM>, still under require_force_probe. Using + * "wipplat" as a short-name. Driver loads the following firmware blobs + * from disk: + * + * - xe/wipplat_guc_<major>.<minor>.<patch>.bin + * - xe/wipplat_huc_<major>.<minor>.<patch>.bin + * + * <major> and <minor> are checked that they match the version inside + * the blob. Both of them need to match exactly what the driver is + * expecting, otherwise it fails. + * + * 3) Platform officially supported by xe and out of force-probe. Using + * "plat" as a short-name. 
Except for the different directory, the + * behavior is the same as (1). Driver loads the following firmware + * blobs from disk: + * + * - xe/plat_guc_<major>.bin + * - xe/plat_huc.bin + * + * The <major> number for GuC is checked to match the version inside + * the blob. The <minor> version is also checked and, if smaller than + * expected, an info message is emitted about that. + * + * Platforms already released with a major version should never be + * removed from the table. Instead new entries with newer versions may be added + * before them, so they take precedence. + * + * TODO: Currently there's no fallback on major version. That's because the xe + * driver only supports one major version of each firmware in the table. + * This needs to be fixed when the major version of GuC is updated. + */ + +struct uc_fw_entry { + enum xe_platform platform; + struct { + const char *path; + u16 major; + u16 minor; + bool full_ver_required; + }; +}; + +struct fw_blobs_by_type { + const struct uc_fw_entry *entries; + u32 count; +}; + +#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) + +#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ + fw_def(DG1, no_ver(i915, huc, dg1)) \ + fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ + fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ + fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ + fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) + +/* for the GSC FW we match the compatibility version and not the release one */ +#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ + fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0)) + +#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ + __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" + +#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## .
## c)) +#define fw_filename_major_ver(dir_, uc_, shortname_, a, b) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a)) +#define fw_filename_no_ver(dir_, uc_, shortname_) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "") + +#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c) \ + { fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c), \ + a, b, true } +#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b) \ + { fw_filename_major_ver(dir_, uc_, shortname_, a, b), \ + a, b } +#define uc_fw_entry_no_ver(dir_, uc_, shortname_) \ + { fw_filename_no_ver(dir_, uc_, shortname_), \ + 0, 0 } + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \ + MODULE_FIRMWARE(fw_filename); + +#define XE_UC_FW_ENTRY(platform__, entry__) \ + { \ + .platform = XE_ ## platform__, \ + entry__, \ + }, + +XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, + fw_filename_mmp_ver, fw_filename_major_ver) +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, + fw_filename_mmp_ver, fw_filename_no_ver) +XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver) + +static struct xe_gt * +__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) +{ + XE_WARN_ON(type >= XE_UC_FW_NUM_TYPES); + + switch (type) { + case XE_UC_FW_TYPE_GUC: + return container_of(uc_fw, struct xe_gt, uc.guc.fw); + case XE_UC_FW_TYPE_HUC: + return container_of(uc_fw, struct xe_gt, uc.huc.fw); + case XE_UC_FW_TYPE_GSC: + return container_of(uc_fw, struct xe_gt, uc.gsc.fw); + default: + return NULL; + } +} + +static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw) +{ + return __uc_fw_to_gt(uc_fw, uc_fw->type); +} + +static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) +{ + return gt_to_xe(uc_fw_to_gt(uc_fw)); +} + +static void +uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) +{ + static const struct uc_fw_entry entries_guc[] = { + XE_GUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, + uc_fw_entry_mmp_ver, + uc_fw_entry_major_ver) + }; + static const struct uc_fw_entry entries_huc[] = { + XE_HUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, + uc_fw_entry_mmp_ver, + uc_fw_entry_no_ver) + }; + static const struct uc_fw_entry entries_gsc[] = { + XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver) + }; + static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { + [XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) }, + [XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) }, + [XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) }, + }; + static const struct uc_fw_entry *entries; + enum xe_platform p = xe->info.platform; + u32 count; + int i; + + xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); + entries = blobs_all[uc_fw->type].entries; + count = blobs_all[uc_fw->type].count; + + for (i = 0; i < count && p <= entries[i].platform; i++) { + if (p == entries[i].platform) { + uc_fw->path = entries[i].path; + uc_fw->versions.wanted.major = entries[i].major; + uc_fw->versions.wanted.minor = entries[i].minor; + uc_fw->full_ver_required = entries[i].full_ver_required; + + if (uc_fw->type == XE_UC_FW_TYPE_GSC) + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + else + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; + + break; + } + } +} + +static void +uc_fw_override(struct xe_uc_fw *uc_fw) +{ + char *path_override = NULL; + + /* empty string disables, but it's not allowed for GuC */ + switch (uc_fw->type) { + case XE_UC_FW_TYPE_GUC: + if (xe_modparam.guc_firmware_path && *xe_modparam.guc_firmware_path) + path_override = 
xe_modparam.guc_firmware_path; + break; + case XE_UC_FW_TYPE_HUC: + path_override = xe_modparam.huc_firmware_path; + break; + case XE_UC_FW_TYPE_GSC: + path_override = xe_modparam.gsc_firmware_path; + break; + default: + break; + } + + if (path_override) { + uc_fw->path = path_override; + uc_fw->user_overridden = true; + } +} + +/** + * xe_uc_fw_copy_rsa - copy fw RSA to buffer + * + * @uc_fw: uC firmware + * @dst: dst buffer + * @max_len: max number of bytes to copy + * + * Return: number of copied bytes. + */ +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + u32 size = min_t(u32, uc_fw->rsa_size, max_len); + + xe_assert(xe, !(size % 4)); + xe_assert(xe, xe_uc_fw_is_available(uc_fw)); + + xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap, + xe_uc_fw_rsa_offset(uc_fw), size); + + return size; +} + +static void uc_fw_fini(struct drm_device *drm, void *arg) +{ + struct xe_uc_fw *uc_fw = arg; + + if (!xe_uc_fw_is_available(uc_fw)) + return; + + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); +} + +static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; + + xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); + xe_gt_assert(gt, release->major >= 70); + + if (release->major > 70 || release->minor >= 6) { + /* v70.6.0 adds CSS header support */ + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->submission_version); + } else if (release->minor >= 3) { + /* v70.3.0 introduced v1.1.0 */ + compatibility->major = 1; + compatibility->minor = 1; + compatibility->patch = 0; + } else { + /* v70.0.0 introduced v1.0.0 */ + compatibility->major = 1; + compatibility->minor = 0; + compatibility->patch = 0; + } + + uc_fw->private_data_size = css->private_data_size; +} + +int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; + struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type]; + + /* Driver has no requirement on any version, any is good. */ + if (!wanted->major) + return 0; + + /* + * If full version is required, both major and minor should match. + * Otherwise, at least the major version. 
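+ * A failed check is still tolerated when the user explicitly overrode + * the firmware path (see the fail label below).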
+ */ + if (wanted->major != found->major || + (uc_fw->full_ver_required && wanted->minor != found->minor)) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + found->major, found->minor, + wanted->major, wanted->minor); + goto fail; + } + + if (wanted->minor > found->minor) { + drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n", + xe_uc_fw_type_repr(uc_fw->type), + wanted->major, wanted->minor, + found->major, found->minor, + uc_fw->path); + drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n", + XE_UC_FIRMWARE_URL); + } + + return 0; + +fail: + if (xe_uc_fw_is_overridden(uc_fw)) + return 0; + + return -ENOEXEC; +} + +/* Refer to the "CSS-based Firmware Layout" documentation entry for details */ +static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + struct uc_css_header *css; + size_t size; + + /* Check the size of the blob before examining buffer contents */ + if (unlikely(fw_size < sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw_size, sizeof(struct uc_css_header)); + return -ENODATA; + } + + css = (struct uc_css_header *)fw_data; + + /* Check integrity of size values inside CSS header */ + size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - + css->exponent_size_dw) * sizeof(u32); + if (unlikely(size != sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, + "%s firmware %s: unexpected header size: %zu != %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + size, sizeof(struct uc_css_header)); + return -EPROTO; + } + + /* uCode size must be calculated from other sizes */ + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At a minimum it must contain header, uCode and RSA; check the size of all three.
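All CSS sizes are in dwords; e.g. a key_size_dw of 64 would mean a 256-byte RSA blob (illustrative value).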
*/ + size = sizeof(struct uc_css_header) + uc_fw->ucode_size + + uc_fw->rsa_size; + if (unlikely(fw_size < size)) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw_size, size); + return -ENOEXEC; + } + + /* Get version numbers from the CSS header */ + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); + + if (uc_fw->type == XE_UC_FW_TYPE_GUC) + guc_read_css_info(uc_fw, css); + + return 0; +} + +static bool is_cpd_header(const void *data) +{ + const u32 *marker = data; + + return *marker == GSC_CPD_HEADER_MARKER; +} + +static u32 entry_offset(const struct gsc_cpd_header_v2 *header, const char *name) +{ + const struct gsc_cpd_entry *entry; + int i; + + entry = (void *)header + header->header_length; + + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, name) == 0) + return entry->offset & GSC_CPD_ENTRY_OFFSET_MASK; + + return 0; +} + +/* Refer to the "GSC-based Firmware Layout" documentation entry for details */ +static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t size, + const char *manifest_entry, const char *css_entry) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_device *xe = gt_to_xe(gt); + const struct gsc_cpd_header_v2 *header = data; + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + const struct gsc_manifest_header *manifest; + size_t min_size = sizeof(*header); + u32 offset; + + /* manifest_entry is mandatory, css_entry is optional */ + xe_assert(xe, manifest_entry); + + if (size < min_size || !is_cpd_header(header)) + return -ENOENT; + + if (header->header_length < sizeof(struct gsc_cpd_header_v2)) { + xe_gt_err(gt, "invalid CPD header length %u!\n", header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries; + if (size < min_size) { + xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + /* Look for the manifest first */ + offset = entry_offset(header, manifest_entry); + if (!offset) { + xe_gt_err(gt, "Failed to find %s manifest!\n", + xe_uc_fw_type_repr(uc_fw->type)); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_manifest_header); + if (size < min_size) { + xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + manifest = data + offset; + + release->major = manifest->fw_version.major; + release->minor = manifest->fw_version.minor; + release->patch = manifest->fw_version.hotfix; + + if (uc_fw->type == XE_UC_FW_TYPE_GSC) { + struct xe_gsc *gsc = container_of(uc_fw, struct xe_gsc, fw); + + release->build = manifest->fw_version.build; + gsc->security_version = manifest->security_version; + } + + /* then optionally look for the css header */ + if (css_entry) { + int ret; + + /* + * This section does not contain a CSS entry on DG2. We + * don't support DG2 HuC right now, so no need to handle + * it, just add a reminder in case that changes. + */ + xe_assert(xe, xe->info.platform != XE_DG2); + + offset = entry_offset(header, css_entry); + + /* the CSS header parser will check that the CSS header fits */ + if (offset > size) { + xe_gt_err(gt, "FW too small! 
%zu < %u\n", size, offset); + return -ENODATA; + } + + ret = parse_css_header(uc_fw, data + offset, size - offset); + if (ret) + return ret; + + uc_fw->css_offset = offset; + } + + uc_fw->has_gsc_headers = true; + + return 0; +} + +static int parse_gsc_layout(struct xe_uc_fw *uc_fw, const void *data, size_t size) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + const struct gsc_layout_pointers *layout = data; + const struct gsc_bpdt_header *bpdt_header = NULL; + const struct gsc_bpdt_entry *bpdt_entry = NULL; + size_t min_size = sizeof(*layout); + int i; + + if (size < min_size) { + xe_gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + min_size = layout->boot1.offset + layout->boot1.size; + if (size < min_size) { + xe_gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n", + size, min_size); + return -ENODATA; + } + + min_size = sizeof(*bpdt_header); + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_header = data + layout->boot1.offset; + if (bpdt_header->signature != GSC_BPDT_HEADER_SIGNATURE) { + xe_gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n", + bpdt_header->signature); + return -EINVAL; + } + + min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count; + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header); + for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) { + if ((bpdt_entry->type & GSC_BPDT_ENTRY_TYPE_MASK) != + GSC_BPDT_ENTRY_TYPE_GSC_RBE) + continue; + + min_size = bpdt_entry->sub_partition_offset; + + /* the CPD header parser will check that the CPD header fits */ + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for CPD offset: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + return parse_cpd_header(uc_fw, + (void *)bpdt_header + min_size, + layout->boot1.size - min_size, + "RBEP.man", NULL); + } + + xe_gt_err(gt, "couldn't find CPD header in GSC binary!\n"); + return -ENODATA; +} + +static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) +{ + int ret; + + /* + * All GuC releases and older HuC ones use CSS headers, while newer HuC + * releases use GSC CPD headers. + */ + switch (uc_fw->type) { + case XE_UC_FW_TYPE_GSC: + return parse_gsc_layout(uc_fw, fw->data, fw->size); + case XE_UC_FW_TYPE_HUC: + ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw"); + if (!ret || ret != -ENOENT) + return ret; + fallthrough; + case XE_UC_FW_TYPE_GUC: + return parse_css_header(uc_fw, fw->data, fw->size); + default: + return -EINVAL; + } + + return 0; +} + +#define print_uc_fw_version(p_, version_, prefix_, ...) 
\ +do { \ + struct xe_uc_fw_version *ver_ = (version_); \ + if (ver_->build) \ + drm_printf(p_, prefix_ " version %u.%u.%u.%u\n", ##__VA_ARGS__, \ + ver_->major, ver_->minor, \ + ver_->patch, ver_->build); \ + else \ + drm_printf(p_, prefix_ " version %u.%u.%u\n", ##__VA_ARGS__, \ + ver_->major, ver_->minor, ver_->patch); \ +} while (0) + +static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct device *dev = xe->drm.dev; + struct drm_printer p = drm_info_printer(dev); + const struct firmware *fw = NULL; + int err; + + /* + * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we've looked at the HW caps to see if we have uC support + */ + BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); + xe_assert(xe, !uc_fw->status); + xe_assert(xe, !uc_fw->path); + + uc_fw_auto_select(xe, uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? + XE_UC_FIRMWARE_SELECTED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + if (!xe_uc_fw_is_supported(uc_fw)) + return 0; + + uc_fw_override(uc_fw); + + /* an empty path means the firmware is disabled */ + if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); + drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + return 0; + } + + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + err = parse_headers(uc_fw, fw); + if (err) + goto fail; + + print_uc_fw_version(&p, + &uc_fw->versions.found[XE_UC_FW_VER_RELEASE], + "Using %s firmware from %s", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + /* for GSC FW we want the compatibility version, which we query after load */ + if (uc_fw->type != XE_UC_FW_TYPE_GSC) { + err = xe_uc_fw_check_version_requirements(uc_fw); + if (err) + goto fail; + } + + *firmware_p = fw; + + return 0; + +fail: + xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+ XE_UC_FIRMWARE_MISSING : + XE_UC_FIRMWARE_ERROR); + + drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + + return err; +} + +static void uc_fw_release(const struct firmware *fw) +{ + release_firmware(fw); +} + +static int uc_fw_copy(struct xe_uc_fw *uc_fw, const void *data, size_t size, u32 flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *obj; + int err; + + obj = xe_managed_bo_create_from_data(xe, tile, data, size, flags); + if (IS_ERR(obj)) { + drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->bo = obj; + uc_fw->size = size; + + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE); + + err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw); + if (err) + goto fail; + + return 0; + +fail: + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_ERROR); + drm_notice(&xe->drm, "%s firmware %s: copy failed with error %d\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + + return err; +} + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +{ + const struct firmware *fw = NULL; + int err; + + err = uc_fw_request(uc_fw, &fw); + if (err) + return err; + + /* no error and no firmware means nothing to copy */ + if (!fw) + return 0; + + err = uc_fw_copy(uc_fw, fw->data, fw->size, + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); + + uc_fw_release(fw); + + return err; +} + +static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw) +{ + return xe_bo_ggtt_addr(uc_fw->bo); +} + +static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + u32 src_offset, dma_ctrl; + int ret; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* Set the source address for the uCode */ + src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset; + xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_HIGH, + upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT); + + /* Set the DMA destination */ + xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset); + xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* + * Set the transfer size. 
The header plus uCode will be copied to WOPCM + * via DMA, excluding any other components + */ + xe_mmio_write32(gt, DMA_COPY_SIZE, + sizeof(struct uc_css_header) + uc_fw->ucode_size); + + /* Start the DMA */ + xe_mmio_write32(gt, DMA_CTRL, + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); + + /* Wait for DMA to finish */ + ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl, + false); + if (ret) + drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", + xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); + + /* Disable the bits once DMA is over */ + xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); + + return ret; +} + +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + int err; + + /* make sure the status was cleared the last time we reset the uc */ + xe_assert(xe, !xe_uc_fw_is_loaded(uc_fw)); + + if (!xe_uc_fw_is_loadable(uc_fw)) + return -ENOEXEC; + + /* Call custom loader */ + err = uc_fw_xfer(uc_fw, offset, dma_flags); + if (err) + goto fail; + + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED); + return 0; + +fail: + drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL); + return err; +} + +static const char *version_type_repr(enum xe_uc_fw_version_types type) +{ + switch (type) { + case XE_UC_FW_VER_RELEASE: + return "release"; + case XE_UC_FW_VER_COMPATIBILITY: + return "compatibility"; + default: + return "Unknown version type"; + } +} + +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) +{ + int i; + + drm_printf(p, "%s firmware: %s\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: %s\n", + xe_uc_fw_status_repr(uc_fw->status)); + + print_uc_fw_version(p, &uc_fw->versions.wanted, "\twanted %s", + version_type_repr(uc_fw->versions.wanted_type)); + + for (i = 0; i < XE_UC_FW_VER_TYPE_COUNT; i++) { + struct xe_uc_fw_version *ver = &uc_fw->versions.found[i]; + + if (ver->major) + print_uc_fw_version(p, ver, "\tfound %s", + version_type_repr(i)); + } + + if (uc_fw->ucode_size) + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + if (uc_fw->rsa_size) + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h new file mode 100644 index 000000000000..85c20795d1f8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_H_ +#define _XE_UC_FW_H_ + +#include <linux/errno.h> + +#include "xe_macros.h" +#include "xe_uc_fw_abi.h" +#include "xe_uc_fw_types.h" + +struct drm_printer; + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw); +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len); +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags); +int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw); +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); + +static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->css_offset; +} + +static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw, + enum xe_uc_fw_status status) +{ + uc_fw->__status = status; +} + +static inline +const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return "N/A"; + case 
XE_UC_FIRMWARE_UNINITIALIZED: + return "UNINITIALIZED"; + case XE_UC_FIRMWARE_DISABLED: + return "DISABLED"; + case XE_UC_FIRMWARE_SELECTED: + return "SELECTED"; + case XE_UC_FIRMWARE_MISSING: + return "MISSING"; + case XE_UC_FIRMWARE_ERROR: + return "ERROR"; + case XE_UC_FIRMWARE_AVAILABLE: + return "AVAILABLE"; + case XE_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; + case XE_UC_FIRMWARE_LOADABLE: + return "LOADABLE"; + case XE_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; + case XE_UC_FIRMWARE_TRANSFERRED: + return "TRANSFERRED"; + case XE_UC_FIRMWARE_RUNNING: + return "RUNNING"; + } + return "<invalid>"; +} + +static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return -ENODEV; + case XE_UC_FIRMWARE_UNINITIALIZED: + return -EACCES; + case XE_UC_FIRMWARE_DISABLED: + return -EPERM; + case XE_UC_FIRMWARE_MISSING: + return -ENOENT; + case XE_UC_FIRMWARE_ERROR: + return -ENOEXEC; + case XE_UC_FIRMWARE_INIT_FAIL: + case XE_UC_FIRMWARE_LOAD_FAIL: + return -EIO; + case XE_UC_FIRMWARE_SELECTED: + return -ESTALE; + case XE_UC_FIRMWARE_AVAILABLE: + case XE_UC_FIRMWARE_LOADABLE: + case XE_UC_FIRMWARE_TRANSFERRED: + case XE_UC_FIRMWARE_RUNNING: + return 0; + } + return -EINVAL; +} + +static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) +{ + switch (type) { + case XE_UC_FW_TYPE_GUC: + return "GuC"; + case XE_UC_FW_TYPE_HUC: + return "HuC"; + case XE_UC_FW_TYPE_GSC: + return "GSC"; + default: + return "uC"; + } +} + +static inline enum xe_uc_fw_status +__xe_uc_fw_status(struct xe_uc_fw *uc_fw) +{ + /* shouldn't call this before checking hw/blob availability */ + XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); + return uc_fw->status; +} + +static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED; +} + +static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE; +} + +static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; +} + +static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED; +} + +static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; +} + +static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) +{ + return uc_fw->user_overridden; +} + +static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) +{ + if (xe_uc_fw_is_loaded(uc_fw)) + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); +} + +static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +/** + * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. + * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. 
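+ * Note that the RSA signature is not included in this size; it is + * handled separately (see xe_uc_fw_copy_rsa()).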
+ */ +static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) +{ + if (!xe_uc_fw_is_available(uc_fw)) + return 0; + + return __xe_uc_fw_get_upload_size(uc_fw); +} + +#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h new file mode 100644 index 000000000000..87ade41209d0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_ABI_H +#define _XE_UC_FW_ABI_H + +#include <linux/build_bug.h> +#include <linux/types.h> + +/** + * DOC: CSS-based Firmware Layout + * + * The CSS-based firmware structure is used for GuC releases on all platforms + * and for HuC releases up to DG1. Starting from DG2/MTL the HuC uses the GSC + * layout instead. + * The CSS firmware layout looks like this:: + * + * +======================================================================+ + * | Firmware blob | + * +===============+===============+============+============+============+ + * | CSS header | uCode | RSA key | modulus | exponent | + * +===============+===============+============+============+============+ + * <-header size-> <---header size continued -----------> + * <--- size -----------------------------------------------------------> + * <-key size-> + * <-mod size-> + * <-exp size-> + * + * The firmware may or may not have modulus key and exponent data. The header, + * uCode and RSA signature are must-have components that will be used by the + * driver. The length of each component, in dwords, can be found in the header. + * If the modulus and exponent are not present in the firmware (a.k.a. a + * truncated image), the length values still appear in the header. + * + * The driver will do some basic fw size validation based on the following + * rules: + * + * 1. Header, uCode and RSA are must-have components. + * 2. All firmware components, if present, are in the sequence illustrated + * in the layout table above. + * 3. Length info of each component can be found in the header, in dwords. + * 4. Modulus and exponent keys are not required by the driver and may not + * appear in the firmware, in which case the driver will load a truncated + * image. + */ + +struct uc_css_header { + u32 module_type; + /* + * header_size includes all non-uCode bits, including css_header, rsa + * key, modulus key and exponent data.
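+ * The driver thus derives the uCode length as (size_dw - header_size_dw) + * dwords; see parse_css_header().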
+ */ + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; + u32 date; +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_DATE_MIN (0xFF << 8) +#define CSS_DATE_SEC (0xFFFF << 16) + char username[8]; + char buildnumber[12]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + union { + u32 submission_version; /* only applies to GuC */ + u32 reserved2; + }; + u32 reserved0[12]; + union { + u32 private_data_size; /* only applies to GuC */ + u32 reserved1; + }; + u32 header_info; +} __packed; +static_assert(sizeof(struct uc_css_header) == 128); + +/** + * DOC: GSC-based Firmware Layout + * + * The GSC-based firmware structure is used for GSC releases on all platforms + * and for HuC releases starting from DG2/MTL. Older HuC releases use the + * CSS-based layout instead. Unlike the CSS headers, the GSC headers use a + * directory + entries structure (i.e., there is an array of addresses pointing + * to specific header extensions identified by name). Although the header + * structures are the same, some of the entries are specific to GSC while + * others are specific to HuC. The manifest header entry, which includes basic + * information about the binary (like the version), is always present, but it + * is named differently based on the binary type. + * + * The HuC binary starts with a Code Partition Directory (CPD) header. The + * entries we're interested in for use in the driver are: + * + * 1. "HUCP.man": points to the manifest header for the HuC. + * 2. "huc_fw": points to the FW code. On platforms that support load via DMA + * and 2-step HuC authentication (i.e. MTL+) this is a full CSS-based binary, + * while if the GSC is the one doing the load (which only happens on DG2) + * this section only contains the uCode. + * + * The GSC-based HuC firmware layout looks like this:: + * + * +================================================+ + * | CPD Header | + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "HUCP.man" | + * | ... | + * | offset >----------------------------|------o + * | ... | | + * | entryY | | + * | "huc_fw" | | + * | ... | | + * | offset >----------------------------|----------o + * +================================================+ | | + * | | + * +================================================+ | | + * | Manifest Header |<-----o | + * | ... | | + * | FW version | | + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | FW binary |<---------o + * | CSS (MTL+ only) | + * | uCode | + * | RSA Key (MTL+ only) | + * | ... | + * +================================================+ + * + * The GSC binary starts instead with a layout header, which contains the + * locations of the various partitions of the binary. The one we're interested + * in is the boot1 partition, where we can find a BPDT header followed by + * entries, one of which points to the RBE sub-section of the partition, which + * contains the CPD. The GSC blob does not contain a CSS-based binary, so we + * only need to look for the manifest, which is under the "RBEP.man" CPD entry.
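+ * + * For illustration, locating a named CPD entry could look like this (a + * sketch only; bounds and validity checks omitted):: + * + * const struct gsc_cpd_entry *entry = + * (const void *)((const u8 *)cpd + cpd->header_length); + * + * for (i = 0; i < cpd->num_of_entries; i++, entry++) + * if (!strncmp((const char *)entry->name, "RBEP.man", + * sizeof(entry->name))) + * return entry;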
+ * Note that we have no need to find where the actual FW code is inside the + * image because the GSC ROM will itself parse the headers to find it and load + * it. + * The GSC firmware header layout looks like this:: + * + * +================================================+ + * | Layout Pointers | + * | ... | + * | Boot1 offset >---------------------------|------o + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | BPDT header |<-----o + * +================================================+ + * | BPDT entries[] | + * | entry1 | + * | ... | + * | entryX | + * | type == GSC_RBE | + * | offset >-----------------------------|------o + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | CPD Header |<-----o + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "RBEP.man" | + * | ... | + * | offset >----------------------------|------o + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | Manifest Header |<-----o + * | ... | + * | FW version | + * | ... | + * | Security version | + * | ... | + * +================================================+ + */ + +struct gsc_version { + u16 major; + u16 minor; + u16 hotfix; + u16 build; +} __packed; + +struct gsc_partition { + u32 offset; + u32 size; +} __packed; + +struct gsc_layout_pointers { + u8 rom_bypass_vector[16]; + + /* size of this header section, not including ROM bypass vector */ + u16 size; + + /* + * bit0: Backup copy of layout pointers exists + * bits1-15: reserved + */ + u8 flags; + + u8 reserved; + + u32 crc32; + + struct gsc_partition datap; + struct gsc_partition boot1; + struct gsc_partition boot2; + struct gsc_partition boot3; + struct gsc_partition boot4; + struct gsc_partition boot5; + struct gsc_partition temp_pages; +} __packed; + +/* Boot partition structures */ +struct gsc_bpdt_header { + u32 signature; +#define GSC_BPDT_HEADER_SIGNATURE 0x000055AA + + u16 descriptor_count; /* num of entries after the header */ + + u8 version; + u8 configuration; + + u32 crc32; + + u32 build_version; + struct gsc_version tool_version; +} __packed; + +struct gsc_bpdt_entry { + /* + * Bits 0-15: BPDT entry type + * Bits 16-17: reserved + * Bit 18: code sub-partition + * Bits 19-31: reserved + */ + u32 type; +#define GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0) +#define GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1 + + u32 sub_partition_offset; /* from the base of the BPDT header */ + u32 sub_partition_size; +} __packed; + +/* Code partition directory (CPD) structures */ +struct gsc_cpd_header_v2 { + u32 header_marker; +#define GSC_CPD_HEADER_MARKER 0x44504324 + + u32 num_of_entries; + u8 header_version; + u8 entry_version; + u8 header_length; /* in bytes */ + u8 flags; + u32 partition_name; + u32 crc32; +} __packed; + +struct gsc_cpd_entry { + u8 name[12]; + + /* + * Bits 0-24: offset from the beginning of the code partition + * Bit 25: huffman compressed + * Bits 26-31: reserved + */ + u32 offset; +#define GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0) +#define GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25) + + /* + * Module/Item length, in bytes. For Huffman-compressed modules, this + * refers to the uncompressed size. For software-compressed modules, + * this refers to the compressed size. 
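+ * + * An illustrative consumer sketch, combining this field with the offset + * flags above via FIELD_GET() from <linux/bitfield.h>:: + * + * u32 off = FIELD_GET(GSC_CPD_ENTRY_OFFSET_MASK, entry->offset); + * bool huffman = entry->offset & GSC_CPD_ENTRY_HUFFMAN_COMP; + * u32 len = entry->length;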
+ */ + u32 length; + + u8 reserved[4]; +} __packed; + +struct gsc_manifest_header { + u32 header_type; /* 0x4 for manifest type */ + u32 header_length; /* in dwords */ + u32 header_version; + u32 flags; + u32 vendor; + u32 date; + u32 size; /* In dwords, size of entire manifest (header + extensions) */ + u32 header_id; + u32 internal_data; + struct gsc_version fw_version; + u32 security_version; + struct gsc_version meu_kit_version; + u32 meu_manifest_version; + u8 general_data[4]; + u8 reserved3[56]; + u32 modulus_size; /* in dwords */ + u32 exponent_size; /* in dwords */ +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h new file mode 100644 index 000000000000..ee914a5d8523 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_TYPES_H_ +#define _XE_UC_FW_TYPES_H_ + +#include <linux/types.h> + +struct xe_bo; + +/* + * +------------+---------------------------------------------------+ + * | PHASE | FIRMWARE STATUS TRANSITIONS | + * +============+===================================================+ + * | | UNINITIALIZED | + * +------------+- / | \ -+ + * | | DISABLED <--/ | \--> NOT_SUPPORTED | + * | init_early | V | + * | | SELECTED | + * +------------+- / | \ -+ + * | | MISSING <--/ | \--> ERROR | + * | fetch | V | + * | | AVAILABLE | + * +------------+- | \ -+ + * | | | \--> INIT FAIL | + * | init | V | + * | | /------> LOADABLE <----<-----------\ | + * +------------+- \ / \ \ \ -+ + * | | LOAD FAIL <--< \--> TRANSFERRED \ | + * | upload | \ / \ / | + * | | \---------/ \--> RUNNING | + * +------------+---------------------------------------------------+ + */ + +/* + * FIXME: Ported from the i915; this state machine is way too complicated. + * Circle back and simplify this.
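+ * + * Note that the xe_uc_fw_is_*() helpers in xe_uc_fw.h compare against this + * enum with >=, so the declaration order below is load-bearing; an + * illustrative guard would be:: + * + * static_assert(XE_UC_FIRMWARE_AVAILABLE < XE_UC_FIRMWARE_LOADABLE);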
+ */ +enum xe_uc_fw_status { + XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ + XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ + XE_UC_FIRMWARE_DISABLED, /* disabled */ + XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ + XE_UC_FIRMWARE_MISSING, /* blob not found on the system */ + XE_UC_FIRMWARE_ERROR, /* invalid format or version */ + XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ + XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ + XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ + XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ + XE_UC_FIRMWARE_RUNNING /* init/auth done */ +}; + +enum xe_uc_fw_type { + XE_UC_FW_TYPE_GUC = 0, + XE_UC_FW_TYPE_HUC, + XE_UC_FW_TYPE_GSC, + XE_UC_FW_NUM_TYPES +}; + +/** + * struct xe_uc_fw_version - Version for XE micro controller firmware + */ +struct xe_uc_fw_version { + /** @major: major version of the FW */ + u16 major; + /** @minor: minor version of the FW */ + u16 minor; + /** @patch: patch version of the FW */ + u16 patch; + /** @build: build version of the FW (not always available) */ + u16 build; +}; + +enum xe_uc_fw_version_types { + XE_UC_FW_VER_RELEASE, + XE_UC_FW_VER_COMPATIBILITY, + XE_UC_FW_VER_TYPE_COUNT +}; + +/** + * struct xe_uc_fw - XE micro controller firmware + */ +struct xe_uc_fw { + /** @type: type of uC firmware */ + enum xe_uc_fw_type type; + union { + /** @status: firmware load status */ + const enum xe_uc_fw_status status; + /** + * @__status: private firmware load status - only to be used + * by firmware loading code + */ + enum xe_uc_fw_status __status; + }; + /** @path: path to uC firmware */ + const char *path; + /** @user_overridden: user provided path to uC firmware via modparam */ + bool user_overridden; + /** + * @full_ver_required: driver still under development and not ready + * for backward-compatible firmware. To be used only for **new** + * platforms, i.e. still under require_force_probe protection and not + * supported by i915. + */ + bool full_ver_required; + /** @size: size of uC firmware including css header */ + size_t size; + + /** @bo: XE BO for uC firmware */ + struct xe_bo *bo; + + /** @has_gsc_headers: whether the FW image starts with GSC headers */ + bool has_gsc_headers; + + /* + * The firmware build process will generate a version header file with + * major and minor version defined. The versions are built into the CSS + * header of the firmware. The xe kernel driver sets the minimal + * firmware version required per platform.
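+ * + * An acceptance-check sketch (illustrative only; the field names are + * those of this struct):: + * + * want = &fw->versions.wanted; + * found = &fw->versions.found[XE_UC_FW_VER_RELEASE]; + * ok = found->major == want->major && found->minor >= want->minor;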
+ */ + + /** @versions: FW versions wanted and found */ + struct { + /** @wanted: firmware version wanted by platform */ + struct xe_uc_fw_version wanted; + /** @wanted_type: type of firmware version wanted (release vs compatibility) */ + enum xe_uc_fw_version_types wanted_type; + /** @found: fw versions found in firmware blob */ + struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT]; + } versions; + + /** @rsa_size: RSA size */ + u32 rsa_size; + /** @ucode_size: micro kernel size */ + u32 ucode_size; + /** @css_offset: offset within the blob at which the CSS is located */ + u32 css_offset; + + /** @private_data_size: size of private data found in uC css header */ + u32 private_data_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h new file mode 100644 index 000000000000..9924e4484866 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_TYPES_H_ +#define _XE_UC_TYPES_H_ + +#include "xe_gsc_types.h" +#include "xe_guc_types.h" +#include "xe_huc_types.h" +#include "xe_wopcm_types.h" + +/** + * struct xe_uc - XE micro controllers + */ +struct xe_uc { + /** @guc: Graphics micro controller */ + struct xe_guc guc; + /** @huc: HuC */ + struct xe_huc huc; + /** @gsc: Graphics Security Controller */ + struct xe_gsc gsc; + /** @wopcm: WOPCM */ + struct xe_wopcm wopcm; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c new file mode 100644 index 000000000000..0cfe7289b97e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -0,0 +1,3209 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_vm.h" + +#include <linux/dma-fence-array.h> +#include <linux/nospec.h> + +#include <drm/drm_exec.h> +#include <drm/drm_print.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_tt.h> +#include <drm/xe_drm.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/swap.h> + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_drm_client.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" +#include "xe_migrate.h" +#include "xe_pat.h" +#include "xe_pm.h" +#include "xe_preempt_fence.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_sync.h" +#include "xe_trace.h" +#include "generated/xe_wa_oob.h" +#include "xe_wa.h" + +#define TEST_VM_ASYNC_OPS_ERROR + +static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) +{ + return vm->gpuvm.r_obj; +} + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @vma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and the function can be called + * without the vm->userptr.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_vma *vma) +{ + return mmu_interval_check_retry(&vma->userptr.notifier, + vma->userptr.notifier_seq) ?
+ -EAGAIN : 0; +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; + struct page **pages; + bool in_kthread = !current->mm; + unsigned long notifier_seq; + int pinned, ret, i; + bool read_only = xe_vma_read_only(vma); + + lockdep_assert_held(&vm->lock); + xe_assert(xe, xe_vma_is_userptr(vma)); +retry: + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + if (notifier_seq == vma->userptr.notifier_seq) + return 0; + + pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + pinned = ret = 0; + if (in_kthread) { + if (!mmget_not_zero(vma->userptr.notifier.mm)) { + ret = -EFAULT; + goto mm_closed; + } + kthread_use_mm(vma->userptr.notifier.mm); + } + + while (pinned < num_pages) { + ret = get_user_pages_fast(xe_vma_userptr(vma) + + pinned * PAGE_SIZE, + num_pages - pinned, + read_only ? 0 : FOLL_WRITE, + &pages[pinned]); + if (ret < 0) { + if (in_kthread) + ret = 0; + break; + } + + pinned += ret; + ret = 0; + } + + if (in_kthread) { + kthread_unuse_mm(vma->userptr.notifier.mm); + mmput(vma->userptr.notifier.mm); + } +mm_closed: + if (ret) + goto out; + + ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages, + pinned, 0, + (u64)pinned << PAGE_SHIFT, + xe_sg_segment_size(xe->drm.dev), + GFP_KERNEL); + if (ret) { + vma->userptr.sg = NULL; + goto out; + } + vma->userptr.sg = &vma->userptr.sgt; + + ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_KERNEL_MAPPING); + if (ret) { + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + goto out; + } + + for (i = 0; i < pinned; ++i) { + if (!read_only) { + lock_page(pages[i]); + set_page_dirty(pages[i]); + unlock_page(pages[i]); + } + + mark_page_accessed(pages[i]); + } + +out: + release_pages(pages, pinned); + kvfree(pages); + + if (!(ret < 0)) { + vma->userptr.notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + goto retry; + } + + return ret < 0 ? 
ret : 0; +} + +static bool preempt_fences_waiting(struct xe_vm *vm) +{ + struct xe_exec_queue *q; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (!q->compute.pfence || + (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &q->compute.pfence->flags))) { + return true; + } + } + + return false; +} + +static void free_preempt_fences(struct list_head *list) +{ + struct list_head *link, *next; + + list_for_each_safe(link, next, list) + xe_preempt_fence_free(to_preempt_fence_from_link(link)); +} + +static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, + unsigned int *count) +{ + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + if (*count >= vm->preempt.num_exec_queues) + return 0; + + for (; *count < vm->preempt.num_exec_queues; ++(*count)) { + struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); + + if (IS_ERR(pfence)) + return PTR_ERR(pfence); + + list_move_tail(xe_preempt_fence_link(pfence), list); + } + + return 0; +} + +static int wait_for_existing_preempt_fences(struct xe_vm *vm) +{ + struct xe_exec_queue *q; + + xe_vm_assert_held(vm); + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (q->compute.pfence) { + long timeout = dma_fence_wait(q->compute.pfence, false); + + if (timeout < 0) + return -ETIME; + dma_fence_put(q->compute.pfence); + q->compute.pfence = NULL; + } + } + + return 0; +} + +static bool xe_vm_is_idle(struct xe_vm *vm) +{ + struct xe_exec_queue *q; + + xe_vm_assert_held(vm); + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (!xe_exec_queue_is_idle(q)) + return false; + } + + return true; +} + +static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) +{ + struct list_head *link; + struct xe_exec_queue *q; + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + struct dma_fence *fence; + + link = list->next; + xe_assert(vm->xe, link != list); + + fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), + q, q->compute.context, + ++q->compute.seqno); + dma_fence_put(q->compute.pfence); + q->compute.pfence = fence; + } +} + +static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_exec_queue *q; + int err; + + if (!vm->preempt.num_exec_queues) + return 0; + + err = xe_bo_lock(bo, true); + if (err) + return err; + + err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); + if (err) + goto out_unlock; + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + if (q->compute.pfence) { + dma_resv_add_fence(bo->ttm.base.resv, + q->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + } + +out_unlock: + xe_bo_unlock(bo); + return err; +} + +static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, + struct drm_exec *exec) +{ + struct xe_exec_queue *q; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + q->ops->resume(q); + + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); + } +} + +int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + struct drm_gpuvm_exec vm_exec = { + .vm = &vm->gpuvm, + .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, + .num_fences = 1, + }; + struct drm_exec *exec = &vm_exec.exec; + struct dma_fence *pfence; + int err; + bool wait; + + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); + + down_write(&vm->lock); + err = 
drm_gpuvm_exec_lock(&vm_exec); + if (err) { + up_write(&vm->lock); + return err; + } + + pfence = xe_preempt_fence_create(q, q->compute.context, + ++q->compute.seqno); + if (!pfence) { + err = -ENOMEM; + goto out_unlock; + } + + list_add(&q->compute.link, &vm->preempt.exec_queues); + ++vm->preempt.num_exec_queues; + q->compute.pfence = pfence; + + down_read(&vm->userptr.notifier_lock); + + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); + + /* + * Check to see if a preemption on the VM or a userptr invalidation is + * in flight; if so, trigger this preempt fence to sync state with + * other preempt fences on the VM. + */ + wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); + if (wait) + dma_fence_enable_sw_signaling(pfence); + + up_read(&vm->userptr.notifier_lock); + +out_unlock: + drm_exec_fini(exec); + up_write(&vm->lock); + + return err; +} + +/** + * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM + * @vm: The VM. + * @q: The exec_queue + */ +void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + if (!xe_vm_in_preempt_fence_mode(vm)) + return; + + down_write(&vm->lock); + list_del(&q->compute.link); + --vm->preempt.num_exec_queues; + if (q->compute.pfence) { + dma_fence_enable_sw_signaling(q->compute.pfence); + dma_fence_put(q->compute.pfence); + q->compute.pfence = NULL; + } + up_write(&vm->lock); +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the userptr.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. + */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->userptr.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 + +static void xe_vm_kill(struct xe_vm *vm) +{ + struct xe_exec_queue *q; + + lockdep_assert_held(&vm->lock); + + xe_vm_lock(vm, false); + vm->flags |= XE_VM_FLAG_BANNED; + trace_xe_vm_kill(vm); + + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + q->ops->kill(q); + xe_vm_unlock(vm); + + /* TODO: Inform user the VM is banned */ +} + +/** + * xe_vm_validate_should_retry() - Whether to retry after a validate error. + * @exec: The drm_exec object used for locking before validation. + * @err: The error returned from ttm_bo_validate(). + * @end: A ktime_t cookie that should be set to 0 before first use and + * that should be reused on subsequent calls. + * + * With multiple active VMs, under memory pressure, it is possible that + * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM. + * Until ttm properly handles locking in such scenarios, the best thing the + * driver can do is retry with a timeout. Check if that is necessary, and + * if so unlock the drm_exec's objects while keeping the ticket to prepare + * for a rerun. + * + * Return: true if a retry after drm_exec_init() is recommended; + * false otherwise. + */ +bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) +{ + ktime_t cur; + + if (err != -ENOMEM) + return false; + + cur = ktime_get(); + *end = *end ?
: ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); + if (!ktime_before(cur, *end)) + return false; + + msleep(20); + return true; +} + +static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) +{ + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + struct drm_gpuva *gpuva; + int ret; + + lockdep_assert_held(&vm->lock); + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) + list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, + &vm->rebind_list); + + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); + if (ret) + return ret; + + vm_bo->evicted = false; + return 0; +} + +static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, + bool *done) +{ + int err; + + /* + * 1 fence for each preempt fence plus a fence for each tile from a + * possible rebind + */ + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + + vm->xe->info.tile_count); + if (err) + return err; + + if (xe_vm_is_idle(vm)) { + vm->preempt.rebind_deactivated = true; + *done = true; + return 0; + } + + if (!preempt_fences_waiting(vm)) { + *done = true; + return 0; + } + + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + if (err) + return err; + + err = wait_for_existing_preempt_fences(vm); + if (err) + return err; + + return drm_gpuvm_validate(&vm->gpuvm, exec); +} + +static void preempt_rebind_work_func(struct work_struct *w) +{ + struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct drm_exec exec; + struct dma_fence *rebind_fence; + unsigned int fence_count = 0; + LIST_HEAD(preempt_fences); + ktime_t end = 0; + int err = 0; + long wait; + int __maybe_unused tries = 0; + + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); + trace_xe_vm_rebind_worker_enter(vm); + + down_write(&vm->lock); + + if (xe_vm_is_closed_or_banned(vm)) { + up_write(&vm->lock); + trace_xe_vm_rebind_worker_exit(vm); + return; + } + +retry: + if (xe_vm_userptr_check_repin(vm)) { + err = xe_vm_userptr_pin(vm); + if (err) + goto out_unlock_outer; + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + + drm_exec_until_all_locked(&exec) { + bool done = false; + + err = xe_preempt_work_begin(&exec, vm, &done); + drm_exec_retry_on_contention(&exec); + if (err || done) { + drm_exec_fini(&exec); + if (err && xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; + + goto out_unlock_outer; + } + } + + err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); + if (err) + goto out_unlock; + + rebind_fence = xe_vm_rebind(vm, true); + if (IS_ERR(rebind_fence)) { + err = PTR_ERR(rebind_fence); + goto out_unlock; + } + + if (rebind_fence) { + dma_fence_wait(rebind_fence, false); + dma_fence_put(rebind_fence); + } + + /* Wait on munmap style VM unbinds */ + wait = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (wait <= 0) { + err = -ETIME; + goto out_unlock; + } + +#define retry_required(__tries, __vm) \ + (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ + (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ + __xe_vm_userptr_needs_repin(__vm)) + + down_read(&vm->userptr.notifier_lock); + if (retry_required(tries, vm)) { + up_read(&vm->userptr.notifier_lock); + err = -EAGAIN; + goto out_unlock; + } + +#undef retry_required + + spin_lock(&vm->xe->ttm.lru_lock); + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); + spin_unlock(&vm->xe->ttm.lru_lock); + + /* Point of no return. 
*/ + arm_preempt_fences(vm, &preempt_fences); + resume_and_reinstall_preempt_fences(vm, &exec); + up_read(&vm->userptr.notifier_lock); + +out_unlock: + drm_exec_fini(&exec); +out_unlock_outer: + if (err == -EAGAIN) { + trace_xe_vm_rebind_worker_retry(vm); + goto retry; + } + + if (err) { + drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); + xe_vm_kill(vm); + } + up_write(&vm->lock); + + free_preempt_fences(&preempt_fences); + + trace_xe_vm_rebind_worker_exit(vm); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_vm *vm = xe_vma_vm(vma); + struct dma_resv_iter cursor; + struct dma_fence *fence; + long err; + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + /* No need to stop gpu access if the userptr is not yet bound. */ + if (!vma->userptr.initial_bind) { + up_write(&vm->userptr.notifier_lock); + return true; + } + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && + !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + up_write(&vm->userptr.notifier_lock); + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKKEEP fences + * to the vm. + */ + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm)) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_vma *vma, *next; + int err = 0; + LIST_HEAD(tmp_evict); + + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&vma->userptr.invalidate_link); + list_move_tail(&vma->combined_links.userptr, + &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to temporary list */ + list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, + combined_links.userptr) { + err = xe_vma_userptr_pin_pages(vma); + if (err < 0) + return err; + + list_move_tail(&vma->combined_links.userptr, &vm->rebind_list); + } + + return 0; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning.
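+ * + * Typical usage under vm->lock mirrors the rebind worker above (an + * illustrative sketch; the error label is hypothetical):: + * + * if (xe_vm_userptr_check_repin(vm)) { + * err = xe_vm_userptr_pin(vm); + * if (err) + * goto out; + * }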
+ * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +{ + struct dma_fence *fence = NULL; + struct xe_vma *vma, *next; + + lockdep_assert_held(&vm->lock); + if (xe_vm_in_lr_mode(vm) && !rebind_worker) + return NULL; + + xe_vm_assert_held(vm); + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) { + xe_assert(vm->xe, vma->tile_present); + + list_del_init(&vma->combined_links.rebind); + dma_fence_put(fence); + if (rebind_worker) + trace_xe_vma_rebind_worker(vma); + else + trace_xe_vma_rebind_exec(vma); + fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); + if (IS_ERR(fence)) + return fence; + } + + return fence; +} + +#define VMA_CREATE_FLAG_READ_ONLY BIT(0) +#define VMA_CREATE_FLAG_IS_NULL BIT(1) + +static struct xe_vma *xe_vma_create(struct xe_vm *vm, + struct xe_bo *bo, + u64 bo_offset_or_userptr, + u64 start, u64 end, + u16 pat_index, unsigned int flags) +{ + struct xe_vma *vma; + struct xe_tile *tile; + u8 id; + bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); + bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); + + xe_assert(vm->xe, start < end); + xe_assert(vm->xe, end < vm->size); + + if (!bo && !is_null) /* userptr */ + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + else + vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr), + GFP_KERNEL); + if (!vma) { + vma = ERR_PTR(-ENOMEM); + return vma; + } + + INIT_LIST_HEAD(&vma->combined_links.rebind); + + INIT_LIST_HEAD(&vma->gpuva.gem.entry); + vma->gpuva.vm = &vm->gpuvm; + vma->gpuva.va.addr = start; + vma->gpuva.va.range = end - start + 1; + if (read_only) + vma->gpuva.flags |= XE_VMA_READ_ONLY; + if (is_null) + vma->gpuva.flags |= DRM_GPUVA_SPARSE; + + for_each_tile(tile, vm->xe, id) + vma->tile_mask |= 0x1 << id; + + if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) + vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; + + vma->pat_index = pat_index; + + if (bo) { + struct drm_gpuvm_bo *vm_bo; + + xe_bo_assert_held(bo); + + vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); + if (IS_ERR(vm_bo)) { + kfree(vma); + return ERR_CAST(vm_bo); + } + + drm_gpuvm_bo_extobj_add(vm_bo); + drm_gem_object_get(&bo->ttm.base); + vma->gpuva.gem.obj = &bo->ttm.base; + vma->gpuva.gem.offset = bo_offset_or_userptr; + drm_gpuva_link(&vma->gpuva, vm_bo); + drm_gpuvm_bo_put(vm_bo); + } else /* userptr or null */ { + if (!is_null) { + u64 size = end - start + 1; + int err; + + INIT_LIST_HEAD(&vma->userptr.invalidate_link); + vma->gpuva.gem.offset = bo_offset_or_userptr; + + err = mmu_interval_notifier_insert(&vma->userptr.notifier, + current->mm, + xe_vma_userptr(vma), size, + &vma_userptr_notifier_ops); + if (err) { + kfree(vma); + vma = ERR_PTR(err); + return vma; + } + + vma->userptr.notifier_seq = LONG_MAX; + } + + xe_vm_get(vm); + } + + return vma; +} + +static void xe_vma_destroy_late(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + bool read_only = xe_vma_read_only(vma); + + if (xe_vma_is_userptr(vma)) { + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only 
? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + /* + * Since userptr pages are not pinned, we can't remove + * the notifier until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&vma->userptr.notifier); + xe_vm_put(vm); + } else if (xe_vma_is_null(vma)) { + xe_vm_put(vm); + } else { + xe_bo_put(xe_vma_bo(vma)); + } + + kfree(vma); +} + +static void vma_destroy_work_func(struct work_struct *w) +{ + struct xe_vma *vma = + container_of(w, struct xe_vma, destroy_work); + + xe_vma_destroy_late(vma); +} + +static void vma_destroy_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); + + INIT_WORK(&vma->destroy_work, vma_destroy_work_func); + queue_work(system_unbound_wq, &vma->destroy_work); +} + +static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + lockdep_assert_held_write(&vm->lock); + xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); + + if (xe_vma_is_userptr(vma)) { + xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); + + spin_lock(&vm->userptr.invalidated_lock); + list_del(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + } else if (!xe_vma_is_null(vma)) { + xe_bo_assert_held(xe_vma_bo(vma)); + + drm_gpuva_unlink(&vma->gpuva); + } + + xe_vm_assert_held(vm); + if (fence) { + int ret = dma_fence_add_callback(fence, &vma->destroy_cb, + vma_destroy_cb); + + if (ret) { + XE_WARN_ON(ret != -ENOENT); + xe_vma_destroy_late(vma); + } + } else { + xe_vma_destroy_late(vma); + } +} + +/** + * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * @exec: The drm_exec object we're currently locking for. + * @vma: The vma for which we want to lock the vm resv and any attached + * object's resv. + * @num_shared: The number of dma-fence slots to pre-allocate in the + * objects' reservation objects. + * + * Return: 0 on success, negative error code on error. In particular + * may return -EDEADLK on WW transaction contention and -EINTR if + * an interruptible wait is terminated by a signal. + */ +int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, + unsigned int num_shared) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_bo *bo = xe_vma_bo(vma); + int err; + + XE_WARN_ON(!vm); + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); + if (!err && bo && !bo->vm) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + + return err; +} + +static void xe_vma_destroy_unlocked(struct xe_vma *vma) +{ + struct drm_exec exec; + int err; + + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + err = xe_vm_prepare_vma(&exec, vma, 0); + drm_exec_retry_on_contention(&exec); + if (XE_WARN_ON(err)) + break; + } + + xe_vma_destroy(vma, NULL); + + drm_exec_fini(&exec); +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) +{ + struct drm_gpuva *gpuva; + + lockdep_assert_held(&vm->lock); + + if (xe_vm_is_closed_or_banned(vm)) + return NULL; + + xe_assert(vm->xe, start + range <= vm->size); + + gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); + + return gpuva ?
gpuva_to_vma(gpuva) : NULL; +} + +static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + int err; + + xe_assert(vm->xe, xe_vma_vm(vma) == vm); + lockdep_assert_held(&vm->lock); + + err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); + XE_WARN_ON(err); /* Shouldn't be possible */ + + return err; +} + +static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + xe_assert(vm->xe, xe_vma_vm(vma) == vm); + lockdep_assert_held(&vm->lock); + + drm_gpuva_remove(&vma->gpuva); + if (vm->usm.last_fault_vma == vma) + vm->usm.last_fault_vma = NULL; +} + +static struct drm_gpuva_op *xe_vm_op_alloc(void) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + + if (unlikely(!op)) + return NULL; + + return &op->base; +} + +static void xe_vm_free(struct drm_gpuvm *gpuvm); + +static struct drm_gpuvm_ops gpuvm_ops = { + .op_alloc = xe_vm_op_alloc, + .vm_bo_validate = xe_gpuvm_validate, + .vm_free = xe_vm_free, +}; + +static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) +{ + u64 pte = 0; + + if (pat_index & BIT(0)) + pte |= XE_PPGTT_PTE_PAT0; + + if (pat_index & BIT(1)) + pte |= XE_PPGTT_PTE_PAT1; + + return pte; +} + +static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, + u32 pt_level) +{ + u64 pte = 0; + + if (pat_index & BIT(0)) + pte |= XE_PPGTT_PTE_PAT0; + + if (pat_index & BIT(1)) + pte |= XE_PPGTT_PTE_PAT1; + + if (pat_index & BIT(2)) { + if (pt_level) + pte |= XE_PPGTT_PDE_PDPE_PAT2; + else + pte |= XE_PPGTT_PTE_PAT2; + } + + if (pat_index & BIT(3)) + pte |= XELPG_PPGTT_PTE_PAT3; + + if (pat_index & (BIT(4))) + pte |= XE2_PPGTT_PTE_PAT4; + + return pte; +} + +static u64 pte_encode_ps(u32 pt_level) +{ + XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); + + if (pt_level == 1) + return XE_PDE_PS_2M; + else if (pt_level == 2) + return XE_PDPE_PS_1G; + + return 0; +} + +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, + const u16 pat_index) +{ + struct xe_device *xe = xe_bo_device(bo); + u64 pde; + + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pde |= XE_PAGE_PRESENT | XE_PAGE_RW; + pde |= pde_encode_pat_index(xe, pat_index); + + return pde; +} + +static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + u16 pat_index, u32 pt_level) +{ + struct xe_device *xe = xe_bo_device(bo); + u64 pte; + + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_ps(pt_level); + + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) + pte |= XE_PPGTT_PTE_DM; + + return pte; +} + +static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, + u16 pat_index, u32 pt_level) +{ + struct xe_device *xe = xe_vma_vm(vma)->xe; + + pte |= XE_PAGE_PRESENT; + + if (likely(!xe_vma_read_only(vma))) + pte |= XE_PAGE_RW; + + pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_ps(pt_level); + + if (unlikely(xe_vma_is_null(vma))) + pte |= XE_PTE_NULL; + + return pte; +} + +static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, + u16 pat_index, + u32 pt_level, bool devmem, u64 flags) +{ + u64 pte; + + /* Avoid passing random bits directly as flags */ + xe_assert(xe, !(flags & ~XE_PTE_PS64)); + + pte = addr; + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_ps(pt_level); + + if (devmem) + pte |= XE_PPGTT_PTE_DM; + + pte |= flags; + + return pte; +} + +static const struct xe_pt_ops xelp_pt_ops = { + .pte_encode_bo = xelp_pte_encode_bo, + .pte_encode_vma = 
xelp_pte_encode_vma, + .pte_encode_addr = xelp_pte_encode_addr, + .pde_encode_bo = xelp_pde_encode_bo, +}; + +static void vm_destroy_work_func(struct work_struct *w); + +/** + * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the + * given tile and vm. + * @xe: xe device. + * @tile: tile to set up for. + * @vm: vm to set up for. + * + * Sets up a pagetable tree with one page-table per level and a single + * leaf PTE. All pagetable entries point to the single page-table or, + * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and + * writes become NOPs. + * + * Return: 0 on success, negative error code on error. + */ +static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm) +{ + u8 id = tile->id; + int i; + + for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); + if (IS_ERR(vm->scratch_pt[id][i])) + return PTR_ERR(vm->scratch_pt[id][i]); + + xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); + } + + return 0; +} + +static void xe_vm_free_scratch(struct xe_vm *vm) +{ + struct xe_tile *tile; + u8 id; + + if (!xe_vm_has_scratch(vm)) + return; + + for_each_tile(tile, vm->xe, id) { + u32 i; + + if (!vm->pt_root[id]) + continue; + + for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) + if (vm->scratch_pt[id][i]) + xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); + } +} + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +{ + struct drm_gem_object *vm_resv_obj; + struct xe_vm *vm; + int err, number_tiles = 0; + struct xe_tile *tile; + u8 id; + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return ERR_PTR(-ENOMEM); + + vm->xe = xe; + + vm->size = 1ull << xe->info.va_bits; + + vm->flags = flags; + + init_rwsem(&vm->lock); + + INIT_LIST_HEAD(&vm->rebind_list); + + INIT_LIST_HEAD(&vm->userptr.repin_list); + INIT_LIST_HEAD(&vm->userptr.invalidated); + init_rwsem(&vm->userptr.notifier_lock); + spin_lock_init(&vm->userptr.invalidated_lock); + + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + + INIT_LIST_HEAD(&vm->preempt.exec_queues); + vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ + + for_each_tile(tile, xe, id) + xe_range_fence_tree_init(&vm->rftree[id]); + + vm->pt_ops = &xelp_pt_ops; + + if (!(flags & XE_VM_FLAG_MIGRATION)) + xe_device_mem_access_get(xe); + + vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!vm_resv_obj) { + err = -ENOMEM; + goto err_no_resv; + } + + drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, + vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); + + drm_gem_object_put(vm_resv_obj); + + err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + if (err) + goto err_close; + + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAG_64K; + + for_each_tile(tile, xe, id) { + if (flags & XE_VM_FLAG_MIGRATION && + tile->id != XE_VM_FLAG_TILE_ID(flags)) + continue; + + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + goto err_unlock_close; + } + } + + if (xe_vm_has_scratch(vm)) { + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; + + err = xe_vm_create_scratch(xe, tile, vm); + if (err) + goto err_unlock_close; + } + vm->batch_invalidate_tlb = true; + } + + if (flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->flags |= XE_VM_FLAG_LR_MODE; + 
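/* + * LR mode opts back out of the per-batch TLB invalidation that + * scratch-page VMs enable above. + */ +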
vm->batch_invalidate_tlb = false; + } + + /* Fill pt_root after allocating scratch tables */ + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; + + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + } + dma_resv_unlock(xe_vm_resv(vm)); + + /* Kernel migration VM shouldn't have a circular loop.. */ + if (!(flags & XE_VM_FLAG_MIGRATION)) { + for_each_tile(tile, xe, id) { + struct xe_gt *gt = tile->primary_gt; + struct xe_vm *migrate_vm; + struct xe_exec_queue *q; + u32 create_flags = EXEC_QUEUE_FLAG_VM; + + if (!vm->pt_root[id]) + continue; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, + create_flags); + xe_vm_put(migrate_vm); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto err_close; + } + vm->q[id] = q; + number_tiles++; + } + } + + if (number_tiles > 1) + vm->composite_fence_ctx = dma_fence_context_alloc(1); + + mutex_lock(&xe->usm.lock); + if (flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode++; + else if (!(flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode++; + mutex_unlock(&xe->usm.lock); + + trace_xe_vm_create(vm); + + return vm; + +err_unlock_close: + dma_resv_unlock(xe_vm_resv(vm)); +err_close: + xe_vm_close_and_put(vm); + return ERR_PTR(err); + +err_no_resv: + for_each_tile(tile, xe, id) + xe_range_fence_tree_fini(&vm->rftree[id]); + kfree(vm); + if (!(flags & XE_VM_FLAG_MIGRATION)) + xe_device_mem_access_put(xe); + return ERR_PTR(err); +} + +static void xe_vm_close(struct xe_vm *vm) +{ + down_write(&vm->lock); + vm->size = 0; + up_write(&vm->lock); +} + +void xe_vm_close_and_put(struct xe_vm *vm) +{ + LIST_HEAD(contested); + struct xe_device *xe = vm->xe; + struct xe_tile *tile; + struct xe_vma *vma, *next_vma; + struct drm_gpuva *gpuva, *next; + u8 id; + + xe_assert(xe, !vm->preempt.num_exec_queues); + + xe_vm_close(vm); + if (xe_vm_in_preempt_fence_mode(vm)) + flush_work(&vm->preempt.rebind_work); + + down_write(&vm->lock); + for_each_tile(tile, xe, id) { + if (vm->q[id]) + xe_exec_queue_last_fence_put(vm->q[id], vm); + } + up_write(&vm->lock); + + for_each_tile(tile, xe, id) { + if (vm->q[id]) { + xe_exec_queue_kill(vm->q[id]); + xe_exec_queue_put(vm->q[id]); + vm->q[id] = NULL; + } + } + + down_write(&vm->lock); + xe_vm_lock(vm, false); + drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { + vma = gpuva_to_vma(gpuva); + + if (xe_vma_has_no_bo(vma)) { + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags |= XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + } + + xe_vm_remove_vma(vm, vma); + + /* easy case, remove from VMA? */ + if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { + list_del_init(&vma->combined_links.rebind); + xe_vma_destroy(vma, NULL); + continue; + } + + list_move_tail(&vma->combined_links.destroy, &contested); + vma->gpuva.flags |= XE_VMA_DESTROYED; + } + + /* + * All vm operations will add shared fences to resv. + * The only exception is eviction for a shared object, + * but even so, the unbind when evicted would still + * install a fence to resv. Hence it's safe to + * destroy the pagetables immediately. + */ + xe_vm_free_scratch(vm); + + for_each_tile(tile, xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } + xe_vm_unlock(vm); + + /* + * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL + * Since we hold a refcount to the bo, we can remove and free + * the members safely without locking. 
+ */ + list_for_each_entry_safe(vma, next_vma, &contested, + combined_links.destroy) { + list_del_init(&vma->combined_links.destroy); + xe_vma_destroy_unlocked(vma); + } + + up_write(&vm->lock); + + mutex_lock(&xe->usm.lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode--; + else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode--; + mutex_unlock(&xe->usm.lock); + + for_each_tile(tile, xe, id) + xe_range_fence_tree_fini(&vm->rftree[id]); + + xe_vm_put(vm); +} + +static void vm_destroy_work_func(struct work_struct *w) +{ + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); + struct xe_device *xe = vm->xe; + struct xe_tile *tile; + u8 id; + void *lookup; + + /* xe_vm_close_and_put was not called? */ + xe_assert(xe, !vm->size); + + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + xe_device_mem_access_put(xe); + + if (xe->info.has_asid && vm->usm.asid) { + mutex_lock(&xe->usm.lock); + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + xe_assert(xe, lookup == vm); + mutex_unlock(&xe->usm.lock); + } + } + + for_each_tile(tile, xe, id) + XE_WARN_ON(vm->pt_root[id]); + + trace_xe_vm_free(vm); + dma_fence_put(vm->rebind_fence); + kfree(vm); +} + +static void xe_vm_free(struct drm_gpuvm *gpuvm) +{ + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) +{ + struct xe_vm *vm; + + mutex_lock(&xef->vm.lock); + vm = xa_load(&xef->vm.xa, id); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xef->vm.lock); + + return vm; +} + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) +{ + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, + tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); +} + +static struct xe_exec_queue * +to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + return q ? q : vm->q[0]; +} + +static struct dma_fence * +xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_tile *tile; + struct dma_fence *fence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + int cur_fence = 0, i; + int number_tiles = hweight8(vma->tile_present); + int err; + u8 id; + + trace_xe_vma_unbind(vma); + + if (number_tiles > 1) { + fences = kmalloc_array(number_tiles, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_tile(tile, vm->xe, id) { + if (!(vma->tile_present & BIT(id))) + goto next; + + fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id], + first_op ? syncs : NULL, + first_op ? num_syncs : 0); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) + q = list_next_entry(q, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_tiles, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + fence = cf ? &cf->base : !fence ? 
+ xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + } + + return fence; + +err_fences: + if (fences) { + while (cur_fence) + dma_fence_put(fences[--cur_fence]); + kfree(fences); + } + + return ERR_PTR(err); +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) +{ + struct xe_tile *tile; + struct dma_fence *fence; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct xe_vm *vm = xe_vma_vm(vma); + int cur_fence = 0, i; + int number_tiles = hweight8(vma->tile_mask); + int err; + u8 id; + + trace_xe_vma_bind(vma); + + if (number_tiles > 1) { + fences = kmalloc_array(number_tiles, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_tile(tile, vm->xe, id) { + if (!(vma->tile_mask & BIT(id))) + goto next; + + fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], + first_op ? syncs : NULL, + first_op ? num_syncs : 0, + vma->tile_present & BIT(id)); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) + q = list_next_entry(q, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_tiles, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + cf ? &cf->base : fence); + } + + return cf ? &cf->base : fence; + +err_fences: + if (fences) { + while (cur_fence) + dma_fence_put(fences[--cur_fence]); + kfree(fences); + } + + return ERR_PTR(err); +} + +static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool immediate, bool first_op, + bool last_op) +{ + struct dma_fence *fence; + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + + xe_vm_assert_held(vm); + + if (immediate) { + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, + last_op); + if (IS_ERR(fence)) + return PTR_ERR(fence); + } else { + int i; + + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); + + fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + } + } + + if (last_op) + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); + + return 0; +} + +static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_bo *bo, struct xe_sync_entry *syncs, + u32 num_syncs, bool immediate, bool first_op, + bool last_op) +{ + int err; + + xe_vm_assert_held(vm); + xe_bo_assert_held(bo); + + if (bo && immediate) { + err = xe_bo_validate(bo, vm, true); + if (err) + return err; + } + + return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, + last_op); +} + +static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) +{ + struct dma_fence *fence; + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + + xe_vm_assert_held(vm); + xe_bo_assert_held(xe_vma_bo(vma)); + + fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, 
first_op, last_op); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + xe_vma_destroy(vma, fence); + if (last_op) + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); + + return 0; +} + +#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ + DRM_XE_VM_CREATE_FLAG_LR_MODE | \ + DRM_XE_VM_CREATE_FLAG_FAULT_MODE) + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_create *args = data; + struct xe_tile *tile; + struct xe_vm *vm; + u32 id, asid; + int err; + u32 flags = 0; + + if (XE_IOCTL_DBG(xe, args->extensions)) + return -EINVAL; + + if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) + args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; + + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && + !xe->info.has_usm)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && + xe_device_in_non_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && + xe_device_in_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->extensions)) + return -EINVAL; + + if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) + flags |= XE_VM_FLAG_SCRATCH_PAGE; + if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) + flags |= XE_VM_FLAG_LR_MODE; + if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) + flags |= XE_VM_FLAG_FAULT_MODE; + + vm = xe_vm_create(xe, flags); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + mutex_lock(&xef->vm.lock); + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->vm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + + if (xe->info.has_asid) { + mutex_lock(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + mutex_unlock(&xe->usm.lock); + if (err < 0) { + xe_vm_close_and_put(vm); + return err; + } + err = 0; + vm->usm.asid = asid; + } + + args->vm_id = id; + vm->xef = xef; + + /* Record BO memory for VM pagetable created against client */ + for_each_tile(tile, xe, id) + if (vm->pt_root[id]) + xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) + /* Warning: Security issue - never enable by default */ + args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); +#endif + + return 0; +} + +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_destroy *args = data; + struct xe_vm *vm; + int err = 0; + + if (XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + mutex_lock(&xef->vm.lock); + vm = xa_load(&xef->vm.xa, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + err = -ENOENT; + else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) + err = -EBUSY; + else + 
 + +static const u32 region_to_mem_type[] = { + XE_PL_TT, + XE_PL_VRAM0, + XE_PL_VRAM1, +}; + +static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, u32 region, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) +{ + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + int err; + + xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); + + if (!xe_vma_has_no_bo(vma)) { + err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); + if (err) + return err; + } + + if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { + return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, + true, first_op, last_op); + } else { + int i; + + /* Nothing to do, signal fences now */ + if (last_op) { + for (i = 0; i < num_syncs; i++) { + struct dma_fence *fence = + xe_exec_queue_last_fence_get(wait_exec_queue, vm); + + xe_sync_entry_signal(&syncs[i], NULL, fence); + } + } + + return 0; + } +} + +static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, + bool post_commit) +{ + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags |= XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_remove_vma(vm, vma); +} + +#undef ULL +#define ULL unsigned long long + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) +{ + struct xe_vma *vma; + + switch (op->op) { + case DRM_GPUVA_OP_MAP: + vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", + (ULL)op->map.va.addr, (ULL)op->map.va.range); + break; + case DRM_GPUVA_OP_REMAP: + vma = gpuva_to_vma(op->remap.unmap->va); + vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), + op->remap.unmap->keep ? 1 : 0); + if (op->remap.prev) + vm_dbg(&xe->drm, + "REMAP:PREV: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.prev->va.addr, + (ULL)op->remap.prev->va.range); + if (op->remap.next) + vm_dbg(&xe->drm, + "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.next->va.addr, + (ULL)op->remap.next->va.range); + break; + case DRM_GPUVA_OP_UNMAP: + vma = gpuva_to_vma(op->unmap.va); + vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), + op->unmap.keep ? 1 : 0); + break; + case DRM_GPUVA_OP_PREFETCH: + vma = gpuva_to_vma(op->prefetch.va); + vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } +} +#else +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) +{ +} +#endif
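 + +/* + * The prefetch region index handled by xe_vm_prefetch() above selects a TTM + * placement via region_to_mem_type (an illustrative summary, not uAPI + * documentation): + * + *	region 0 -> XE_PL_TT    (system memory GTT) + *	region 1 -> XE_PL_VRAM0 (local memory, tile 0) + *	region 2 -> XE_PL_VRAM1 (local memory, tile 1) + * + * hence a user-supplied prefetch_mem_region_instance must satisfy + * region < ARRAY_SIZE(region_to_mem_type). + */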
 + +/* + * Create the operations list from the IOCTL arguments and set up the + * operation fields so the parse and commit steps are decoupled from the + * IOCTL arguments. This step can fail. + */ +static struct drm_gpuva_ops * +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, + u64 bo_offset_or_userptr, u64 addr, u64 range, + u32 operation, u32 flags, + u32 prefetch_region, u16 pat_index) +{ + struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; + struct drm_gpuva_ops *ops; + struct drm_gpuva_op *__op; + struct xe_vma_op *op; + struct drm_gpuvm_bo *vm_bo; + int err; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, + "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", + operation, (ULL)addr, (ULL)range, + (ULL)bo_offset_or_userptr); + + switch (operation) { + case DRM_XE_VM_BIND_OP_MAP: + case DRM_XE_VM_BIND_OP_MAP_USERPTR: + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, + obj, bo_offset_or_userptr); + break; + case DRM_XE_VM_BIND_OP_UNMAP: + ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); + break; + case DRM_XE_VM_BIND_OP_PREFETCH: + ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); + break; + case DRM_XE_VM_BIND_OP_UNMAP_ALL: + xe_assert(vm->xe, bo); + + err = xe_bo_lock(bo, true); + if (err) + return ERR_PTR(err); + + vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj); + if (!vm_bo) { + /* BO has no mappings in this VM, nothing to do */ + xe_bo_unlock(bo); + return ERR_PTR(-ENODATA); + } + + ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); + drm_gpuvm_bo_put(vm_bo); + xe_bo_unlock(bo); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + ops = ERR_PTR(-EINVAL); + } + if (IS_ERR(ops)) + return ops; + +#ifdef TEST_VM_ASYNC_OPS_ERROR + if (operation & FORCE_ASYNC_OP_ERROR) { + op = list_first_entry_or_null(&ops->list, struct xe_vma_op, + base.entry); + if (op) + op->inject_error = true; + } +#endif + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; + op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + op->map.pat_index = pat_index; + } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { + op->prefetch.region = prefetch_region; + } + + print_op(vm->xe, __op); + } + + return ops; +} + +static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, + u16 pat_index, unsigned int flags) +{ + struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; + struct drm_exec exec; + struct xe_vma *vma; + int err; + + lockdep_assert_held_write(&vm->lock); + + if (bo) { + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + err = 0; + if (!bo->vm) { + err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); + drm_exec_retry_on_contention(&exec); + } + if (!err) { + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + } + if (err) { + drm_exec_fini(&exec); + return ERR_PTR(err); + } + } + } + vma = xe_vma_create(vm, bo, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, pat_index, flags); + if (bo) + drm_exec_fini(&exec); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(vma); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + return ERR_PTR(err); + } + } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + return ERR_PTR(err); + } + } + + return vma; +} + +static u64 xe_vma_max_pte_size(struct xe_vma *vma) +{ + if (vma->gpuva.flags & XE_VMA_PTE_1G) + return SZ_1G; + else if (vma->gpuva.flags & XE_VMA_PTE_2M) + return SZ_2M; + + return SZ_4K; +} + +static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) +{ + switch (size) { + case SZ_1G: + vma->gpuva.flags |= XE_VMA_PTE_1G; + break; + case SZ_2M: + vma->gpuva.flags |= XE_VMA_PTE_2M; + break; + } + + return SZ_4K; +} + +static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) +{ + int err = 0; + + lockdep_assert_held_write(&vm->lock); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err |= xe_vm_insert_vma(vm, op->map.vma); + if (!err) + op->flags |= XE_VMA_OP_COMMITTED; + break; + case DRM_GPUVA_OP_REMAP: + { + u8 tile_present = + gpuva_to_vma(op->base.remap.unmap->va)->tile_present; + + prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), + true); + op->flags |= XE_VMA_OP_COMMITTED; + + if (op->remap.prev) { + err |= xe_vm_insert_vma(vm, op->remap.prev); + if (!err) + op->flags |= XE_VMA_OP_PREV_COMMITTED; + if (!err && op->remap.skip_prev) { + op->remap.prev->tile_present = + tile_present; + op->remap.prev = NULL; + } + } + if (op->remap.next) { + err |= xe_vm_insert_vma(vm, op->remap.next); + if (!err) + op->flags |= XE_VMA_OP_NEXT_COMMITTED; + if (!err && op->remap.skip_next) { + op->remap.next->tile_present = + tile_present; + op->remap.next = NULL; + } + } + + /* Adjust for partial unbind after removing VMA from VM */ + if (!err) { + op->base.remap.unmap->va->va.addr = op->remap.start; + op->base.remap.unmap->va->va.range = op->remap.range; + } + break; + } + case DRM_GPUVA_OP_UNMAP: + prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); + op->flags |= XE_VMA_OP_COMMITTED; + break; + case DRM_GPUVA_OP_PREFETCH: + op->flags |= XE_VMA_OP_COMMITTED; + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, + struct drm_gpuva_ops *ops, + struct xe_sync_entry *syncs, u32 num_syncs, + struct list_head *ops_list, bool last) +{ + struct xe_vma_op *last_op = NULL; + struct drm_gpuva_op *__op; + int err = 0; + + lockdep_assert_held_write(&vm->lock); + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma; + bool first = list_empty(ops_list); + unsigned int flags = 0; + + INIT_LIST_HEAD(&op->link); + list_add_tail(&op->link, 
ops_list); + + if (first) { + op->flags |= XE_VMA_OP_FIRST; + op->num_syncs = num_syncs; + op->syncs = syncs; + } + + op->q = q; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; + flags |= op->map.is_null ? + VMA_CREATE_FLAG_IS_NULL : 0; + + vma = new_vma(vm, &op->base.map, op->map.pat_index, + flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + op->map.vma = vma; + break; + } + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *old = + gpuva_to_vma(op->base.remap.unmap->va); + + op->remap.start = xe_vma_start(old); + op->remap.range = xe_vma_size(old); + + if (op->base.remap.prev) { + flags |= op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY ? + VMA_CREATE_FLAG_READ_ONLY : 0; + flags |= op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE ? + VMA_CREATE_FLAG_IS_NULL : 0; + + vma = new_vma(vm, op->base.remap.prev, + old->pat_index, flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + op->remap.prev = vma; + + /* + * Userptr creates a new SG mapping so + * we must also rebind. + */ + op->remap.skip_prev = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_end(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_prev) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); + op->remap.range -= + xe_vma_end(vma) - + xe_vma_start(old); + op->remap.start = xe_vma_end(vma); + } + } + + if (op->base.remap.next) { + flags |= op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY ? + VMA_CREATE_FLAG_READ_ONLY : 0; + flags |= op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE ? + VMA_CREATE_FLAG_IS_NULL : 0; + + vma = new_vma(vm, op->base.remap.next, + old->pat_index, flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + op->remap.next = vma; + + /* + * Userptr creates a new SG mapping so + * we must also rebind. 
+ */ + op->remap.skip_next = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_start(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_next) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); + op->remap.range -= + xe_vma_end(old) - + xe_vma_start(vma); + } + } + break; + } + case DRM_GPUVA_OP_UNMAP: + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + last_op = op; + + err = xe_vma_op_commit(vm, op); + if (err) + return err; + } + + /* FIXME: Unhandled corner case */ + XE_WARN_ON(!last_op && last && !list_empty(ops_list)); + + if (!last_op) + return 0; + + last_op->ops = ops; + if (last) { + last_op->flags |= XE_VMA_OP_LAST; + last_op->num_syncs = num_syncs; + last_op->syncs = syncs; + } + + return 0; +} + +static int op_execute(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma *vma, struct xe_vma_op *op) +{ + int err; + + lockdep_assert_held_write(&vm->lock); + + err = xe_vm_prepare_vma(exec, vma, 1); + if (err) + return err; + + xe_vm_assert_held(vm); + xe_bo_assert_held(xe_vma_bo(vma)); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), + op->syncs, op->num_syncs, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + case DRM_GPUVA_OP_REMAP: + { + bool prev = !!op->remap.prev; + bool next = !!op->remap.next; + + if (!op->remap.unmap_done) { + if (prev || next) + vma->gpuva.flags |= XE_VMA_FIRST_REBIND; + err = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && + !prev && !next); + if (err) + break; + op->remap.unmap_done = true; + } + + if (prev) { + op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; + err = xe_vm_bind(vm, op->remap.prev, op->q, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, true, false, + op->flags & XE_VMA_OP_LAST && !next); + op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; + if (err) + break; + op->remap.prev = NULL; + } + + if (next) { + op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; + err = xe_vm_bind(vm, op->remap.next, op->q, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + true, false, + op->flags & XE_VMA_OP_LAST); + op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; + if (err) + break; + op->remap.next = NULL; + } + + break; + } + case DRM_GPUVA_OP_UNMAP: + err = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + case DRM_GPUVA_OP_PREFETCH: + err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, + op->syncs, op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + if (err) + trace_xe_vma_fail(vma); + + return err; +} + +static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) +{ + struct drm_exec exec; + int err; + +retry_userptr: + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + err = op_execute(&exec, vm, vma, op); + drm_exec_retry_on_contention(&exec); + if (err) + break; + } + drm_exec_fini(&exec); + + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + lockdep_assert_held_write(&vm->lock); + err = xe_vma_userptr_pin_pages(vma); + if (!err) + goto retry_userptr; + + trace_xe_vma_fail(vma); + } + + return err; +} + +static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) +{ + int ret = 0; + + 
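/* + * Worked example (illustrative) of the REMAP path handled below: a munmap + * style unbind of [0x1000, 0x4000) from a VMA covering [0x0, 0x5000) yields + * prev = [0x0, 0x1000) and next = [0x4000, 0x5000). If the old VMA is not a + * userptr and an edge is aligned to the old VMA's maximum PTE size, the + * parse step above set skip_prev / skip_next, shrank the unmap to exactly + * [0x1000, 0x4000), and left the edge page tables untouched; any edge not + * skipped is unbound here first (unmap_done) and then rebound as a new VMA. + */ + 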
lockdep_assert_held_write(&vm->lock); + +#ifdef TEST_VM_ASYNC_OPS_ERROR + if (op->inject_error) { + op->inject_error = false; + return -ENOMEM; + } +#endif + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + ret = __xe_vma_op_execute(vm, op->map.vma, op); + break; + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *vma; + + if (!op->remap.unmap_done) + vma = gpuva_to_vma(op->base.remap.unmap->va); + else if (op->remap.prev) + vma = op->remap.prev; + else + vma = op->remap.next; + + ret = __xe_vma_op_execute(vm, vma, op); + break; + } + case DRM_GPUVA_OP_UNMAP: + ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); + break; + case DRM_GPUVA_OP_PREFETCH: + ret = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return ret; +} + +static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) +{ + bool last = op->flags & XE_VMA_OP_LAST; + + if (last) { + while (op->num_syncs--) + xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); + kfree(op->syncs); + if (op->q) + xe_exec_queue_put(op->q); + } + if (!list_empty(&op->link)) + list_del(&op->link); + if (op->ops) + drm_gpuva_ops_free(&vm->gpuvm, op->ops); + if (last) + xe_vm_put(vm); +} + +static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, + bool post_commit, bool prev_post_commit, + bool next_post_commit) +{ + lockdep_assert_held_write(&vm->lock); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (op->map.vma) { + prep_vma_destroy(vm, op->map.vma, post_commit); + xe_vma_destroy_unlocked(op->map.vma); + } + break; + case DRM_GPUVA_OP_UNMAP: + { + struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); + + if (vma) { + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + } + break; + } + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); + + if (op->remap.prev) { + prep_vma_destroy(vm, op->remap.prev, prev_post_commit); + xe_vma_destroy_unlocked(op->remap.prev); + } + if (op->remap.next) { + prep_vma_destroy(vm, op->remap.next, next_post_commit); + xe_vma_destroy_unlocked(op->remap.next); + } + if (vma) { + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + } + break; + } + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + +static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, + struct drm_gpuva_ops **ops, + int num_ops_list) +{ + int i; + + for (i = num_ops_list - 1; i >= 0; --i) { + struct drm_gpuva_ops *__ops = ops[i]; + struct drm_gpuva_op *__op; + + if (!__ops) + continue; + + drm_gpuva_for_each_op_reverse(__op, __ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + xe_vma_op_unwind(vm, op, + op->flags & XE_VMA_OP_COMMITTED, + op->flags & XE_VMA_OP_PREV_COMMITTED, + op->flags & XE_VMA_OP_NEXT_COMMITTED); + } + + drm_gpuva_ops_free(&vm->gpuvm, __ops); + } +} + +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct list_head *ops_list) +{ + struct xe_vma_op *op, *next; + int err; + + lockdep_assert_held_write(&vm->lock); + + list_for_each_entry_safe(op, next, ops_list, link) { + err = xe_vma_op_execute(vm, op); + if (err) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", + op->base.op, err); + /* + * FIXME: Killing VM rather than proper error handling + 
*/ + xe_vm_kill(vm); + return -ENOSPC; + } + xe_vma_op_cleanup(vm, op); + } + + return 0; +} + +#ifdef TEST_VM_ASYNC_OPS_ERROR +#define SUPPORTED_FLAGS \ + (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff) +#else +#define SUPPORTED_FLAGS \ + (DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ + 0xffff) +#endif +#define XE_64K_PAGE_MASK 0xffffull +#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) + +#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ + +static int vm_bind_ioctl_check_args(struct xe_device *xe, + struct drm_xe_vm_bind *args, + struct drm_xe_vm_bind_op **bind_ops) +{ + int err; + int i; + + if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + return -EINVAL; + + if (args->num_binds > 1) { + u64 __user *bind_user = + u64_to_user_ptr(args->vector_of_binds); + + *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * + args->num_binds, GFP_KERNEL); + if (!*bind_ops) + return -ENOMEM; + + err = __copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); + if (XE_IOCTL_DBG(xe, err)) { + err = -EFAULT; + goto free_bind_ops; + } + } else { + *bind_ops = &args->bind; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = (*bind_ops)[i].range; + u64 addr = (*bind_ops)[i].addr; + u32 op = (*bind_ops)[i].op; + u32 flags = (*bind_ops)[i].flags; + u32 obj = (*bind_ops)[i].obj; + u64 obj_offset = (*bind_ops)[i].obj_offset; + u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; + bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + u16 pat_index = (*bind_ops)[i].pat_index; + u16 coh_mode; + + if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { + err = -EINVAL; + goto free_bind_ops; + } + + pat_index = array_index_nospec(pat_index, xe->pat.n_entries); + (*bind_ops)[i].pat_index = pat_index; + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || + XE_IOCTL_DBG(xe, obj && is_null) || + XE_IOCTL_DBG(xe, obj_offset && is_null) || + XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && + is_null) || + XE_IOCTL_DBG(xe, !obj && + op == DRM_XE_VM_BIND_OP_MAP && + !is_null) || + XE_IOCTL_DBG(xe, !obj && + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_DBG(xe, addr && + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_DBG(xe, range && + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_DBG(xe, obj && + op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, obj && + op == DRM_XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_DBG(xe, prefetch_region && + op != DRM_XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & + xe->info.mem_region_mask)) || + XE_IOCTL_DBG(xe, obj && + op == DRM_XE_VM_BIND_OP_UNMAP)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, !range && + op != 
DRM_XE_VM_BIND_OP_UNMAP_ALL)) { + err = -EINVAL; + goto free_bind_ops; + } + } + + return 0; + +free_bind_ops: + if (args->num_binds > 1) + kfree(*bind_ops); + return err; +} + +static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, + int num_syncs) +{ + struct dma_fence *fence; + int i, err = 0; + + fence = xe_sync_in_fence_get(syncs, num_syncs, + to_wait_exec_queue(vm, q), vm); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + + xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, + fence); + dma_fence_put(fence); + + return err; +} + +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_bind *args = data; + struct drm_xe_sync __user *syncs_user; + struct xe_bo **bos = NULL; + struct drm_gpuva_ops **ops = NULL; + struct xe_vm *vm; + struct xe_exec_queue *q = NULL; + u32 num_syncs; + struct xe_sync_entry *syncs = NULL; + struct drm_xe_vm_bind_op *bind_ops; + LIST_HEAD(ops_list); + int err; + int i; + + err = vm_bind_ioctl_check_args(xe, args, &bind_ops); + if (err) + return err; + + if (args->exec_queue_id) { + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) { + err = -ENOENT; + goto free_objs; + } + + if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { + err = -EINVAL; + goto put_exec_queue; + } + } + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) { + err = -EINVAL; + goto put_exec_queue; + } + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto release_vm_lock; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + + if (XE_IOCTL_DBG(xe, range > vm->size) || + XE_IOCTL_DBG(xe, addr > vm->size - range)) { + err = -EINVAL; + goto release_vm_lock; + } + } + + if (args->num_binds) { + bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + if (!bos) { + err = -ENOMEM; + goto release_vm_lock; + } + + ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + if (!ops) { + err = -ENOMEM; + goto release_vm_lock; + } + } + + for (i = 0; i < args->num_binds; ++i) { + struct drm_gem_object *gem_obj; + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 obj = bind_ops[i].obj; + u64 obj_offset = bind_ops[i].obj_offset; + u16 pat_index = bind_ops[i].pat_index; + u16 coh_mode; + + if (!obj) + continue; + + gem_obj = drm_gem_object_lookup(file, obj); + if (XE_IOCTL_DBG(xe, !gem_obj)) { + err = -ENOENT; + goto put_obj; + } + bos[i] = gem_to_xe_bo(gem_obj); + + if (XE_IOCTL_DBG(xe, range > bos[i]->size) || + XE_IOCTL_DBG(xe, obj_offset > + bos[i]->size - range)) { + err = -EINVAL; + goto put_obj; + } + + if (bos[i]->flags & XE_BO_INTERNAL_64K) { + if (XE_IOCTL_DBG(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { + err = -EINVAL; + goto put_obj; + } + } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bos[i]->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + err = -EINVAL; + goto put_obj; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 
2way coherency since we don't know + * how it was mapped on the CPU. Just assume it is + * potentially cached on the CPU side. + */ + err = -EINVAL; + goto put_obj; + } + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto put_obj; + } + } + + syncs_user = u64_to_user_ptr(args->syncs); + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], + (xe_vm_in_lr_mode(vm) ? + SYNC_PARSE_FLAG_LR_MODE : 0) | + (!args->num_binds ? + SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); + if (err) + goto free_syncs; + } + + if (!args->num_binds) { + err = -ENODATA; + goto free_syncs; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 op = bind_ops[i].op; + u32 flags = bind_ops[i].flags; + u64 obj_offset = bind_ops[i].obj_offset; + u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; + u16 pat_index = bind_ops[i].pat_index; + + ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, + addr, range, op, flags, + prefetch_region, pat_index); + if (IS_ERR(ops[i])) { + err = PTR_ERR(ops[i]); + ops[i] = NULL; + goto unwind_ops; + } + + err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, + &ops_list, + i == args->num_binds - 1); + if (err) + goto unwind_ops; + } + + /* Nothing to do */ + if (list_empty(&ops_list)) { + err = -ENODATA; + goto unwind_ops; + } + + xe_vm_get(vm); + if (q) + xe_exec_queue_get(q); + + err = vm_bind_ioctl_ops_execute(vm, &ops_list); + + up_write(&vm->lock); + + if (q) + xe_exec_queue_put(q); + xe_vm_put(vm); + + for (i = 0; bos && i < args->num_binds; ++i) + xe_bo_put(bos[i]); + + kfree(bos); + kfree(ops); + if (args->num_binds > 1) + kfree(bind_ops); + + return err; + +unwind_ops: + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); +free_syncs: + if (err == -ENODATA) + err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); + while (num_syncs--) + xe_sync_entry_cleanup(&syncs[num_syncs]); + + kfree(syncs); +put_obj: + for (i = 0; i < args->num_binds; ++i) + xe_bo_put(bos[i]); +release_vm_lock: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); +put_exec_queue: + if (q) + xe_exec_queue_put(q); +free_objs: + kfree(bos); + kfree(ops); + if (args->num_binds > 1) + kfree(bind_ops); + return err; +} + +/** + * xe_vm_lock() - Lock the vm's dma_resv object + * @vm: The struct xe_vm whose lock is to be locked + * @intr: Whether to perform the wait interruptibly + * + * Return: 0 on success, -EINTR if @intr is true and the wait for a + * contended lock was interrupted. If @intr is false, the function + * always returns 0. + */ +int xe_vm_lock(struct xe_vm *vm, bool intr) +{ + if (intr) + return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + + return dma_resv_lock(xe_vm_resv(vm), NULL); +} + +/** + * xe_vm_unlock() - Unlock the vm's dma_resv object + * @vm: The struct xe_vm whose lock is to be released. + * + * Unlock the vm's dma_resv object that was locked by xe_vm_lock(). + */ +void xe_vm_unlock(struct xe_vm *vm) +{ + dma_resv_unlock(xe_vm_resv(vm)); +} + +/** + * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock + * @vma: VMA to invalidate + * + * Walks the list of page table leaves, zeroing the entries owned by this VMA, + * invalidates the TLBs, and blocks until the TLB invalidation is complete. + * + * Returns 0 for success, negative error code otherwise. 
+ */ +int xe_vm_invalidate_vma(struct xe_vma *vma) +{ + struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_tile *tile; + u32 tile_needs_invalidate = 0; + int seqno[XE_MAX_TILES_PER_DEVICE]; + u8 id; + int ret; + + xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma))); + xe_assert(xe, !xe_vma_is_null(vma)); + trace_xe_vma_usm_invalidate(vma); + + /* Check that we don't race with page-table updates */ + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (xe_vma_is_userptr(vma)) { + WARN_ON_ONCE(!mmu_interval_check_retry + (&vma->userptr.notifier, + vma->userptr.notifier_seq)); + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), + DMA_RESV_USAGE_BOOKKEEP)); + + } else { + xe_bo_assert_held(xe_vma_bo(vma)); + } + } + + for_each_tile(tile, xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_needs_invalidate |= BIT(id); + xe_device_wmb(xe); + /* + * FIXME: We potentially need to invalidate multiple + * GTs within the tile + */ + seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); + if (seqno[id] < 0) + return seqno[id]; + } + } + + for_each_tile(tile, xe, id) { + if (tile_needs_invalidate & BIT(id)) { + ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); + if (ret < 0) + return ret; + } + } + + vma->usm.tile_invalidated = vma->tile_mask; + + return 0; +} + +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) +{ + struct drm_gpuva *gpuva; + bool is_vram; + uint64_t addr; + + if (!down_read_trylock(&vm->lock)) { + drm_printf(p, " Failed to acquire VM lock to dump capture"); + return 0; + } + if (vm->pt_root[gt_id]) { + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE); + is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo); + drm_printf(p, " VM root: A:0x%llx %s\n", addr, + is_vram ? "VRAM" : "SYS"); + } + + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + bool is_userptr = xe_vma_is_userptr(vma); + bool is_null = xe_vma_is_null(vma); + + if (is_null) { + addr = 0; + } else if (is_userptr) { + struct xe_res_cursor cur; + + if (vma->userptr.sg) { + xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, + &cur); + addr = xe_res_dma(&cur); + } else { + addr = 0; + } + } else { + addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE); + is_vram = xe_bo_is_vram(xe_vma_bo(vma)); + } + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", + xe_vma_start(vma), xe_vma_end(vma) - 1, + xe_vma_size(vma), + addr, is_null ? "NULL" : is_userptr ? "USR" : + is_vram ? 
"VRAM" : "SYS"); + } + up_read(&vm->lock); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h new file mode 100644 index 000000000000..cf2f96e8c1ab --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_VM_H_ +#define _XE_VM_H_ + +#include "xe_bo_types.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_printer; +struct drm_file; + +struct ttm_buffer_object; +struct ttm_validate_buffer; + +struct xe_exec_queue; +struct xe_file; +struct xe_sync_entry; +struct drm_exec; + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); +int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); + +static inline struct xe_vm *xe_vm_get(struct xe_vm *vm) +{ + drm_gpuvm_get(&vm->gpuvm); + return vm; +} + +static inline void xe_vm_put(struct xe_vm *vm) +{ + drm_gpuvm_put(&vm->gpuvm); +} + +int xe_vm_lock(struct xe_vm *vm, bool intr); + +void xe_vm_unlock(struct xe_vm *vm); + +static inline bool xe_vm_is_closed(struct xe_vm *vm) +{ + /* Only guaranteed not to change when vm->lock is held */ + return !vm->size; +} + +static inline bool xe_vm_is_banned(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_BANNED; +} + +static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) +{ + lockdep_assert_held(&vm->lock); + return xe_vm_is_closed(vm) || xe_vm_is_banned(vm); +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); + +/** + * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs + * @vm: The vm + * + * Return: whether the vm populates unmapped areas with scratch PTEs + */ +static inline bool xe_vm_has_scratch(const struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_SCRATCH_PAGE; +} + +/** + * gpuvm_to_vm() - Return the embedding xe_vm from a struct drm_gpuvm pointer + * @gpuvm: The struct drm_gpuvm pointer + * + * Return: Pointer to the embedding struct xe_vm. + */ +static inline struct xe_vm *gpuvm_to_vm(struct drm_gpuvm *gpuvm) +{ + return container_of(gpuvm, struct xe_vm, gpuvm); +} + +static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva) +{ + return gpuvm_to_vm(gpuva->vm); +} + +static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva) +{ + return container_of(gpuva, struct xe_vma, gpuva); +} + +static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op) +{ + return container_of(op, struct xe_vma_op, base); +} + +/** + * DOC: Provide accessors for vma members to facilitate easy change of + * implementation. + */ +static inline u64 xe_vma_start(struct xe_vma *vma) +{ + return vma->gpuva.va.addr; +} + +static inline u64 xe_vma_size(struct xe_vma *vma) +{ + return vma->gpuva.va.range; +} + +static inline u64 xe_vma_end(struct xe_vma *vma) +{ + return xe_vma_start(vma) + xe_vma_size(vma); +} + +static inline u64 xe_vma_bo_offset(struct xe_vma *vma) +{ + return vma->gpuva.gem.offset; +} + +static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma) +{ + return !vma->gpuva.gem.obj ? 
NULL : + container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base); +} + +static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma) +{ + return container_of(vma->gpuva.vm, struct xe_vm, gpuvm); +} + +static inline bool xe_vma_read_only(struct xe_vma *vma) +{ + return vma->gpuva.flags & XE_VMA_READ_ONLY; +} + +static inline u64 xe_vma_userptr(struct xe_vma *vma) +{ + return vma->gpuva.gem.offset; +} + +static inline bool xe_vma_is_null(struct xe_vma *vma) +{ + return vma->gpuva.flags & DRM_GPUVA_SPARSE; +} + +static inline bool xe_vma_has_no_bo(struct xe_vma *vma) +{ + return !xe_vma_bo(vma); +} + +static inline bool xe_vma_is_userptr(struct xe_vma *vma) +{ + return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); +} + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +void xe_vm_close_and_put(struct xe_vm *vm); + +static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_FAULT_MODE; +} + +static inline bool xe_vm_in_lr_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_LR_MODE; +} + +static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) +{ + return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm); +} + +int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); +void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); + +int xe_vm_userptr_pin(struct xe_vm *vm); + +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); + +int xe_vm_userptr_check_repin(struct xe_vm *vm); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); + +int xe_vm_invalidate_vma(struct xe_vma *vma); + +extern struct ttm_device_funcs xe_ttm_funcs; + +static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) +{ + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + +/** + * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute + * vms. + * @vm: The vm. + * + * If the rebind functionality on a compute vm was disabled because there + * was nothing to execute, reactivate it and run the rebind worker. + * This function should be called after submitting a batch to a compute vm. + */ +static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) +{ + if (xe_vm_in_preempt_fence_mode(vm) && vm->preempt.rebind_deactivated) { + vm->preempt.rebind_deactivated = false; + xe_vm_queue_rebind_worker(vm); + } +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma); + +int xe_vma_userptr_check_repin(struct xe_vma *vma); + +bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); + +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); + +int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, + unsigned int num_shared); + +/** + * xe_vm_resv() - Return the vm's reservation object + * @vm: The vm + * + * Return: Pointer to the vm's reservation object. + */ +static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm) +{ + return drm_gpuvm_resv(&vm->gpuvm); +} + +/** + * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. + * @vm: The vm + */ +#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +#define vm_dbg drm_dbg +#else +__printf(2, 3) +static inline void vm_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif
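 + +/* + * Example usage of the locking helpers above (a minimal sketch, assuming a + * caller that already owns a vm reference; not taken from the driver): + * + *	int err = xe_vm_lock(vm, true); + * + *	if (err) + *		return err; + *	xe_vm_assert_held(vm); + *	... touch state protected by the vm's dma-resv ... + *	xe_vm_unlock(vm); + */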
 +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h new file mode 100644 index 000000000000..bdc6659891a5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -0,0 +1,555 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_VM_DOC_H_ +#define _XE_VM_DOC_H_ + +/** + * DOC: XE VM (user address space) + * + * VM creation + * =========== + * + * Allocate a physical page for the root of the page table structure, create a + * default bind engine, and return a handle to the user. + * + * Scratch page + * ------------ + * + * If the VM is created with the flag DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE set, + * the entire page table structure defaults to pointing at a blank page + * allocated by the VM. Invalid memory accesses then read from / write to this + * page rather than faulting. + * + * VM bind (create GPU mapping for a BO or userptr) + * ================================================ + * + * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same + * in / out fence interface (struct drm_xe_sync) as execs, which allows users to + * think of binds and execs as more or less the same operation. + * + * Operations + * ---------- + * + * DRM_XE_VM_BIND_OP_MAP - Create mapping for a BO + * DRM_XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr + * DRM_XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr + * + * Implementation details + * ~~~~~~~~~~~~~~~~~~~~~~ + * + * All bind operations are implemented via a hybrid approach of using the CPU + * and GPU to modify page tables. If a new physical page is allocated in the + * page table structure we populate that page via the CPU and insert that new + * page into the existing page table structure via a GPU job. Any existing + * pages in the page table structure that need to be modified are also updated + * via the GPU job. As the root physical page is preallocated on VM creation, + * our GPU job will always have at least 1 update. The in / out fences are + * passed to this job so again this is conceptually the same as an exec. + * + * A very simple example of a few binds on an empty VM with 48 bits of address + * space, and the resulting operations: + * + * .. code-block:: + * + *	bind BO0 0x0-0x1000 + *	alloc page level 3a, program PTE[0] to BO0 phys address (CPU) + *	alloc page level 2, program PDE[0] page level 3a phys address (CPU) + *	alloc page level 1, program PDE[0] page level 2 phys address (CPU) + *	update root PDE[0] to page level 1 phys address (GPU) + * + *	bind BO1 0x201000-0x202000 + *	alloc page level 3b, program PTE[1] to BO1 phys address (CPU) + *	update page level 2 PDE[1] to page level 3b phys address (GPU) + * + *	bind BO2 0x1ff000-0x201000 + *	update page level 3a PTE[511] to BO2 phys address (GPU) + *	update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU) + * + * GPU bypass + * ~~~~~~~~~~ + * + * In the above example the steps using the GPU can be converted to CPU updates + * if the bind can be done immediately (all in-fences satisfied, VM dma-resv + * kernel slot is idle). + * + * Address space + * ------------- + * + * Depending on the platform, either 48 or 57 bits of address space are + * supported. 
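+ * + * As an illustration, the page-table indices in the bind example above follow + * directly from the virtual address (a sketch for the 4k page / 48 bit case + * with 9 bits of index per level; not literal driver code): + * + * .. code-block:: + * + *	pte_index  = (addr >> 12) & 0x1ff;	(level 3, leaf) + *	pde_index  = (addr >> 21) & 0x1ff;	(level 2) + *	pdpe_index = (addr >> 30) & 0x1ff;	(level 1) + *	pml4_index = (addr >> 39) & 0x1ff;	(root) + * + * e.g. the bind of BO2 at 0x1ff000 lands in (0x1ff000 >> 12) & 0x1ff = 511, + * matching PTE[511] of page level 3a in the example. 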
+ * + * Page sizes + * ---------- + * + * The minimum page size is either 4k or 64k depending on platform and memory + * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the + * minimum page size. + * + * Larger pages (2M or 1G) can be used for BOs in VRAM if the BO physical + * address is aligned to the larger page size and the VA is aligned to the + * larger page size. Larger pages for userptrs / BOs in sysmem should be + * possible but are not yet implemented. + * + * Sync error handling mode + * ------------------------ + * + * In both modes during the bind IOCTL the user input is validated. In sync + * error handling mode the newly bound BO is validated (potentially moved back + * to a region of memory where it can be used), page tables are updated by the + * CPU and the job to do the GPU binds is created in the IOCTL itself. This step + * can fail due to memory pressure. The user can recover by freeing memory and + * trying this operation again. + * + * Async error handling mode + * ------------------------- + * + * In async error handling the step of validating the BO, updating page tables, + * and generating a job are deferred to an async worker. As this step can now + * fail after the IOCTL has reported success we need an error handling flow from + * which the user can recover. + * + * The solution is for a user to register a user address with the VM which the + * VM uses to report errors to. The ufence wait interface can be used to wait on + * a VM going into an error state. Once an error is reported the VM's async + * worker is paused. While the VM's async worker is paused, sync + * DRM_XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once + * the user believes the error state is fixed, the async worker can be resumed + * via the XE_VM_BIND_OP_RESTART operation. When VM async bind work is + * restarted, the first operation processed is the operation that caused the + * original error. + * + * Bind queues / engines + * --------------------- + * + * Think of the case where we have two bind operations A + B which are + * submitted in that order. A has in fences while B has none. If using a single + * bind queue, B is now blocked on A's in fences even though it is ready to run. + * This example is a real use case for VK sparse binding. We work around this + * limitation by implementing bind engines. + * + * In the bind IOCTL the user can optionally pass in an engine ID which must map + * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND. + * Underneath, this is really a virtual engine that can run on any of the copy + * hardware engines. The job(s) created by each IOCTL are inserted into this + * engine's ring. In the example above, if A and B have different bind engines, + * B is free to pass A. If the engine ID field is omitted, the default bind + * queue for the VM is used. + * + * TODO: Explain race in issue 41 and how we solve it + * + * Array of bind operations + * ------------------------ + * + * The uAPI allows multiple bind operations to be passed in via a user array, + * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface + * matches the VK sparse binding API. The implementation is rather simple: parse + * the array into a list of operations, pass the in fences to the first + * operation, and pass the out fences to the last operation. The ordered nature + * of a bind engine makes this possible. 
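+ * + * A minimal sketch of such an array bind from userspace (illustrative only; + * field and flag names per the xe uAPI, error handling elided): + * + * .. code-block:: + * + *	struct drm_xe_vm_bind_op ops[2] = { + *		{ .op = DRM_XE_VM_BIND_OP_MAP, .obj = bo_handle, + *		  .addr = 0x100000, .range = 0x10000 }, + *		{ .op = DRM_XE_VM_BIND_OP_UNMAP, + *		  .addr = 0x200000, .range = 0x10000 }, + *	}; + *	struct drm_xe_vm_bind bind = { + *		.vm_id = vm_id, + *		.num_binds = 2, + *		.vector_of_binds = (uintptr_t)ops, + *	}; + * + *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind); + * + * In fences attached to the request apply to ops[0] and out fences signal once + * ops[1] completes, per the above. 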
+ * + * Munmap semantics for unbinds + * ---------------------------- + * + * Munmap allows things like: + * + * .. code-block:: + * + *	0x0000-0x2000 and 0x3000-0x5000 have mappings + *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000 + * + * To support this semantic, we decompose the above example into 4 operations: + * + * .. code-block:: + * + *	unbind 0x0000-0x2000 + *	unbind 0x3000-0x5000 + *	rebind 0x0000-0x1000 + *	rebind 0x4000-0x5000 + * + * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This + * falls apart when using large pages at the edges and the unbind forces us to + * use a smaller page size. For simplicity, we always issue a set of unbinds + * unmapping anything in the range and at most 2 rebinds on the edges. + * + * Similar to an array of binds, in fences are passed to the first operation and + * out fences are signaled on the last operation. + * + * In this example there is a window of time where 0x0000-0x1000 and + * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be + * removed from the mapping. To work around this we treat any munmap style + * unbinds which require a rebind as kernel operations (BO eviction or userptr + * invalidation). The first operation waits on the VM's + * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to + * complete / triggers preempt fences) and the last operation is installed in + * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resumes a compute + * mode VM). The caveat is all dma-resv slots must be updated atomically with + * respect to execs and the compute mode rebind worker. To accomplish this, hold + * the vm->lock in write mode from the first operation until the last. + * + * Deferred binds in fault mode + * ---------------------------- + * + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that + * create mappings are by default deferred to the page fault handler (first + * use). This behavior can be overridden by setting the flag + * DRM_XE_VM_BIND_FLAG_IMMEDIATE, which indicates the mapping should be created + * immediately. + * + * User pointer + * ============ + * + * User pointers are user allocated memory (malloc'd, mmap'd, etc.) for which + * the user wants to create a GPU mapping. Typically in other DRM drivers a + * dummy BO was created and then a binding was created. We bypass creating a + * dummy BO in XE and simply create a binding directly from the userptr. + * + * Invalidation + * ------------ + * + * Since this is core kernel-managed memory, the kernel can move it whenever it + * wants. We register an invalidation MMU notifier to alert XE when a user + * pointer is about to move. The invalidation notifier needs to block until all + * pending users (jobs or compute mode engines) of the userptr are idle to + * ensure no faults. This is done by waiting on all of the VM's dma-resv slots. + * + * Rebinds + * ------- + * + * Either the next exec (non-compute) or rebind worker (compute mode) will + * rebind the userptr. The invalidation MMU notifier kicks the rebind worker + * after the VM dma-resv wait if the VM is in compute mode. + * + * Compute mode + * ============ + * + * A VM in compute mode enables long running workloads and ultra low latency + * submission (ULLS). ULLS is implemented via a continuously running batch + + * semaphores. This enables the user to insert jump-to-new-batch commands + * into the continuously running batch. 
In both cases these batches exceed the + * time a dma fence is allowed to exist for before signaling; as such, dma + * fences are not used when a VM is in compute mode. User fences (TODO: link + * user fence doc) are used instead to signal an operation's completion. + * + * Preempt fences + * -------------- + * + * If the kernel decides to move memory around (either userptr invalidate, BO + * eviction, or munmap style unbind which results in a rebind) and a batch is + * running on an engine, that batch can fault or cause memory corruption as + * page tables for the moved memory are no longer valid. To work around this we + * introduce the concept of preempt fences. When sw signaling is enabled on a + * preempt fence it tells the submission backend to kick that engine off the + * hardware and the preempt fence signals when the engine is off the hardware. + * Once all preempt fences are signaled for a VM the kernel can safely move the + * memory and kick the rebind worker which resumes all the engines' execution. + * + * A preempt fence, for every engine using the VM, is installed in the VM's + * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every + * engine using the VM, is also installed into the same dma-resv slot of every + * external BO mapped in the VM. + * + * Rebind worker + * ------------- + * + * The rebind worker is very similar to an exec. It is responsible for rebinding + * evicted BOs or userptrs, waiting on those operations, installing new preempt + * fences, and finally resuming execution of the engines in the VM. + * + * Flow + * ~~~~ + * + * .. code-block:: + * + *	<----------------------------------------------------------------------| + *	Check if VM is closed, if so bail out                                  | + *	Lock VM global lock in read mode                                       | + *	Pin userptrs (also finds userptr invalidated since last rebind worker) | + *	Lock VM dma-resv and external BOs dma-resv                             | + *	Validate BOs that have been evicted                                    | + *	Wait on and allocate new preempt fences for every engine using the VM  | + *	Rebind invalidated userptrs + evicted BOs                              | + *	Wait on last rebind fence                                              | + *	Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot                          | + *	Install preempt fences and issue resume for every engine using the VM  | + *	Check if any userptrs invalidated since pin                            | + *	Squash resume for all engines                                          | + *	Unlock all                                                             | + *	Wait all VM's dma-resv slots                                           | + *	Retry ---------------------------------------------------------- + *	Release all engines waiting to resume + *	Unlock all + * + * Timeslicing + * ----------- + * + * In order to prevent an engine from continuously being kicked off the hardware + * and making no forward progress, an engine has a period of time it is allowed + * to run after resume before it can be kicked off again. This effectively gives + * each engine a timeslice. + * + * Handling multiple GTs + * ===================== + * + * If a GT has slower access to some regions and the page table structure is in + * the slow region, the performance on that GT could adversely be affected. To + * work around this we allow a VM's page tables to be shadowed on multiple GTs. + * When a VM is created, a default bind engine and page table structure are + * created on each GT. + * + * Binds can optionally pass in a mask of GTs where a mapping should be created; + * if this mask is zero, it defaults to all the GTs where the VM has page + * tables. + * + * The implementation for this breaks down into a bunch of for_each_gt loops in + * various places plus exporting a composite fence for multi-GT binds to the + * user. 
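+ * + * A composite fence of that sort can be assembled with the stock + * dma_fence_array helper; a minimal sketch (issue_bind_on_tile() is a + * hypothetical per-tile helper, not a driver function): + * + * .. code-block:: + * + *	struct dma_fence *fences[XE_MAX_TILES_PER_DEVICE]; + *	struct dma_fence_array *cf; + *	u64 ctx = dma_fence_context_alloc(1); + * + *	for_each_tile(tile, xe, id) + *		fences[id] = issue_bind_on_tile(tile); + *	cf = dma_fence_array_create(xe->info.tile_count, fences, + *				    ctx, 1, false); + * + * cf->base then signals only once every per-tile bind fence has signaled, and + * that is what gets installed for the user's out-syncs. 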
+ * + * Fault mode (unified shared memory) + * ================================== + * + * A VM in fault mode can be enabled on devices that support page faults. If + * page faults are enabled, using dma fences can potentially induce a deadlock: + * A pending page fault can hold up the GPU work which holds up the dma fence + * signaling, and memory allocation is usually required to resolve a page + * fault, but memory allocation is not allowed to gate dma fence signaling. As + * such, dma fences are not allowed when VM is in fault mode. Because dma-fences + * are not allowed, long running workloads and ULLS are enabled on a faulting + * VM. + * + * Deferred VM binds + * ----------------- + * + * By default, on a faulting VM, binds just allocate the VMA and the actual + * updating of the page tables is deferred to the page fault handler. This + * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE + * in the VM bind which will then do the bind immediately. + * + * Page fault handler + * ------------------ + * + * Page faults are received in the G2H worker under the CT lock which is in the + * path of dma fences (no memory allocations are allowed, faults require memory + * allocations) thus we cannot process faults under the CT lock. Another issue + * is faults issue TLB invalidations which require G2H credits and we cannot + * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do + * not want the CT lock to be an outer lock of the VM global lock (the VM global + * lock is required for fault processing). + * + * To work around the above issue with processing faults in the G2H worker, we + * sink faults to a buffer which is large enough to sink all possible faults on + * the GT (1 per hardware engine) and kick a worker to process the faults. Since + * the page faults G2H are already received in a worker, kicking another worker + * adds more latency to a critical performance path. We add a fast path in the + * G2H irq handler which looks at the first G2H and if it is a page fault we + * sink the fault to the buffer and kick the worker to process the fault. TLB + * invalidation responses are also in the critical path so these can also be + * processed in this fast path. + * + * Multiple buffers and workers are used and hashed over based on the ASID so + * faults from different VMs can be processed in parallel. + * + * The page fault handler itself is rather simple; the flow is below. + * + * .. code-block:: + * + *	Lookup VM from ASID in page fault G2H + *	Lock VM global lock in read mode + *	Lookup VMA from address in page fault G2H + *	Check if VMA is valid, if not bail + *	Check if VMA's BO has backing store, if not allocate + *	<----------------------------------------------------------------------| + *	If userptr, pin pages                                                  | + *	Lock VM & BO dma-resv locks                                            | + *	If atomic fault, migrate to VRAM, else validate BO location            | + *	Issue rebind                                                           | + *	Wait on rebind to complete                                             | + *	Check if userptr invalidated since pin                                 | + *	Drop VM & BO dma-resv locks                                            | + *	Retry ---------------------------------------------------------- + *	Unlock all + *	Issue blocking TLB invalidation + *	Send page fault response to GuC 
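+ * + * The hashing of faults to buffers mentioned above is simple modular + * arithmetic (a sketch with placeholder names, not the exact implementation): + * + * .. code-block:: + * + *	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE]; + *	copy the fault into pf_queue, then queue_work() its worker + * + * so consecutive ASIDs land in different queues and fault processing for + * different VMs can proceed in parallel. 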
+ * + * Access counters + * --------------- + * + * Access counters can be configured to trigger a G2H indicating the device is + * accessing VMAs in system memory frequently as a hint to migrate those VMAs to + * VRAM. + * + * Same as the page fault handler, access counters G2H cannot be processed in + * the G2H worker under the CT lock. Again we use a buffer to sink access + * counter G2H. Unlike page faults there is no upper bound so if the buffer is + * full we simply drop the G2H. Access counters are a best-case optimization + * and, unlike page faults, it is safe to drop them. + * + * The access counter handler itself is rather simple; the flow is below. + * + * .. code-block:: + * + *	Lookup VM from ASID in access counter G2H + *	Lock VM global lock in read mode + *	Lookup VMA from address in access counter G2H + *	If userptr, bail, nothing to do + *	Lock VM & BO dma-resv locks + *	Issue migration to VRAM + *	Unlock all + * + * Notice no rebind is issued in the access counter handler as the rebind will + * be issued on next page fault. + * + * Caveats with eviction / user pointer invalidation + * ------------------------------------------------- + * + * In the case of eviction and user pointer invalidation on a faulting VM, there + * is no need to issue a rebind; rather, we just need to blow away the page + * tables for the VMAs and the page fault handler will rebind the VMAs when they + * fault. The caveat is that to update / read the page table structure, the VM + * global lock is needed. In both the eviction and user pointer invalidation + * cases, locks are held which make acquiring the VM global lock impossible. To + * work around this every VMA maintains a list of leaf page table entries which + * should be written to zero to blow away the VMA's page tables. After writing + * zero to these entries a blocking TLB invalidate is issued. At this point it + * is safe for the kernel to move the VMA's memory around. This is a necessary + * lockless algorithm and is safe as leaves cannot be changed while either an + * eviction or userptr invalidation is occurring. + * + * Locking + * ======= + * + * VM locking protects all of the core data paths (bind operations, execs, + * evictions, and compute mode rebind worker) in XE. + * + * Locks + * ----- + * + * VM global lock (vm->lock) - rw semaphore lock. Outermost lock which protects + * the list of userptrs mapped in the VM, the list of engines using this VM, and + * the array of external BOs mapped in the VM. Adding or removing any of the + * aforementioned state from the VM requires acquiring this lock in write mode. + * The VM bind path also acquires this lock in write mode, while the exec / + * compute mode rebind worker acquires it in read mode. + * + * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv + * slots which are shared with any private BO in the VM. Expected to be acquired + * during VM binds, execs, and compute mode rebind worker. This lock is also + * held when private BOs are being evicted. + * + * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects + * external BO dma-resv slots. Expected to be acquired during VM binds (in + * addition to the VM dma-resv lock). All external BO dma-resv locks within a + * VM are expected to be acquired (in addition to the VM dma-resv lock) during + * execs and the compute mode rebind worker. This lock is also held when an + * external BO is being evicted. + * + * Putting it all together + * ----------------------- + * + * 1. An exec and bind operation with the same VM can't be executing at the same + * time (vm->lock). + * + * 2. A compute mode rebind worker and bind operation with the same VM can't be + * executing at the same time (vm->lock). + * + * 3. We can't add / remove userptrs or external BOs to a VM while an exec with + * the same VM is executing (vm->lock). + * + * 4. 
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated, to enforce the ordering of kernel ops (eviction,
+ * userptr invalidation, munmap style unbinds which result in a rebind),
+ * rebinds during execs, execs, and resumes in the rebind worker, we use both
+ * the VM's and the external BOs' dma-resv slots. Let's try to make this as
+ * clear as possible; a short sketch follows the installation list below.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if the kernel op is
+ * operating on an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, this is in addition
+ * to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence installed in
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed in
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
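A condensed sketch of installation rules #4 and #5 above, using the core
dma-resv API (the resv pointers and fence are placeholders; the resv locks
must already be held, and real code checks the reservation error):

        /* bind job: bookkeep slot of the VM (rule 4) */
        dma_resv_reserve_fences(vm_resv, 1);
        dma_resv_add_fence(vm_resv, bind_job_fence, DMA_RESV_USAGE_BOOKKEEP);

        /* and, for a bind to an external BO, its resv too (rule 5) */
        if (bo_is_external) {
                dma_resv_reserve_fences(bo_resv, 1);
                dma_resv_add_fence(bo_resv, bind_job_fence,
                                   DMA_RESV_USAGE_BOOKKEEP);
        }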
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * the kernel op is operating on an external or private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operating on an external or private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
+ * last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if the rebind is operating on an external or private BO)
+ *
+ * 5. In compute mode, resumes in the rebind worker shall wait on the last
+ * rebind fence
+ *
+ * 6. In compute mode, resumes in the rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page-level granularity (e.g. part of a
+ * BO could be in system memory while another part could be in VRAM).
+ *
+ * The page fault handler will likely be optimized a bit more (e.g. rebinds
+ * always wait on the dma-resv kernel slots of the VM or BO, while technically
+ * we only have to wait on the BO moving. If using a job to do the rebind, we
+ * could avoid blocking in the page fault handler and rather attach a callback
+ * to the fence of the rebind job to signal page fault completion. Our handling
+ * of short-circuiting atomic faults for bound VMAs could be better. etc...).
+ * We can tune all of this once we have benchmarks / performance numbers from
+ * workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 000000000000..63e8a50b88e9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <drm/drm_gpuvm.h>
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+#include "xe_range_fence.h"
+
+struct xe_bo;
+struct xe_sync_entry;
+struct xe_vm;
+
+#define TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)
+
+#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
+#define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
+#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
+#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+#define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 5)
+#define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
+#define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
+
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+	/** @invalidate_link: Link for the vm::userptr.invalidated list */
+	struct list_head invalidate_link;
+	/**
+	 * @notifier: MMU notifier for user pointer (invalidation call back)
+	 */
+	struct mmu_interval_notifier notifier;
+	/** @sgt: storage for a scatter gather table */
+	struct sg_table sgt;
+	/** @sg: allocated scatter gather table */
+	struct sg_table *sg;
+	/** @notifier_seq: notifier sequence number */
+	unsigned long notifier_seq;
+	/**
+	 * @initial_bind: user pointer has been bound at least once.
+	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+	 */
+	bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+	/* debug aid: rate at which to inject userptr invalidations */
+	u32 divisor;
+#endif
+};
+
+struct xe_vma {
+	/** @gpuva: Base GPUVA object */
+	struct drm_gpuva gpuva;
+
+	/**
+	 * @combined_links: links into lists which are mutually exclusive.
+	 * Locking: vm lock in write mode OR vm lock in read mode and the vm's
+	 * resv.
+	 */
+	union {
+		/** @userptr: link into VM repin list if userptr. */
+		struct list_head userptr;
+		/** @rebind: link into VM if this VMA needs rebinding. */
+		struct list_head rebind;
+		/** @destroy: link to contested list when VM is being closed. */
+		struct list_head destroy;
+	} combined_links;
+
+	union {
+		/** @destroy_cb: callback to destroy VMA when unbind job is done */
+		struct dma_fence_cb destroy_cb;
+		/** @destroy_work: worker to destroy this VMA */
+		struct work_struct destroy_work;
+	};
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @tile_invalidated: mask of tiles where this VMA has been invalidated */
+		u8 tile_invalidated;
+	} usm;
+
+	/** @tile_mask: Tile mask of where to create binding for this VMA */
+	u8 tile_mask;
+
+	/**
+	 * @tile_present: Tile mask of where bindings are present for this VMA.
+	 * Protected by vm->lock, vm->resv and, for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs any one of these for
+	 * reading, but if reading is done under the vm->lock only, it needs to
+	 * be held in write mode.
+	 */
+	u8 tile_present;
+
+	/**
+	 * @pat_index: The pat index to use when encoding the PTEs for this vma.
+	 */
+	u16 pat_index;
+
+	/**
+	 * @userptr: user pointer state, only allocated for VMAs that are
+	 * user pointers
+	 */
+	struct xe_userptr userptr;
+};
+
+struct xe_device;
+
+struct xe_vm {
+	/** @gpuvm: base GPUVM used to track VMAs */
+	struct drm_gpuvm gpuvm;
+
+	/** @xe: xe device */
+	struct xe_device *xe;
+
+	/** @q: exec queues used for (un)binding VMAs, one per tile */
+	struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE];
+
+	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+
+	/** @size: size of the VM address space */
+	u64 size;
+
+	/** @pt_root: page table roots, one per tile */
+	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
+	/** @scratch_pt: scratch page tables, per tile and per level */
+	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
+
+	/**
+	 * @flags: flags for this VM, statically set up at creation time aside
+	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
+	 */
+#define XE_VM_FLAG_64K			BIT(0)
+#define XE_VM_FLAG_LR_MODE		BIT(1)
+#define XE_VM_FLAG_MIGRATION		BIT(2)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(3)
+#define XE_VM_FLAG_FAULT_MODE		BIT(4)
+#define XE_VM_FLAG_BANNED		BIT(5)
+#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
+	unsigned long flags;
+
+	/** @composite_fence_ctx: context composite fence */
+	u64 composite_fence_ctx;
+	/** @composite_fence_seqno: seqno for composite fence */
+	u32 composite_fence_seqno;
+
+	/**
+	 * @lock: outer most lock, protects objects of anything attached to this
+	 * VM
+	 */
+	struct rw_semaphore lock;
+
+	/**
+	 * @rebind_list: list of VMAs that need rebinding. Protected by the
+	 * vm->lock in write mode, OR (the vm->lock in read mode and the
+	 * vm resv).
+	 */
+	struct list_head rebind_list;
+
+	/** @rebind_fence: rebind fence from execbuf */
+	struct dma_fence *rebind_fence;
+
+	/**
+	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+	 * from an irq context can be last put and the destroy needs to be able
+	 * to sleep.
+	 */
+	struct work_struct destroy_work;
+
+	/**
+	 * @rftree: range fence tree to track updates to page table structure.
+	 * Used to implement conflict tracking between independent bind engines.
+	 */
+	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
+
+	/** @async_ops: async VM operations (bind / unbinds) */
+	struct {
+		/** @list: list of pending async VM ops */
+		struct list_head pending;
+		/** @work: worker to execute async VM ops */
+		struct work_struct work;
+		/** @lock: protects list of pending async VM ops and fences */
+		spinlock_t lock;
+		/** @fence: fence state */
+		struct {
+			/** @context: context of async fence */
+			u64 context;
+			/** @seqno: seqno of async fence */
+			u32 seqno;
+		} fence;
+		/** @error: error state for async VM ops */
+		int error;
+		/**
+		 * @munmap_rebind_inflight: an munmap style VM bind is in the
+		 * middle of a set of ops which requires a rebind at the end.
+		 */
+		bool munmap_rebind_inflight;
+	} async_ops;
+
+	/** @pt_ops: page table operations for this VM */
+	const struct xe_pt_ops *pt_ops;
+
+	/** @userptr: user pointer state */
+	struct {
+		/**
+		 * @userptr.repin_list: list of VMAs which are user pointers,
+		 * and needs repinning. Protected by @lock.
+		 */
+		struct list_head repin_list;
+		/**
+		 * @notifier_lock: protects notifier in write mode and
+		 * submission in read mode.
+		 */
+		struct rw_semaphore notifier_lock;
+		/**
+		 * @userptr.invalidated_lock: Protects the
+		 * @userptr.invalidated list.
+		 */
+		spinlock_t invalidated_lock;
+		/**
+		 * @userptr.invalidated: List of invalidated userptrs, not yet
+		 * picked up for revalidation. Protected from access with the
+		 * @invalidated_lock. Removing items from the list
+		 * additionally requires @lock in write mode, and adding
+		 * items to the list requires the @userptr.notifier_lock in
+		 * write mode.
+		 */
+		struct list_head invalidated;
+	} userptr;
+
+	/** @preempt: preempt state */
+	struct {
+		/**
+		 * @min_run_period_ms: The minimum run period before preempting
+		 * an engine again
+		 */
+		s64 min_run_period_ms;
+		/** @exec_queues: list of exec queues attached to this VM */
+		struct list_head exec_queues;
+		/** @num_exec_queues: number of exec queues attached to this VM */
+		int num_exec_queues;
+		/**
+		 * @rebind_deactivated: Whether rebind has been temporarily deactivated
+		 * due to no work available. Protected by the vm resv.
+		 */
+		bool rebind_deactivated;
+		/**
+		 * @rebind_work: worker to rebind invalidated userptrs / evicted
+		 * BOs
+		 */
+		struct work_struct rebind_work;
+	} preempt;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @asid: address space ID, unique to each VM */
+		u32 asid;
+		/**
+		 * @last_fault_vma: Last fault VMA, used for fast lookup when we
+		 * get a flood of faults to the same VMA
+		 */
+		struct xe_vma *last_fault_vma;
+	} usm;
+
+	/** @error_capture: error capture state */
+	struct {
+		/** @capture_once: capture only one error per VM */
+		bool capture_once;
+	} error_capture;
+
+	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
+	bool batch_invalidate_tlb;
+	/** @xef: XE file handle for tracking this VM's drm client */
+	struct xe_file *xef;
+};
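A quick illustration of the tile-id flag helpers defined in struct xe_vm
above; pairing them with XE_VM_FLAG_MIGRATION here is a hypothetical example:

        /* Encode the tile id into bits 7:6 of the VM flags ... */
        unsigned long flags = XE_VM_FLAG_MIGRATION | XE_VM_FLAG_SET_TILE_ID(tile);

        /* ... and recover it later */
        u8 tile_id = XE_VM_FLAG_TILE_ID(flags);	/* == tile->id */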
+
+/** struct xe_vma_op_map - VMA map operation */
+struct xe_vma_op_map {
+	/** @vma: VMA to map */
+	struct xe_vma *vma;
+	/** @immediate: Immediate bind */
+	bool immediate;
+	/** @read_only: Read only */
+	bool read_only;
+	/** @is_null: is NULL binding */
+	bool is_null;
+	/** @pat_index: The pat index to use for this operation. */
+	u16 pat_index;
+};
+
+/** struct xe_vma_op_remap - VMA remap operation */
+struct xe_vma_op_remap {
+	/** @prev: VMA preceding part of a split mapping */
+	struct xe_vma *prev;
+	/** @next: VMA subsequent part of a split mapping */
+	struct xe_vma *next;
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+	/** @skip_prev: skip prev rebind */
+	bool skip_prev;
+	/** @skip_next: skip next rebind */
+	bool skip_next;
+	/** @unmap_done: unmap operation is done */
+	bool unmap_done;
+};
+
+/** struct xe_vma_op_prefetch - VMA prefetch operation */
+struct xe_vma_op_prefetch {
+	/** @region: memory region to prefetch to */
+	u32 region;
+};
+
+/** enum xe_vma_op_flags - flags for VMA operation */
+enum xe_vma_op_flags {
+	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
+	XE_VMA_OP_FIRST			= BIT(0),
+	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
+	XE_VMA_OP_LAST			= BIT(1),
+	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
+	XE_VMA_OP_COMMITTED		= BIT(2),
+	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
+	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
+	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
+	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
+};
+
+/** struct xe_vma_op - VMA operation */
+struct xe_vma_op {
+	/** @base: GPUVA base operation */
+	struct drm_gpuva_op base;
+	/**
+	 * @ops: GPUVA ops; when set, call drm_gpuva_ops_free after this
+	 * operation is processed
+	 */
+	struct drm_gpuva_ops *ops;
+	/** @q: exec queue for this operation */
+	struct xe_exec_queue *q;
+	/**
+	 * @syncs: syncs for this operation, only used on first and last
+	 * operation
+	 */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
+	/** @link: async operation link */
+	struct list_head link;
+	/** @flags: operation flags */
+	enum xe_vma_op_flags flags;
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	/** @inject_error: inject error to test async op error handling */
+	bool inject_error;
+#endif
+
+	union {
+		/** @map: VMA map operation specific data */
+		struct xe_vma_op_map map;
+		/** @remap: VMA remap operation specific data */
+		struct xe_vma_op_remap remap;
+		/** @prefetch: VMA prefetch operation specific data */
+		struct xe_vma_op_prefetch prefetch;
+	};
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 000000000000..5f61dd87c586
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,895 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <drm/drm_managed.h>
+#include <kunit/visibility.h>
+#include <linux/compiler_types.h>
+
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming, documented to be executed in
+ * the driver, that falls outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ *   saved/restored to/from the HW context image. The list is emitted (via
+ *   Load Register Immediate commands) once when initializing the device and
+ *   saved in the default context. That default context is then used on every
+ *   context creation to have a "primed golden context", i.e. a context image
+ *   that already contains the changes needed to all the registers.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ *   engine is reset. It's also possible that a set of engine classes share a
+ *   common power domain and they are reset together. This happens on some
+ *   platforms with render and compute engines. In this case (at least) one of
+ *   them needs to keep the workaround programming: the approach taken in the
+ *   driver is to tie those workarounds to the first compute/render engine that
+ *   is registered. When executing with GuC submission, engine resets are
+ *   outside of kernel driver control, hence the list of registers involved is
+ *   written once, on engine initialization, and then passed to GuC, which
+ *   saves/restores their values before/after the reset takes place. See
+ *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of an MMIO workaround (as we write the list of
+ *   these to-be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always goes through
+ *   those programming sequences when switching contexts. The support for
+ *   workaround batchbuffers is enabled by these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that
+ *      offset is reached in the context restore. The workaround batchbuffer in
+ *      the driver currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other/OOB: There are WAs that, due to their nature, cannot be applied from
+ *   a central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, with the
+ *    difference being how they are maintained in the code: xe uses the
+ *    xe_rtp infrastructure so the workarounds can be kept in tables, following
+ *    a more declarative approach rather than a procedural one.
+ */
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)
XE_REG(__VA_ARGS__, .mcr = 1) + +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + +static const struct xe_rtp_entry_sr gt_was[] = { + { XE_RTP_NAME("14011060649"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), + ENGINE_CLASS(VIDEO_DECODE), + FUNC(xe_rtp_match_even_instance)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("14011059788"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) + }, + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)), + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) + }, + + /* DG1 */ + + { XE_RTP_NAME("1409420604"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("1408615072"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE2_DIS)) + }, + + /* DG2 */ + + { XE_RTP_NAME("16010515920"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), + GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("22010523718"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14011006942"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14012362059"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("14012362059"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("14010948348"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14011037102"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14011371254"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14011431319"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(UNSLCGCTL9440, + GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS), + SET(UNSLCGCTL9444, + GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14010569222"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14011028019"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + 
XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14010680813"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL, + CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | + TAG_BLOCK_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14014830051"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) + }, + { XE_RTP_NAME("18018781329"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), + SET(COMP_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("1509235366"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_GAMCNTRL_CTRL, + INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE)) + }, + { XE_RTP_NAME("14010648519"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE)) + }, + + /* PVC */ + + { XE_RTP_NAME("18018781329"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), + SET(COMP_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("16016694945"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC)) + }, + + /* Xe_LPG */ + + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) + }, + { XE_RTP_NAME("14018575942"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("22016670082"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) + }, + + /* Xe_LPM+ */ + + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(1300), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("22016670082"), + XE_RTP_RULES(MEDIA_VERSION(1300)), + XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) + }, + + /* Xe2_LPG */ + + { XE_RTP_NAME("16020975621"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14018157293"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0), + SET(XEHPC_L3CLOS_MASK(1), ~0), + SET(XEHPC_L3CLOS_MASK(2), ~0), + SET(XEHPC_L3CLOS_MASK(3), ~0)) + }, + + /* Xe2_LPM */ + + { XE_RTP_NAME("14017421178"), + XE_RTP_RULES(MEDIA_VERSION(2000), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(2000), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + + {} +}; + +static const struct xe_rtp_entry_sr engine_was[] = { + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE), + FF_TESSELATION_DOP_GATE_DISABLE)) + }, + { 
XE_RTP_NAME("1409804808"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS)) + }, + { XE_RTP_NAME("14010229206, 1409085225"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) + }, + { XE_RTP_NAME("1606931601"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ)) + }, + { XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1(RENDER_RING_BASE), + FF_DOP_CLOCK_GATE_DISABLE)) + }, + { XE_RTP_NAME("1406941453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL)) + }, + { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE), + FFSC_PERCTX_PREEMPT_CTRL)) + }, + + /* TGL */ + + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE)) + }, + + /* RKL */ + + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE)) + }, + + /* ADL-P */ + + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE)) + }, + + /* DG2 */ + + { XE_RTP_NAME("22013037850"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW)) + }, + { XE_RTP_NAME("22014226127"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE)) + }, + { XE_RTP_NAME("18017747507"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN, + POLYGON_TRIFAN_LINELOOP_DISABLE)) + }, + { XE_RTP_NAME("22012826095, 22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) + }, + { XE_RTP_NAME("22012826095, 22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) + }, + { XE_RTP_NAME("22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) + }, + { XE_RTP_NAME("22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) + }, + { XE_RTP_NAME("14010918519"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), + 
FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, + FORCE_SLM_FENCE_SCOPE_TO_TILE | + FORCE_UGM_FENCE_SCOPE_TO_TILE, + /* + * Ignore read back as it always returns 0 in these + * steps + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) + }, + { XE_RTP_NAME("16015675438"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), + PERF_FIX_BALANCING_CFE_DISABLE)) + }, + { XE_RTP_NAME("18028616096"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) + }, + { XE_RTP_NAME("16011620976, 22015475538"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8)) + }, + { XE_RTP_NAME("22012654132"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, + /* + * Register can't be read back for verification on + * DG2 due to Wa_14012342262 + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("22012654132"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, + /* + * Register can't be read back for verification on + * DG2 due to Wa_14012342262 + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("1509727124"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB)) + }, + { XE_RTP_NAME("22012856258"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION)) + }, + { XE_RTP_NAME("14013392000"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE)) + }, + { XE_RTP_NAME("14012419201"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) + }, + { XE_RTP_NAME("14012419201"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) + }, + { XE_RTP_NAME("1308578152"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + ENGINE_CLASS(RENDER), + FUNC(xe_rtp_match_first_gslice_fused_off)), + XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE), + REPLAY_MODE_GRANULARITY)) + }, + { XE_RTP_NAME("22010960976, 14013347512"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK)) + }, + { XE_RTP_NAME("1608949956, 14010198302"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE)) + }, + { XE_RTP_NAME("22010430635"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_GRF_CLEAR)) + }, + { XE_RTP_NAME("14013202645"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, + { XE_RTP_NAME("14013202645"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), 
GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, + { XE_RTP_NAME("22012532006"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) + }, + { XE_RTP_NAME("22012532006"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) + }, + { XE_RTP_NAME("14015150844"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, + XE_RTP_NOCHECK)) + }, + + /* PVC */ + + { XE_RTP_NAME("22014226127"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE)) + }, + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) + }, + { XE_RTP_NAME("16015675438"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), + PERF_FIX_BALANCING_CFE_DISABLE)) + }, + { XE_RTP_NAME("14014999345"), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), + GRAPHICS_STEP(B0, C0)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) + }, + + /* Xe_LPG */ + + { XE_RTP_NAME("14017856879"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH)) + }, + { XE_RTP_NAME("14015150844"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, + XE_RTP_NOCHECK)) + }, + + /* Xe2_LPG */ + + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, + { XE_RTP_NAME("14018957109"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE)) + }, + { XE_RTP_NAME("16021540221"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) + }, + { XE_RTP_NAME("14019322943"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE)) + }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, + { XE_RTP_NAME("16018737384"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) + }, + /* + * These two workarounds are the same, just applying to different + * engines. 
Although Wa_18032095049 (for the RCS) isn't required on + * all steppings, disabling these reports has no impact for our + * driver or the GuC, so we go ahead and treat it the same as + * Wa_16021639441 which does apply to all steppings. + */ + { XE_RTP_NAME("18032095049, 16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + + {} +}; + +static const struct xe_rtp_entry_sr lrc_was[] = { + { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3, + DISABLE_CPS_AWARE_COLOR_PIPE)) + }, + { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(RENDER_RING_BASE), + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)) + }, + { XE_RTP_NAME("1806527549"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE)) + }, + { XE_RTP_NAME("1606376872"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC)) + }, + + /* DG1 */ + + { XE_RTP_NAME("1409044764"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3, + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN)) + }, + { XE_RTP_NAME("22010493298"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(SET(HIZ_CHICKEN, + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE)) + }, + + /* DG2 */ + + { XE_RTP_NAME("16011186671"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH), + SET(VFLSKPD, DIS_OVER_FETCH_CACHE)) + }, + { XE_RTP_NAME("14010469329"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE)) + }, + { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, + DISABLE_CPS_AWARE_COLOR_PIPE)) + }, + { XE_RTP_NAME("16013271637"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE)) + }, + { XE_RTP_NAME("14014947963"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION, + PREEMPTION_VERTEX_COUNT, + 0x4000)) + }, + { XE_RTP_NAME("18018764978"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2, + SCOREBOARD_STALL_FLUSH_CONTROL)) + }, + { XE_RTP_NAME("18019271663"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) + }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, + + /* PVC */ + + { XE_RTP_NAME("16017236439"), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY), + FUNC(xe_rtp_match_even_instance)), + XE_RTP_ACTIONS(SET(BCS_SWCTRL(0), + BCS_SWCTRL_DISABLE_256B, + XE_RTP_ACTION_FLAG(ENGINE_BASE))), + }, + + /* Xe_LPG */ + + { XE_RTP_NAME("18019271663"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) + }, + + /* Xe2_LPG */ + + { XE_RTP_NAME("16020518922"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + 
XE_RTP_ACTIONS(SET(FF_MODE,
+			     DIS_TE_AUTOSTRIP |
+			     DIS_MESH_PARTIAL_AUTOSTRIP |
+			     DIS_MESH_AUTOSTRIP),
+			 SET(VFLSKPD,
+			     DIS_PARTIAL_AUTOSTRIP |
+			     DIS_AUTOSTRIP))
+	},
+	{ XE_RTP_NAME("14019386621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+	{ XE_RTP_NAME("14020013138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+	},
+	{ XE_RTP_NAME("14019988906"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+	},
+
+	{}
+};
+
+static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+#include <generated/xe_wa_oob.c>
+	{}
+};
+
+static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+
+__diag_pop();
+
+/**
+ * xe_wa_process_oob - process OOB workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process OOB workaround table for this platform, marking in @gt the
+ * workarounds that are active.
+ */
+void xe_wa_process_oob(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
+						  ARRAY_SIZE(oob_was));
+	xe_rtp_process(&ctx, oob_was);
+}
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process GT workaround table for this platform, saving in @gt all the
+ * workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
+						  ARRAY_SIZE(gt_was));
+	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process engine workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied at the engine level that match this
+ * engine.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
+						  ARRAY_SIZE(engine_was));
+	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process context workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied on context restore. These are workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
+						  ARRAY_SIZE(lrc_was));
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+}
+
+/**
+ * xe_wa_init - initialize gt with workaround bookkeeping
+ * @gt: GT instance to initialize
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */ +int xe_wa_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + size_t n_oob, n_lrc, n_engine, n_gt, total; + unsigned long *p; + + n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was)); + n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was)); + n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was)); + n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was)); + total = n_gt + n_engine + n_lrc + n_oob; + + p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gt->wa_active.gt = p; + p += n_gt; + gt->wa_active.engine = p; + p += n_engine; + gt->wa_active.lrc = p; + p += n_lrc; + gt->wa_active.oob = p; + + return 0; +} + +void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "GT Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was)) + drm_printf_indent(p, 1, "%s\n", gt_was[idx].name); + + drm_printf(p, "\nEngine Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was)) + drm_printf_indent(p, 1, "%s\n", engine_was[idx].name); + + drm_printf(p, "\nLRC Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) + drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name); + + drm_printf(p, "\nOOB Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was)) + if (oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); +} + +/* + * Apply tile (non-GT, non-display) workarounds. Think very carefully before + * adding anything to this function; most workarounds should be implemented + * elsewhere. The programming here is primarily for sgunit/soc workarounds, + * which are relatively rare. Since the registers these workarounds target are + * outside the GT, they should only need to be applied once at device + * probe/resume; they will not lose their values on any kind of GT or engine + * reset. + * + * TODO: We may want to move this over to xe_rtp in the future once we have + * enough workarounds to justify the work. 
+ */
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	if (XE_WA(mmio, 22010954014))
+		xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 000000000000..1b24d66f9d80
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct drm_printer;
+struct xe_gt;
+struct xe_hw_engine;
+struct xe_tile;
+
+int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_oob(struct xe_gt *gt);
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * XE_WA - Out-of-band workarounds, that don't fit the lifecycle of any
+ * other more specific type
+ * @gt__: gt instance
+ * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ */
+#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
new file mode 100644
index 000000000000..727bdc429212
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -0,0 +1,24 @@
+22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
+16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
+14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
+		PLATFORM(DG2)
+22011391025	PLATFORM(DG2)
+14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
+16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
+22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11)
+22012727685	SUBPLATFORM(DG2, G11)
+16015675438	PLATFORM(PVC)
+		SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+18020744125	PLATFORM(PVC)
+1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
+1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
+14016763929	SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+16017236439	PLATFORM(PVC)
+22010954014	PLATFORM(DG2)
+14019821291	MEDIA_VERSION_RANGE(1300, 2000)
+14015076503	MEDIA_VERSION(1300)
+16020292621	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 000000000000..a75eeba7bfe5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wait_user_fence.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_exec_queue.h"
+
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+	u64 rvalue;
+	int err;
+	bool passed;
+
+	err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue));
+	if (err)
+		return -EFAULT;
+
+	switch (op) {
+	case DRM_XE_UFENCE_WAIT_OP_EQ:
+		passed = (rvalue & mask) == (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_NEQ:
+		passed = (rvalue & mask) != (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GT:
+		passed = (rvalue & mask) > (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GTE:
+		passed = (rvalue & mask) >= (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LT:
+		passed = (rvalue & mask) < (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LTE:
+		passed = (rvalue & mask) <= (value & mask);
+		break;
+	default:
+		XE_WARN_ON("Not possible");
+		return -EINVAL;
+	}
+
+	return passed ? 0 : 1;
+}
+
+#define VALID_FLAGS	DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+#define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
+
+static long to_jiffies_timeout(struct xe_device *xe,
+			       struct drm_xe_wait_user_fence *args)
+{
+	unsigned long long t;
+	long timeout;
+
+	/*
+	 * For a negative timeout we want to wait "forever" by setting
+	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
+	 * to args->timeout to avoid being zeroed on the signal delivery
+	 * (see the arithmetic after the wait).
+	 */
+	if (args->timeout < 0) {
+		args->timeout = MAX_SCHEDULE_TIMEOUT;
+		return MAX_SCHEDULE_TIMEOUT;
+	}
+
+	if (args->timeout == 0)
+		return 0;
+
+	/*
+	 * Save the timeout to a u64 variable because nsecs_to_jiffies
+	 * might return a value that overflows an s32 variable.
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
+		t = drm_timeout_abs_to_jiffies(args->timeout);
+	else
+		t = nsecs_to_jiffies(args->timeout);
+
+	/*
+	 * Anything greater than MAX_SCHEDULE_TIMEOUT is meaningless; also, we
+	 * don't want to cap it at MAX_SCHEDULE_TIMEOUT because the user
+	 * apparently doesn't mean to wait forever, otherwise args->timeout
+	 * should have been set to a negative value.
+	 */
+	if (t > MAX_SCHEDULE_TIMEOUT)
+		timeout = MAX_SCHEDULE_TIMEOUT - 1;
+	else
+		timeout = t;
+
+	return timeout ?: 1;
+}
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+	struct drm_xe_wait_user_fence *args = data;
+	struct xe_exec_queue *q = NULL;
+	u64 addr = args->addr;
+	int err = 0;
+	long timeout;
+	ktime_t start;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, addr & 0x7))
+		return -EINVAL;
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q))
+			return -ENOENT;
+	}
+
+	timeout = to_jiffies_timeout(xe, args);
+
+	start = ktime_get();
+
+	add_wait_queue(&xe->ufence_wq, &w_wait);
+	for (;;) {
+		err = do_compare(addr, args->value, args->mask, args->op);
+		if (err <= 0)
+			break;
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		if (q) {
+			if (q->ops->reset_status(q)) {
+				drm_info(&xe->drm, "exec queue reset detected\n");
+				err = -EIO;
+				break;
+			}
+		}
+
+		if (!timeout) {
+			err = -ETIME;
+			break;
+		}
+
+		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+	}
+	remove_wait_queue(&xe->ufence_wq, &w_wait);
+
+	if (!(args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)) {
+		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout < 0)
+			args->timeout = 0;
+	}
+
+	if (!timeout && !(err < 0))
+		err = -ETIME;
+
+	if (q)
+		xe_exec_queue_put(q);
+
+	return err;
+}
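A hedged sketch of how userspace might drive this ioctl; the struct layout is
assumed to match the fields validated in the handler above (drm/xe_drm.h is
authoritative, and the names here may differ from it):

        struct drm_xe_wait_user_fence wait = {
                .addr = fence_gpu_addr,             /* must be 8-byte aligned */
                .op = DRM_XE_UFENCE_WAIT_OP_EQ,     /* (*addr & mask) == value */
                .flags = 0,                         /* relative timeout */
                .value = expected_value,
                .mask = ~0ull,
                .timeout = 1000000000,              /* 1s in ns; negative waits forever */
                .exec_queue_id = queue_id,          /* 0 skips reset detection */
        };
        int ret = ioctl(fd, DRM_IOCTL_XE_WAIT_USER_FENCE, &wait);
        /* on success, .timeout holds the remaining time for relative waits */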
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 000000000000..0e268978f9e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 000000000000..d3a99157e523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wopcm.h"
+
+#include "regs/xe_guc_regs.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM will be fixed after writing to the GuC WOPCM size
+ * and offset registers, whose values are calculated and determined by the
+ * HuC/GuC firmware sizes and a set of hardware requirements/restrictions, as
+ * shown below:
+ *
+ * ::
+ *
+ *       +=========> +====================+ <== WOPCM Top
+ *       ^           |  HW contexts RSVD  |
+ *       |     +===> +====================+ <== GuC WOPCM Top
+ *       |     ^     |                    |
+ *       |     |     |                    |
+ *       |     |     |                    |
+ *       |    GuC    |                    |
+ *       |   WOPCM   |                    |
+ *       |    Size   +--------------------+
+ *     WOPCM   |     |    GuC FW RSVD     |
+ *       |     |     +--------------------+
+ *       |     |     |   GuC Stack RSVD   |
+ *       |     |     +------------------- +
+ *       |     v     |   GuC WOPCM RSVD   |
+ *       |     +===> +====================+ <== GuC WOPCM base
+ *       |           |     WOPCM RSVD     |
+ *       |           +------------------- + <== HuC Firmware Top
+ *       v           |      HuC FW        |
+ *       +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at the GuC WOPCM base and ends at the GuC WOPCM
+ * top. The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+/* FIXME: Larger size required for 2-tile PVC, do a proper probe sooner or later */
+#define DGFX_WOPCM_SIZE			SZ_4M
+/* FIXME: Larger size required for MTL, do a proper probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M
+#define WOPCM_SIZE			SZ_2M
+
+#define MAX_WOPCM_SIZE			SZ_8M
+
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
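A worked example of the partitioning under assumed firmware sizes; the math
mirrors xe_wopcm_init() further down, and the granularity retained by
GUC_WOPCM_SIZE_MASK is an assumption here:

        /*
         * Assume a 2MB WOPCM, a 400K HuC image and a 300K GuC image:
         *
         *   guc_wopcm_base = ALIGN(400K + 16K, 16K)  =  416K
         *   guc_wopcm_size = 2048K - 36K - 416K      = 1596K
         *
         * The layout checks then hold: 416K >= 400K + 16K (the HuC fits
         * below the GuC base) and 1596K >= 300K + 16K + 8K (the GuC FW,
         * reserved area and stack fit inside the GuC region).
         */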
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM */
+#define WOPCM_HW_CTX_RESERVED		(SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+	return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+	return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+	return WOPCM_HW_CTX_RESERVED;
+}
+
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+			   u32 guc_wopcm_base, u32 guc_wopcm_size,
+			   u32 guc_fw_size, u32 huc_fw_size)
+{
+	const u32 ctx_rsvd = context_reserved_size();
+	u32 size;
+
+	size = wopcm_size - ctx_rsvd;
+	if (unlikely(guc_wopcm_base >= size ||
+		     guc_wopcm_size > size - guc_wopcm_base)) {
+		drm_err(&xe->drm,
+			"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+			size / SZ_1K);
+		return false;
+	}
+
+	size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	if (unlikely(guc_wopcm_size < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+			guc_wopcm_size / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	size = huc_fw_size + WOPCM_RESERVED_SIZE;
+	if (unlikely(guc_wopcm_base < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+			guc_wopcm_base / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	return true;
+}
+
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
+	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+
+	if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
+	    !(reg_base & GUC_WOPCM_OFFSET_VALID))
+		return false;
+
+	*guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK;
+	*guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK;
+	return true;
+}
+
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+			     struct xe_wopcm *wopcm)
+{
+	u32 base = wopcm->guc.base;
+	u32 size = wopcm->guc.size;
+	u32 huc_agent = xe_uc_fw_is_available(&gt->uc.huc.fw) ? HUC_LOADING_AGENT_GUC : 0;
+	u32 mask;
+	int err;
+
+	XE_WARN_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_WARN_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_WARN_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+					 size | GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+					 base | huc_agent, mask,
+					 base | huc_agent |
+					 GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+		   DMA_GUC_WOPCM_OFFSET.addr,
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+		   GUC_WOPCM_SIZE.addr,
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+
+	return err;
+}
+
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+	return IS_DGFX(xe) ? DGFX_WOPCM_SIZE :
+		xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE :
+		WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * This function partitions the WOPCM space based on the GuC and HuC firmware
+ * sizes and allocates the maximum remaining space for use by GuC. It also
+ * enforces platform-dependent hardware restrictions on the GuC WOPCM offset
+ * and size. WOPCM init fails if any of these checks fail, so that the
+ * subsequent WOPCM register setup and GuC firmware upload are aborted.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+        struct xe_device *xe = wopcm_to_xe(wopcm);
+        struct xe_gt *gt = wopcm_to_gt(wopcm);
+        u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+        u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+        u32 ctx_rsvd = context_reserved_size();
+        u32 guc_wopcm_base;
+        u32 guc_wopcm_size;
+        bool locked;
+        int ret = 0;
+
+        if (!guc_fw_size)
+                return -EINVAL;
+
+        wopcm->size = xe_wopcm_size(xe);
+        drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+        xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+        XE_WARN_ON(guc_fw_size >= wopcm->size);
+        XE_WARN_ON(huc_fw_size >= wopcm->size);
+        XE_WARN_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+        locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+        if (locked) {
+                drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+                        guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+                /*
+                 * When the GuC WOPCM base and size are pre-programmed by the
+                 * BIOS/IFWI, check against the maximum allowed WOPCM size to
+                 * validate that the programmed values fit the WOPCM layout.
+                 */
+                wopcm->size = MAX_WOPCM_SIZE;
+
+                goto check;
+        }
+
+        /*
+         * The aligned value of guc_wopcm_base determines the WOPCM space
+         * available for the HuC firmware and the mandatory reserved area.
+         */
+        guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+        guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+        /*
+         * Clamp guc_wopcm_base now so that the following math is correct;
+         * the formal check of the whole WOPCM layout is done below.
+         */
+        guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
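+        /*
+         * Worked example with hypothetical numbers (not from this patch):
+         * for wopcm->size = 2048K, ctx_rsvd = 36K and a 400K HuC image,
+         * guc_wopcm_base = ALIGN(400K + 16K, 16K) = 416K, which the min()
+         * above leaves untouched since 416K < 2048K - 36K. The remainder
+         * computed below is then 2048K - 36K - 416K = 1596K of GuC WOPCM.
+         */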
+        /* The aligned remainder of the usable WOPCM space can be assigned to GuC. */
+        guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+        guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+        drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+                guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+        if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+                           guc_fw_size, huc_fw_size)) {
+                wopcm->guc.base = guc_wopcm_base;
+                wopcm->guc.size = guc_wopcm_size;
+                XE_WARN_ON(!wopcm->guc.base);
+                XE_WARN_ON(!wopcm->guc.size);
+        } else {
+                drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+                return -E2BIG;
+        }
+
+        if (!locked)
+                ret = __wopcm_init_regs(xe, gt, wopcm);
+
+        return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 000000000000..0197a282460b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 000000000000..486d850c4084
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+        /** @size: Size of overall WOPCM */
+        u32 size;
+        /** @guc: GuC WOPCM Region info */
+        struct {
+                /** @base: GuC WOPCM base, an offset from the WOPCM base */
+                u32 base;
+                /** @size: Size of the GuC WOPCM region */
+                u32 size;
+        } guc;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c b/drivers/gpu/drm/xlnx/zynqmp_kms.c
index a7f8611be6f4..db3bb4afbfc4 100644
--- a/drivers/gpu/drm/xlnx/zynqmp_kms.c
+++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c
@@ -27,7 +27,6 @@
 #include <drm/drm_managed.h>
 #include <drm/drm_mode_config.h>
 #include <drm/drm_plane.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
 #include <drm/drm_vblank.h>