145 files changed, 8120 insertions(+), 5400 deletions(-)
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 3ea622876b67..e37557b30f62 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -74,15 +74,6 @@ Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c :export: -Legacy CRTC/Modeset Helper Functions Reference -============================================== - -.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c - :doc: overview - -.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c - :export: - Simple KMS Helper Reference =========================== @@ -282,15 +273,6 @@ Flip-work Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_flip_work.c :export: -Plane Helper Reference -====================== - -.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c - :doc: overview - -.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c - :export: - Auxiliary Modeset Helpers ========================= @@ -308,3 +290,21 @@ Framebuffer GEM Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c :export: + +Legacy Plane Helper Reference +============================= + +.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c + :export: + +Legacy CRTC/Modeset Helper Functions Reference +============================================== + +.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c + :export: diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 307284125d7a..2dcf5b42015d 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -263,14 +263,20 @@ Taken all together there's two consequences for the atomic design: - An atomic update is assembled and validated as an entirely free-standing pile of structures within the :c:type:`drm_atomic_state <drm_atomic_state>` - container. Again drivers can subclass that container for their own state - structure tracking needs. Only when a state is committed is it applied to the - driver and modeset objects. This way rolling back an update boils down to - releasing memory and unreferencing objects like framebuffers. + container. Driver private state structures are also tracked in the same + structure; see the next chapter. Only when a state is committed is it applied + to the driver and modeset objects. This way rolling back an update boils down + to releasing memory and unreferencing objects like framebuffers. Read on in this chapter, and also in :ref:`drm_atomic_helper` for more detailed coverage of specific topics. +Handling Driver Private State +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_atomic.c + :doc: handling driver private state + Atomic Mode Setting Function Reference -------------------------------------- diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index f421a54527d2..1e593370f64f 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -194,6 +194,24 @@ drm_mode_config_helper_suspend/resume(). 
Contact: Maintainer of the driver you plan to convert +Convert drivers to use drm_fb_helper_fbdev_setup/teardown() +----------------------------------------------------------- + +Most drivers can use drm_fb_helper_fbdev_setup() except maybe: + +- amdgpu which has special logic to decide whether to call + drm_helper_disable_unused_functions() + +- armada which isn't atomic and doesn't call + drm_helper_disable_unused_functions() + +- i915 which calls drm_fb_helper_initial_config() in a worker + +Drivers that use drm_framebuffer_remove() to clean up the fbdev framebuffer can +probably use drm_fb_helper_fbdev_teardown(). + +Contact: Maintainer of the driver you plan to convert + Core refactorings ================= diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3cbb2c78a9df..bae0d32e327b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -528,6 +528,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_CFL_IDS(&gen9_early_ops), INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 642bea2c9b3a..d5a2eefd6c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -959,6 +959,7 @@ struct amdgpu_gfx_config { }; struct amdgpu_cu_info { + uint32_t simd_per_cu; uint32_t max_waves_per_simd; uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 896b16db58aa..335e454e2ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -275,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) kfree(mem); } -uint64_t get_vmem_size(struct kgd_dev *kgd) +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) { - struct amdgpu_device *adev = - (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint64_t address_mask = adev->dev->dma_mask ? 
~*adev->dev->dma_mask : + ~((1ULL << 32) - 1); + resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; + + memset(mem_info, 0, sizeof(*mem_info)); + if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->mc.visible_vram_size; + mem_info->local_mem_size_private = adev->mc.real_vram_size - + adev->mc.visible_vram_size; + } else { + mem_info->local_mem_size_public = 0; + mem_info->local_mem_size_private = adev->mc.real_vram_size; + } + mem_info->vram_width = adev->mc.vram_width; - BUG_ON(kgd == NULL); + pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n", + adev->mc.aper_base, aper_limit, + mem_info->local_mem_size_public, + mem_info->local_mem_size_private); - return adev->mc.real_vram_size; + if (amdgpu_sriov_vf(adev)) + mem_info->mem_clk_max = adev->clock.default_mclk / 100; + else + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) @@ -298,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - /* The sclk is in quantas of 10kHz */ - return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; + /* the sclk is in quantas of 10kHz */ + if (amdgpu_sriov_vf(adev)) + return adev->clock.default_sclk / 100; + + return amdgpu_dpm_get_sclk(adev, false) / 100; +} + +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_cu_info acu_info = adev->gfx.cu_info; + + memset(cu_info, 0, sizeof(*cu_info)); + if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) + return; + + cu_info->cu_active_number = acu_info.number; + cu_info->cu_ao_mask = acu_info.ao_cu_mask; + memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], + sizeof(acu_info.bitmap)); + cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; + cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + cu_info->simd_per_cu = acu_info.simd_per_cu; + cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; + cu_info->wave_front_size = acu_info.wave_front_size; + cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; + cu_info->lds_size = acu_info.lds_size; +} + +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8d689ab7e429..2a519f9062ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -uint64_t get_vmem_size(struct kgd_dev *kgd); +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 
1ae149456c9f..a9e6aea0e5f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); @@ -166,7 +173,7 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .alloc_pasid = amdgpu_pasid_alloc, @@ -177,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, @@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS (35+4) +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; @@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + 
mmSDMA0_RLC0_RB_WPTR, 0); + data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + + if (read_user_wptr(mm, wptr, data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdma_rlc_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); @@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) m->sdma_rlc_rb_rptr_addr_lo); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - m->sdma_rlc_rb_cntl); + + data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + + queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+4) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; + reg++) + DUMP_REG(sdma_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; return 0; } @@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, struct cik_sdma_rlc_registers *m; uint32_t sdma_base_addr; uint32_t temp; - int timeout = utimeout; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); @@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) break; - if (timeout <= 0) + if (time_after(jiffies, end_jiffies)) return -ETIME; - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); @@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index e9b436bc8dcb..b127259d7d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -45,7 +45,7 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -struct cik_sdma_rlc_registers; +struct vi_sdma_mqd; /* * Register access functions @@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t 
__user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -125,7 +132,7 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .alloc_pasid = amdgpu_pasid_alloc, @@ -136,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, @@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) { - return 0; + uint32_t retval; + + retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; + pr_debug("kfd: sdma base address: 0x%x\n", retval); + + return retval; } static inline struct vi_mqd *get_mqd(void *mqd) @@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd) return (struct vi_mqd *)mqd; } -static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) { - return (struct cik_sdma_rlc_registers *)mqd; + return (struct vi_sdma_mqd *)mqd; } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS (54+4) +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct 
amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_sdma_mqd *m; + unsigned long end_jiffies; + uint32_t sdma_base_addr; + uint32_t data; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + + if (read_user_wptr(mm, wptr, data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdmax_rlcx_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + + queue_id * KFD_VI_SDMA_QUEUE_OFFSET; + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+4+2+3+7) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; + reg++) + DUMP_REG(sdma_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + return 0; } @@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct cik_sdma_rlc_registers *m; + struct vi_sdma_mqd *m; uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; @@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct cik_sdma_rlc_registers *m; + 
struct vi_sdma_mqd *m; uint32_t sdma_base_addr; uint32_t temp; - int timeout = utimeout; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); @@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, while (true) { temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (timeout <= 0) + if (time_after(jiffies, end_jiffies)) return -ETIME; - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 6a9e38a3d2a0..cee6e8a3ad9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -562,7 +562,7 @@ #define PRIVATE_BASE(x) ((x) << 0) /* scratch */ #define SHARED_BASE(x) ((x) << 16) /* LDS */ -#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ enum { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 4b5109bfd5f4..a066c5eda135 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -48,6 +48,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */ + #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 @@ -5277,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ff9f1a82630f..4e694ae9f308 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7116,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v8_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index dbf3703cbd1b..19ddd2312e00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -27,6 +27,8 @@ #define SDMA1_REGISTER_OFFSET 0x200 /* not a register */ #define SDMA_MAX_INSTANCE 2 +#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */ + /* crtc instance offsets */ #define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c) #define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile 
index 342c2d937b17..a317e76ffb5e 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -35,6 +35,8 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o + kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + +amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm new file mode 100644 index 000000000000..997a383dcb8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -0,0 +1,1384 @@ +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if 0 +HW (VI) source code for CWSR trap handler +#Version 18 + multiple trap handler + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. +** #11. Update 1. Add 2 more timestamp for debug version +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance +** #13. Integ 1. Always use MUBUF for PV trap shader... +** #14. Update 1. s_buffer_store soft clause... +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree +** #17. Update 1. 
FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] +** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 +** 2. FUNC - Handle non-CWSR traps +*/ + +var G8SR_WDMEM_HWREG_OFFSET = 0 +var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes + +// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. + +var G8SR_DEBUG_TIMESTAMP = 0 +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset +var s_g8sr_ts_save_s = s[34:35] // save start +var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi +var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ +var s_g8sr_ts_save_d = s[40:41] // save end +var s_g8sr_ts_restore_s = s[42:43] // restore start +var s_g8sr_ts_restore_d = s[44:45] // restore end + +var G8SR_VGPR_SR_IN_DWX4 = 0 +var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes +var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 + + +/*************************************************************************/ +/* control on how to run the shader */ +/*************************************************************************/ +//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) +var EMU_RUN_HACK = 0 +var EMU_RUN_HACK_RESTORE_NORMAL = 0 +var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 +var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var SAVE_LDS = 1 +var WG_BASE_ADDR_LO = 0x9000a000 +var WG_BASE_ADDR_HI = 0x0 +var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem +var CTX_SAVE_CONTROL = 0x0 +var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) +var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes +var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing + +/**************************************************************************/ +/* variables */ +/**************************************************************************/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 
0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_RCNT_SIZE = 4 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + + //tba_lo and tba_hi need to be saved/restored +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_status = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 + +var s_save_mem_offset = tma_lo +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) +var s_save_m0 = tma_hi + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp2 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = tma_lo 
//no conflict +var s_restore_exec_hi = tma_hi //no conflict +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(VI) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ + s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0 + s_waitcnt lgkmcnt(0) + s_or_b32 ttmp7, ttmp8, ttmp9 + s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? 
+end + + //check whether there is mem_viol + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_NO_PC_REWIND + + //if so, need rewind PC assuming GDS operation gets NACKed + s_mov_b32 s_save_tmp, 0 //clear mem_viol bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 + s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc + +L_NO_PC_REWIND: + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + + s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK + s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi //save XNACK must before any memory operation + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG + + s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_sq_save_msg + s_waitcnt lgkmcnt(0) +end + + if (EMU_RUN_HACK) + + else + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + + if (EMU_RUN_HACK) + + else + s_cbranch_execz L_SLEEP + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_spi_wrexec + s_waitcnt lgkmcnt(0) +end + + /* setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 + s_lshr_b32 s_save_tmp, s_save_tmp, 6 + s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + + + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + 
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE + + //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) + s_mov_b32 s_save_m0, m0 //save M0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 + + + + + /* save HW registers */ + ////////////////////////////// + + L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + + + s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 + + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO + s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC + write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC + write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS + + //s_save_trapsts conflicts with s_save_alloc_size + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + + write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO + write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO + write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI + + + + /* the first wave in the threadgroup */ + // save fist_wave bits in tba_hi unused bit.26 + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit + //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26] + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... 
+ ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
+ else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? 
+ +elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss + // store from higest LDS address to lowest + s_mov_b32 s3, 256*2 + s_sub_u32 m0, s_save_alloc_size, s3 + s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 + s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... + s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest + s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction + s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc + s_nop 0 + s_nop 0 + s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes + s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved + s_add_u32 s0, s0,s_save_alloc_size + s_addc_u32 s1, s1, 0 + s_setpc_b64 s[0:1] + + + for var i =0; i< 128; i++ + // be careful to make here a 64Byte aligned address, which could improve performance... + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + + if i!=127 + s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline + s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 + end + end + +else // BUFFER_STORE + v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 + v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + v_mul_i32_i24 v2, v3, 8 // tid*8 + v_mov_b32 v3, 256*2 + s_mov_b32 m0, 0x10000 + s_mov_b32 s0, s_save_buf_rsrc3 + s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT + +L_SAVE_LDS_LOOP_VECTOR: + ds_read_b64 v[0:1], v2 //x =LDS[a], byte address + s_waitcnt lgkmcnt(0) + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 +// s_waitcnt vmcnt(0) + v_add_u32 v2, vcc[0:1], v2, v3 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR + + // restore rsrc3 + s_mov_b32 s_save_buf_rsrc3, s0 + +end + +L_SAVE_LDS_DONE: + + + /* save VGPRs - set the Rest VGPRs */ + ////////////////////////////////////////////////////////////////////////////////////// + L_SAVE_VGPR: + // VGPR SR memory offset: 0 + // TODO rearrange the RSRC words to use swizzle for VGPR save... + + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
+ else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? 
+ s_mov_b32 m0, s_save_mem_offset
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+ s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+ /* Setup Resource Constants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_restore_tmp, v9, 0
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+ s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+ s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+ else
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // tma_lo/hi are sgprs 110, 111, which are not used in the 112-SGPR allocation case...
+ s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+ s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //back up ts to ttmp0/1, since exec will finally be restored..
+end
+
+
+
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+ /* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+ //////////////////////////////
+ L_RESTORE_LDS:
+
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used?
jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. 
This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? 
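+ // v0..v3 were used as staging registers by the burst loads above;
+ // reload their real contents last, from the saved start offset.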
+ s_set_gpr_idx_off
+ /* VGPR restore on v0 */
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ end
+
+end
+
+ /* restore SGPRs */
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPRs from S[n] to S[0], in groups of 16 sgprs
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //sgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ /* If 112 SGPRs are allocated, 4 sgprs are not used: TBA(108,109), TMA(110,111).
+ However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
+ */
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete?
+
+ /* restore HW registers */
+ //////////////////////////////
+ L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+ s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+ s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO + read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT + s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), 
s_restore_tmp + + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + + s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d + s_waitcnt lgkmcnt(0) +end + +// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + + +/**************************************************************************/ +/* the END */ +/**************************************************************************/ +L_END_PGM: + s_endpgm + +end + + +/**************************************************************************/ +/* the helper functions */ +/**************************************************************************/ + +//Only for save hwreg to mem +function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo +end + + +// HWREG are saved before SGPRs, so all HWREG could be use. +function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + + +#endif + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820123, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 
0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 
0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0x8671ff71, 0x0000ffff, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 505d39156acd..62c3d9cd6ef1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep) return -EPERM; } - process = kfd_create_process(current); + process = kfd_create_process(filep); if (IS_ERR(process)) return PTR_ERR(process); @@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; + q_properties->ctl_stack_size = args->ctl_stack_size; if 
(args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -431,6 +432,38 @@ out: return err; } +static int kfd_ioctl_set_trap_handler(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_trap_handler_args *args = data; + struct kfd_dev *dev; + int err = 0; + struct kfd_process_device *pdd; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = -ESRCH; + goto out; + } + + if (dev->dqm->ops.set_trap_handler(dev->dqm, + &pdd->qpd, + args->tba_addr, + args->tma_addr)) + err = -EINVAL; + +out: + mutex_unlock(&p->mutex); + + return err; +} + static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { @@ -493,7 +526,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, long status; dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (!dev || !dev->dbgmgr) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -979,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_scratch_backing_va, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, - kfd_ioctl_get_tile_config, 0) + kfd_ioctl_get_tile_config, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, + kfd_ioctl_set_trap_handler, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1088,6 +1124,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) KFD_MMAP_EVENTS_MASK) { vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; return kfd_event_mmap(process, vma); + } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == + KFD_MMAP_RESERVED_MEM_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; + return kfd_reserved_mem_mmap(process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c new file mode 100644 index 000000000000..2bc2816767a7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -0,0 +1,1267 @@ +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/amd-iommu.h> +#include "kfd_crat.h" +#include "kfd_priv.h" +#include "kfd_topology.h" + +/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. 
+ * GPU processor ID are expressed with Bit[31]=1. + * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs + * used in the CRAT. + */ +static uint32_t gpu_processor_id_low = 0x80001000; + +/* Return the next available gpu_processor_id and increment it for next GPU + * @total_cu_count - Total CUs present in the GPU including ones + * masked off + */ +static inline unsigned int get_and_inc_gpu_processor_id( + unsigned int total_cu_count) +{ + int current_id = gpu_processor_id_low; + + gpu_processor_id_low += total_cu_count; + return current_id; +} + +/* Static table to describe GPU Cache information */ +struct kfd_gpu_cache_info { + uint32_t cache_size; + uint32_t cache_level; + uint32_t flags; + /* Indicates how many Compute Units share this cache + * Value = 1 indicates the cache is not shared + */ + uint32_t num_cu_shared; +}; + +static struct kfd_gpu_cache_info kaveri_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + + /* TODO: Add L2 Cache information */ +}; + + +static struct kfd_gpu_cache_info carrizo_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank. */ + .cache_size = 4, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + + /* TODO: Add L2 Cache information */ +}; + +/* NOTE: In future if more information is added to struct kfd_gpu_cache_info + * the following ASICs may need a separate table. 
+ */ +#define hawaii_cache_info kaveri_cache_info +#define tonga_cache_info carrizo_cache_info +#define fiji_cache_info carrizo_cache_info +#define polaris10_cache_info carrizo_cache_info +#define polaris11_cache_info carrizo_cache_info + +static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.cpu_cores_count = cu->num_cpu_cores; + dev->node_props.cpu_core_id_base = cu->processor_id_low; + if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; + + pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, + cu->processor_id_low); +} + +static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.simd_id_base = cu->processor_id_low; + dev->node_props.simd_count = cu->num_simd_cores; + dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; + dev->node_props.max_waves_per_simd = cu->max_waves_simd; + dev->node_props.wave_front_size = cu->wave_front_size; + dev->node_props.array_count = cu->array_count; + dev->node_props.cu_per_simd_array = cu->num_cu_per_array; + dev->node_props.simd_per_cu = cu->num_simd_per_cu; + dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; + if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) + dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; + pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); +} + +/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, + struct list_head *device_list) +{ + struct kfd_topology_device *dev; + + pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", + cu->proximity_domain, cu->hsa_capability); + list_for_each_entry(dev, device_list, list) { + if (cu->proximity_domain == dev->proximity_domain) { + if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) + kfd_populated_cu_info_cpu(dev, cu); + + if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) + kfd_populated_cu_info_gpu(dev, cu); + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + struct list_head *device_list) +{ + struct kfd_mem_properties *props; + struct kfd_topology_device *dev; + + pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", + mem->proximity_domain); + list_for_each_entry(dev, device_list, list) { + if (mem->proximity_domain == dev->proximity_domain) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + /* We're on GPU node */ + if (dev->node_props.cpu_cores_count == 0) { + /* APU */ + if (mem->visibility_type == 0) + props->heap_type = + HSA_MEM_HEAP_TYPE_FB_PRIVATE; + /* dGPU */ + else + props->heap_type = mem->visibility_type; + } else + props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + + if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) + props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; + if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) + props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; + + props->size_in_bytes = + ((uint64_t)mem->length_high << 32) + + mem->length_low; + props->width = mem->width; + + dev->node_props.mem_banks_count++; + list_add_tail(&props->list, &dev->mem_props); + + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct + * topology 
device present in the device_list + */ +static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + struct list_head *device_list) +{ + struct kfd_cache_properties *props; + struct kfd_topology_device *dev; + uint32_t id; + uint32_t total_num_of_cu; + + id = cache->processor_id_low; + + pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); + list_for_each_entry(dev, device_list, list) { + total_num_of_cu = (dev->node_props.array_count * + dev->node_props.cu_per_simd_array); + + /* Cache infomration in CRAT doesn't have proximity_domain + * information as it is associated with a CPU core or GPU + * Compute Unit. So map the cache using CPU core Id or SIMD + * (GPU) ID. + * TODO: This works because currently we can safely assume that + * Compute Units are parsed before caches are parsed. In + * future, remove this dependency + */ + if ((id >= dev->node_props.cpu_core_id_base && + id <= dev->node_props.cpu_core_id_base + + dev->node_props.cpu_cores_count) || + (id >= dev->node_props.simd_id_base && + id < dev->node_props.simd_id_base + + total_num_of_cu)) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->processor_id_low = id; + props->cache_level = cache->cache_level; + props->cache_size = cache->cache_size; + props->cacheline_size = cache->cache_line_size; + props->cachelines_per_tag = cache->lines_per_tag; + props->cache_assoc = cache->associativity; + props->cache_latency = cache->cache_latency; + memcpy(props->sibling_map, cache->sibling_map, + sizeof(props->sibling_map)); + + if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) + props->cache_type |= HSA_CACHE_TYPE_DATA; + if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) + props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) + props->cache_type |= HSA_CACHE_TYPE_CPU; + if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + props->cache_type |= HSA_CACHE_TYPE_HSACU; + + dev->cache_count++; + dev->node_props.caches_count++; + list_add_tail(&props->list, &dev->cache_props); + + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, + struct list_head *device_list) +{ + struct kfd_iolink_properties *props = NULL, *props2; + struct kfd_topology_device *dev, *cpu_dev; + uint32_t id_from; + uint32_t id_to; + + id_from = iolink->proximity_domain_from; + id_to = iolink->proximity_domain_to; + + pr_debug("Found IO link entry in CRAT table with id_from=%d\n", + id_from); + list_for_each_entry(dev, device_list, list) { + if (id_from == dev->proximity_domain) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->node_from = id_from; + props->node_to = id_to; + props->ver_maj = iolink->version_major; + props->ver_min = iolink->version_minor; + props->iolink_type = iolink->io_interface_type; + + if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) + props->weight = 20; + else + props->weight = node_distance(id_from, id_to); + + props->min_latency = iolink->minimum_latency; + props->max_latency = iolink->maximum_latency; + props->min_bandwidth = iolink->minimum_bandwidth_mbs; + props->max_bandwidth = iolink->maximum_bandwidth_mbs; + props->rec_transfer_size = + iolink->recommended_transfer_size; + + dev->io_link_count++; + dev->node_props.io_links_count++; + list_add_tail(&props->list, &dev->io_link_props); + break; + } + } + + /* CPU topology is created 
before GPUs are detected, so CPU->GPU
+ * links are not built at that time. If a PCIe type is discovered, it
+ * means a GPU is detected and we are adding GPU->CPU to the topology.
+ * At this time, also add the corresponding CPU->GPU link.
+ */
+ if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
+ cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
+ if (!cpu_dev)
+ return -ENODEV;
+ /* same everything but the other direction */
+ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ if (!props2)
+ return -ENOMEM;
+ props2->node_from = id_to;
+ props2->node_to = id_from;
+ props2->kobj = NULL;
+ cpu_dev->io_link_count++;
+ cpu_dev->node_props.io_links_count++;
+ list_add_tail(&props2->list, &cpu_dev->io_link_props);
+ }
+
+ return 0;
+}
+
+/* kfd_parse_subtype - parse subtypes and attach them to the correct topology
+ * device present in the device_list
+ * @sub_type_hdr - subtype section of crat_image
+ * @device_list - list of topology devices present in this crat_image
+ */
+static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+ struct list_head *device_list)
+{
+ struct crat_subtype_computeunit *cu;
+ struct crat_subtype_memory *mem;
+ struct crat_subtype_cache *cache;
+ struct crat_subtype_iolink *iolink;
+ int ret = 0;
+
+ switch (sub_type_hdr->type) {
+ case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+ cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+ ret = kfd_parse_subtype_cu(cu, device_list);
+ break;
+ case CRAT_SUBTYPE_MEMORY_AFFINITY:
+ mem = (struct crat_subtype_memory *)sub_type_hdr;
+ ret = kfd_parse_subtype_mem(mem, device_list);
+ break;
+ case CRAT_SUBTYPE_CACHE_AFFINITY:
+ cache = (struct crat_subtype_cache *)sub_type_hdr;
+ ret = kfd_parse_subtype_cache(cache, device_list);
+ break;
+ case CRAT_SUBTYPE_TLB_AFFINITY:
+ /*
+ * For now, nothing to do here
+ */
+ pr_debug("Found TLB entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+ /*
+ * For now, nothing to do here
+ */
+ pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_IOLINK_AFFINITY:
+ iolink = (struct crat_subtype_iolink *)sub_type_hdr;
+ ret = kfd_parse_subtype_iolink(iolink, device_list);
+ break;
+ default:
+ pr_warn("Unknown subtype %d in CRAT\n",
+ sub_type_hdr->type);
+ }
+
+ return ret;
+}
+
+/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
+ * create a kfd_topology_device and add it to device_list.
Also parse + * CRAT subtypes and attach it to appropriate kfd_topology_device + * @crat_image - input image containing CRAT + * @device_list - [OUT] list of kfd_topology_device generated after + * parsing crat_image + * @proximity_domain - Proximity domain of the first device in the table + * + * Return - 0 if successful else -ve value + */ +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain) +{ + struct kfd_topology_device *top_dev = NULL; + struct crat_subtype_generic *sub_type_hdr; + uint16_t node_id; + int ret = 0; + struct crat_header *crat_table = (struct crat_header *)crat_image; + uint16_t num_nodes; + uint32_t image_len; + + if (!crat_image) + return -EINVAL; + + if (!list_empty(device_list)) { + pr_warn("Error device list should be empty\n"); + return -EINVAL; + } + + num_nodes = crat_table->num_domains; + image_len = crat_table->length; + + pr_info("Parsing CRAT table with %d nodes\n", num_nodes); + + for (node_id = 0; node_id < num_nodes; node_id++) { + top_dev = kfd_create_topology_device(device_list); + if (!top_dev) + break; + top_dev->proximity_domain = proximity_domain++; + } + + if (!top_dev) { + ret = -ENOMEM; + goto err; + } + + memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); + memcpy(top_dev->oem_table_id, crat_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); + top_dev->oem_revision = crat_table->oem_revision; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); + while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < + ((char *)crat_image) + image_len) { + if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { + ret = kfd_parse_subtype(sub_type_hdr, device_list); + if (ret) + break; + } + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + } + +err: + if (ret) + kfd_release_topology_device_list(device_list); + + return ret; +} + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_pcache(struct crat_subtype_cache *pcache, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int mem_available, + int cu_bitmask, + int cache_type, unsigned int cu_processor_id, + int cu_block) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + + /* First check if enough memory is available */ + if (sizeof(struct crat_subtype_cache) > mem_available) + return -ENOMEM; + + cu_sibling_map_mask = cu_bitmask; + cu_sibling_map_mask >>= cu_block; + cu_sibling_map_mask &= + ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. 
If
+ * it is inactive, skip it.
+ */
+ if (first_active_cu) {
+ memset(pcache, 0, sizeof(struct crat_subtype_cache));
+ pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+ pcache->length = sizeof(struct crat_subtype_cache);
+ pcache->flags = pcache_info[cache_type].flags;
+ pcache->processor_id_low = cu_processor_id
+ + (first_active_cu - 1);
+ pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+ * inactive CU
+ */
+ cu_sibling_map_mask =
+ cu_sibling_map_mask >> (first_active_cu - 1);
+
+ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[1] =
+ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[2] =
+ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[3] =
+ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ return 0;
+ }
+ return 1;
+}
+
+/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ *
+ * @kdev - [IN] GPU device
+ * @gpu_processor_id - [IN] GPU processor ID to which these caches
+ * are associated
+ * @available_size - [IN] Amount of memory available in pcache
+ * @cu_info - [IN] Compute Unit info obtained from KGD
+ * @pcache - [OUT] memory into which cache data is to be filled in.
+ * @size_filled - [OUT] amount of data used up in pcache.
+ * @num_of_entries - [OUT] number of caches added
+ */
+static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
+ int gpu_processor_id,
+ int available_size,
+ struct kfd_cu_info *cu_info,
+ struct crat_subtype_cache *pcache,
+ int *size_filled,
+ int *num_of_entries)
+{
+ struct kfd_gpu_cache_info *pcache_info;
+ int num_of_cache_types = 0;
+ int i, j, k;
+ int ct = 0;
+ int mem_available = available_size;
+ unsigned int cu_processor_id;
+ int ret;
+
+ switch (kdev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ pcache_info = kaveri_cache_info;
+ num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+ break;
+ case CHIP_HAWAII:
+ pcache_info = hawaii_cache_info;
+ num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+ break;
+ case CHIP_CARRIZO:
+ pcache_info = carrizo_cache_info;
+ num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+ break;
+ case CHIP_TONGA:
+ pcache_info = tonga_cache_info;
+ num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+ break;
+ case CHIP_FIJI:
+ pcache_info = fiji_cache_info;
+ num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+ break;
+ case CHIP_POLARIS10:
+ pcache_info = polaris10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+ break;
+ case CHIP_POLARIS11:
+ pcache_info = polaris11_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *size_filled = 0;
+ *num_of_entries = 0;
+
+ /* For each type of cache listed in the kfd_gpu_cache_info table,
+ * go through all available Compute Units.
+ * The [i,j,k] loop walks shader engines, arrays and CUs:
+ * if kfd_gpu_cache_info.num_cu_shared == 1,
+ * it visits every available CU;
+ * if kfd_gpu_cache_info.num_cu_shared != 1,
+ * it considers only one CU from each
+ * group sharing the cache
+ */
+
+ for (ct = 0; ct < num_of_cache_types; ct++) {
+ cu_processor_id = gpu_processor_id;
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+ j++) {
+ for (k = 0; k < cu_info->num_cu_per_sh;
+ k += pcache_info[ct].num_cu_shared) {
+
+ ret = fill_in_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ cu_info->cu_bitmap[i][j],
+ ct,
+ cu_processor_id,
+ k);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -=
+ sizeof(*pcache);
+ (*size_filled) +=
+ sizeof(*pcache);
+ }
+
+ /* Move to next CU block */
+ cu_processor_id +=
+ pcache_info[ct].num_cu_shared;
+ }
+ }
+ }
+ }
+
+ pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
+
+ return 0;
+}
+
+/*
+ * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
+ * copies CRAT from ACPI (if available).
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
+ * crat_image will be NULL
+ * @size: [OUT] size of crat_image
+ *
+ * Return 0 if successful else return error code
+ */
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+{
+ struct acpi_table_header *crat_table;
+ acpi_status status;
+ void *pcrat_image;
+
+ if (!crat_image)
+ return -EINVAL;
+
+ *crat_image = NULL;
+
+ /* Fetch the CRAT table from ACPI */
+ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+ if (status == AE_NOT_FOUND) {
+ pr_warn("CRAT table not found\n");
+ return -ENODATA;
+ } else if (ACPI_FAILURE(status)) {
+ const char *err = acpi_format_exception(status);
+
+ pr_err("CRAT table error: %s\n", err);
+ return -EINVAL;
+ }
+
+ if (ignore_crat) {
+ pr_info("CRAT table disabled by module option\n");
+ return -ENODATA;
+ }
+
+ pcrat_image = kmalloc(crat_table->length, GFP_KERNEL);
+ if (!pcrat_image)
+ return -ENOMEM;
+
+ memcpy(pcrat_image, crat_table, crat_table->length);
+
+ *crat_image = pcrat_image;
+ *size = crat_table->length;
+
+ return 0;
+}
+
+/* Memory required to create Virtual CRAT.
+ * Since there is no easy way to predict the amount of memory required, the
+ * following amounts are allocated for CPU and GPU Virtual CRAT. This is
+ * expected to cover all known conditions. But to be safe, an additional check
+ * is put in the code to ensure we don't overwrite.
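+ * If either estimate ever proves too small, the avail_size checks in the
+ * fill functions below fail with -ENOMEM instead of writing past the end.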
+ */ +#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) +#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE) + +/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_computeunit *sub_type_hdr) +{ + const struct cpumask *cpumask; + + *avail_size -= sizeof(struct crat_subtype_computeunit); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + cpumask = cpumask_of_node(numa_node_id); + + /* Fill in CU data */ + sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; + sub_type_hdr->proximity_domain = proximity_domain; + sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); + if (sub_type_hdr->processor_id_low == -1) + return -EINVAL; + + sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); + + return 0; +} + +/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_memory *sub_type_hdr) +{ + uint64_t mem_in_bytes = 0; + pg_data_t *pgdat; + int zone_type; + + *avail_size -= sizeof(struct crat_subtype_memory); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_memory); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in Memory Subunit data */ + + /* Unlike si_meminfo, si_meminfo_node is not exported. 
So + * the following lines are duplicated from si_meminfo_node + * function + */ + pgdat = NODE_DATA(numa_node_id); + for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) + mem_in_bytes += pgdat->node_zones[zone_type].managed_pages; + mem_in_bytes <<= PAGE_SHIFT; + + sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); + sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); + sub_type_hdr->proximity_domain = proximity_domain; + + return 0; +} + +static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, + uint32_t *num_entries, + struct crat_subtype_iolink *sub_type_hdr) +{ + int nid; + struct cpuinfo_x86 *c = &cpu_data(0); + uint8_t link_type; + + if (c->x86_vendor == X86_VENDOR_AMD) + link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; + else + link_type = CRAT_IOLINK_TYPE_QPI_1_1; + + *num_entries = 0; + + /* Create IO links from this node to other CPU nodes */ + for_each_online_node(nid) { + if (nid == numa_node_id) /* node itself */ + continue; + + *avail_size -= sizeof(struct crat_subtype_iolink); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_iolink); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in IO link data */ + sub_type_hdr->proximity_domain_from = numa_node_id; + sub_type_hdr->proximity_domain_to = nid; + sub_type_hdr->io_interface_type = link_type; + + (*num_entries)++; + sub_type_hdr++; + } + + return 0; +} + +/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU + * + * @pcrat_image: Fill in VCRAT for CPU + * @size: [IN] allocated size of crat_image. + * [OUT] actual size of data filled in crat_image + */ +static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) +{ + struct crat_header *crat_table = (struct crat_header *)pcrat_image; + struct acpi_table_header *acpi_table; + acpi_status status; + struct crat_subtype_generic *sub_type_hdr; + int avail_size = *size; + int numa_node_id; + uint32_t entries = 0; + int ret = 0; + + if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) + return -EINVAL; + + /* Fill in CRAT Header. + * Modify length and total_entries as subunits are added. 
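+ * Per NUMA node, the layout below is: one compute-unit subtype, one
+ * memory subtype, then one IO link subtype per other online node.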
+ */ + avail_size -= sizeof(struct crat_header); + if (avail_size < 0) + return -ENOMEM; + + memset(crat_table, 0, sizeof(struct crat_header)); + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); + crat_table->length = sizeof(struct crat_header); + + status = acpi_get_table("DSDT", 0, &acpi_table); + if (status == AE_NOT_FOUND) + pr_warn("DSDT table not found for OEM information\n"); + else { + crat_table->oem_revision = acpi_table->revision; + memcpy(crat_table->oem_id, acpi_table->oem_id, + CRAT_OEMID_LENGTH); + memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); + } + crat_table->total_entries = 0; + crat_table->num_domains = 0; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); + + for_each_online_node(numa_node_id) { + if (kfd_numa_node_to_apic_id(numa_node_id) == -1) + continue; + + /* Fill in Subtype: Compute Unit */ + ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, + crat_table->num_domains, + (struct crat_subtype_computeunit *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + /* Fill in Subtype: Memory */ + ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, + crat_table->num_domains, + (struct crat_subtype_memory *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + /* Fill in Subtype: IO Link */ + ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, + &entries, + (struct crat_subtype_iolink *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + + crat_table->num_domains++; + } + + /* TODO: Add cache Subtype for CPU. + * Currently, CPU cache information is available in function + * detect_cache_attributes(cpu) defined in the file + * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not + * exported and to get the same information the code needs to be + * duplicated. 
+ */
+
+ *size = crat_table->length;
+ pr_info("Virtual CRAT table created for CPU\n");
+
+ return 0;
+}
+
+static int kfd_fill_gpu_memory_affinity(int *avail_size,
+ struct kfd_dev *kdev, uint8_t type, uint64_t size,
+ struct crat_subtype_memory *sub_type_hdr,
+ uint32_t proximity_domain,
+ const struct kfd_local_mem_info *local_mem_info)
+{
+ *avail_size -= sizeof(struct crat_subtype_memory);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+ sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ sub_type_hdr->proximity_domain = proximity_domain;
+
+ pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
+ type, size);
+
+ sub_type_hdr->length_low = lower_32_bits(size);
+ sub_type_hdr->length_high = upper_32_bits(size);
+
+ sub_type_hdr->width = local_mem_info->vram_width;
+ sub_type_hdr->visibility_type = type;
+
+ return 0;
+}
+
+/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
+ * to its NUMA node
+ * @avail_size: Available size in the memory
+ * @kdev - [IN] GPU device
+ * @sub_type_hdr: Memory into which io link info will be filled in
+ * @proximity_domain - proximity domain of the GPU node
+ *
+ * Return 0 if successful else return -ve value
+ */
+static int kfd_fill_gpu_direct_io_link(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain)
+{
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ /* Fill in subtype header data */
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill in IOLINK subtype.
+ * TODO: Fill-in other fields of iolink subtype
+ */
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
+ sub_type_hdr->proximity_domain_from = proximity_domain;
+#ifdef CONFIG_NUMA
+ if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+ sub_type_hdr->proximity_domain_to = 0;
+ else
+ sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+#else
+ sub_type_hdr->proximity_domain_to = 0;
+#endif
+ return 0;
+}
+
+/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
+ *
+ * @pcrat_image: Fill in VCRAT for GPU
+ * @size: [IN] allocated size of crat_image.
+ * [OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ size_t *size, struct kfd_dev *kdev,
+ uint32_t proximity_domain)
+{
+ struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct crat_subtype_generic *sub_type_hdr;
+ struct crat_subtype_computeunit *cu;
+ struct kfd_cu_info cu_info;
+ struct amd_iommu_device_info iommu_info;
+ int avail_size = *size;
+ uint32_t total_num_of_cu;
+ int num_of_cache_entries = 0;
+ int cache_mem_filled = 0;
+ int ret = 0;
+ const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
+ AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
+ AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+ struct kfd_local_mem_info local_mem_info;
+
+ if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
+ return -EINVAL;
+
+ /* Fill the CRAT Header.
+ * Modify length and total_entries as subunits are added.
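+ * Each subtype appended below advances sub_type_hdr by its own length,
+ * so entries stay packed back to back in the image.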
+ */ + avail_size -= sizeof(struct crat_header); + if (avail_size < 0) + return -ENOMEM; + + memset(crat_table, 0, sizeof(struct crat_header)); + + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); + /* Change length as we add more subtypes*/ + crat_table->length = sizeof(struct crat_header); + crat_table->num_domains = 1; + crat_table->total_entries = 0; + + /* Fill in Subtype: Compute Unit + * First fill in the sub type header and then sub type data + */ + avail_size -= sizeof(struct crat_subtype_computeunit); + if (avail_size < 0) + return -ENOMEM; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); + + sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill CU subtype data */ + cu = (struct crat_subtype_computeunit *)sub_type_hdr; + cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; + cu->proximity_domain = proximity_domain; + + kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info); + cu->num_simd_per_cu = cu_info.simd_per_cu; + cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->max_waves_simd = cu_info.max_waves_per_simd; + + cu->wave_front_size = cu_info.wave_front_size; + cu->array_count = cu_info.num_shader_arrays_per_engine * + cu_info.num_shader_engines; + total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); + cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); + cu->num_cu_per_array = cu_info.num_cu_per_sh; + cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; + cu->num_banks = cu_info.num_shader_engines; + cu->lds_size_in_kb = cu_info.lds_size; + + cu->hsa_capability = 0; + + /* Check if this node supports IOMMU. During parsing this flag will + * translate to HSA_CAP_ATS_PRESENT + */ + iommu_info.flags = 0; + if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { + if ((iommu_info.flags & required_iommu_flags) == + required_iommu_flags) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; + } + + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + /* Fill in Subtype: Memory. Only on systems with large BAR (no + * private FB), report memory as public. On other systems + * report the total FB size (public+private) as a single + * private heap. + */ + kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + if (local_mem_info.local_mem_size_private == 0) + ret = kfd_fill_gpu_memory_affinity(&avail_size, + kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, + local_mem_info.local_mem_size_public, + (struct crat_subtype_memory *)sub_type_hdr, + proximity_domain, + &local_mem_info); + else + ret = kfd_fill_gpu_memory_affinity(&avail_size, + kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, + local_mem_info.local_mem_size_public + + local_mem_info.local_mem_size_private, + (struct crat_subtype_memory *)sub_type_hdr, + proximity_domain, + &local_mem_info); + if (ret < 0) + return ret; + + crat_table->length += sizeof(struct crat_subtype_memory); + crat_table->total_entries++; + + /* TODO: Fill in cache information. 
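The CU subtype arithmetic above is dense, so here is the same computation with invented numbers for a hypothetical 4-shader-engine part (the counts are not from any real ASIC, and the function name is made up):

#include <linux/kernel.h>

static void cu_topology_example(void)
{
	/* hypothetical kfd_cu_info contents */
	uint32_t num_shader_engines = 4;
	uint32_t num_shader_arrays_per_engine = 1;
	uint32_t num_cu_per_sh = 16;

	uint32_t array_count = num_shader_arrays_per_engine *
			       num_shader_engines;		/* 4 arrays */
	uint32_t total_num_of_cu = array_count * num_cu_per_sh;	/* 64 CUs */

	/* get_and_inc_gpu_processor_id(64) would then reserve 64
	 * consecutive processor IDs starting at cu->processor_id_low */
	pr_debug("total CUs: %u\n", total_num_of_cu);
}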
This information is NOT readily
+	 * available in KGD
+	 */
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+	ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
+				avail_size,
+				&cu_info,
+				(struct crat_subtype_cache *)sub_type_hdr,
+				&cache_mem_filled,
+				&num_of_cache_entries);
+
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += cache_mem_filled;
+	crat_table->total_entries += num_of_cache_entries;
+	avail_size -= cache_mem_filled;
+
+	/* Fill in Subtype: IO_LINKS
+	 * Only direct links are added here, i.e. the link from the GPU
+	 * to its NUMA node. Indirect links are added by userspace.
+	 */
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			cache_mem_filled);
+	ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += sub_type_hdr->length;
+	crat_table->total_entries++;
+
+	*size = crat_table->length;
+	pr_info("Virtual CRAT table created for GPU\n");
+
+	return ret;
+}
+
+/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
+ * creates a Virtual CRAT (VCRAT) image
+ *
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ * @crat_image: VCRAT image created because ACPI does not have a
+ *		CRAT for this device
+ * @size: [OUT] size of virtual crat_image
+ * @flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+ *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
+ *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+ *		-- this option is not currently implemented.
+ *		The assumption is that all AMD APUs will have CRAT
+ * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *
+ * Return 0 if successful else return -ve value
+ */
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_dev *kdev,
+				  uint32_t proximity_domain)
+{
+	void *pcrat_image = NULL;
+	int ret = 0;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	*crat_image = NULL;
+
+	/* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
+	 * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
+	 * all the current conditions. A check prevents writing beyond the
+	 * allocated size.
+	 */
+	switch (flags) {
+	case COMPUTE_UNIT_CPU:
+		pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = VCRAT_SIZE_FOR_CPU;
+		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
+		break;
+	case COMPUTE_UNIT_GPU:
+		if (!kdev)
+			return -EINVAL;
+		pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = VCRAT_SIZE_FOR_GPU;
+		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
+						 proximity_domain);
+		break;
+	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
+		/* TODO: */
+		ret = -EINVAL;
+		pr_err("VCRAT not implemented for APU\n");
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		*crat_image = pcrat_image;
+	else
+		kfree(pcrat_image);
+
+	return ret;
+}
+
+
+/* kfd_destroy_crat_image
+ *
+ * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
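Taken together with the declarations added to kfd_crat.h further below, a typical caller sequence looks roughly like the following hedged sketch; the wrapper name is invented and error handling is trimmed:

/* Sketch: build a CPU VCRAT, parse it into a topology device list at
 * proximity domain 0, then release the image.
 */
static int vcrat_cpu_example(struct list_head *device_list)
{
	void *crat_image;
	size_t image_size;
	int ret;

	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
					    COMPUTE_UNIT_CPU, NULL, 0);
	if (ret)
		return ret;

	ret = kfd_parse_crat_table(crat_image, device_list, 0);
	kfd_destroy_crat_image(crat_image);
	return ret;
}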
+ * + */ +void kfd_destroy_crat_image(void *crat_image) +{ + kfree(crat_image); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index a374fa3d3ee6..b5cd182b9edd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -44,6 +44,10 @@ #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) +/* Compute Unit flags */ +#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ +#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ + struct crat_header { uint32_t signature; uint32_t length; @@ -105,7 +109,7 @@ struct crat_subtype_computeunit { uint8_t wave_front_size; uint8_t num_banks; uint16_t micro_engine_id; - uint8_t num_arrays; + uint8_t array_count; uint8_t num_cu_per_array; uint8_t num_simd_per_cu; uint8_t max_slots_scatch_cu; @@ -127,13 +131,14 @@ struct crat_subtype_memory { uint8_t length; uint16_t reserved; uint32_t flags; - uint32_t promixity_domain; + uint32_t proximity_domain; uint32_t base_addr_low; uint32_t base_addr_high; uint32_t length_low; uint32_t length_high; uint32_t width; - uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH]; + uint8_t visibility_type; /* for virtual (dGPU) CRAT */ + uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1]; }; /* @@ -222,9 +227,12 @@ struct crat_subtype_ccompute { /* * HSA IO Link Affinity structure and definitions */ -#define CRAT_IOLINK_FLAGS_ENABLED 0x00000001 -#define CRAT_IOLINK_FLAGS_COHERENCY 0x00000002 -#define CRAT_IOLINK_FLAGS_RESERVED 0xfffffffc +#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 /* * IO interface types @@ -232,10 +240,18 @@ struct crat_subtype_ccompute { #define CRAT_IOLINK_TYPE_UNDEFINED 0 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 #define CRAT_IOLINK_TYPE_PCIEXPRESS 2 -#define CRAT_IOLINK_TYPE_OTHER 3 +#define CRAT_IOLINK_TYPE_AMBA 3 +#define CRAT_IOLINK_TYPE_MIPI 4 +#define CRAT_IOLINK_TYPE_QPI_1_1 5 +#define CRAT_IOLINK_TYPE_RESERVED1 6 +#define CRAT_IOLINK_TYPE_RESERVED2 7 +#define CRAT_IOLINK_TYPE_RAPID_IO 8 +#define CRAT_IOLINK_TYPE_INFINIBAND 9 +#define CRAT_IOLINK_TYPE_RESERVED3 10 +#define CRAT_IOLINK_TYPE_OTHER 11 #define CRAT_IOLINK_TYPE_MAX 255 -#define CRAT_IOLINK_RESERVED_LENGTH 24 +#define CRAT_IOLINK_RESERVED_LENGTH 24 struct crat_subtype_iolink { uint8_t type; @@ -291,4 +307,14 @@ struct cdit_header { #pragma pack() +struct kfd_dev; + +int kfd_create_crat_image_acpi(void **crat_image, size_t *size); +void kfd_destroy_crat_image(void *crat_image); +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain); +int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + int flags, struct kfd_dev *kdev, + uint32_t proximity_domain); + #endif /* KFD_CRAT_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index c407f6bd9956..afb26f205d29 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -95,7 +95,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, ib_packet->bitfields3.ib_base_hi = largep->u.high_part; ib_packet->control = (1 << 23) | (1 << 31) | - ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + ((size_in_bytes / 4) & 0xfffff); ib_packet->bitfields5.pasid = 
pasid; @@ -126,8 +126,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->header.opcode = IT_RELEASE_MEM; rm_packet->header.type = PM4_TYPE_3; - rm_packet->header.count = sizeof(struct pm4__release_mem) / - sizeof(unsigned int) - 2; + rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; rm_packet->bitfields2.event_index = @@ -652,8 +651,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; packets_vec[0].header.type = PM4_TYPE_3; packets_vec[0].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; packets_vec[0].bitfields2.insert_vmid = 0; packets_vec[0].reg_data[0] = reg_gfx_index.u32All; @@ -661,8 +659,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[1].header.count = 1; packets_vec[1].header.opcode = IT_SET_CONFIG_REG; packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - - AMD_CONFIG_REG_BASE; + packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; packets_vec[1].bitfields2.insert_vmid = 1; @@ -678,8 +675,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[2].ordinal1 = packets_vec[0].ordinal1; packets_vec[2].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; packets_vec[2].bitfields2.insert_vmid = 0; packets_vec[2].reg_data[0] = reg_gfx_index.u32All; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c new file mode 100644 index 000000000000..4bd6ebfaf425 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -0,0 +1,75 @@ +/* + * Copyright 2016-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
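The `sizeof(uint32_t)` to `4` rewrites in kfd_dbgdev.c above all encode the same PM4 rule: a type-3 packet header's count field holds the packet length in dwords minus 2. A worked example, with an assumed packet size:

/* If a packet is, say, 32 bytes: 32 / 4 = 8 dwords, so count = 8 - 2 = 6. */
static inline unsigned int pm4_type3_count(size_t packet_size_bytes)
{
	return packet_size_bytes / 4 - 2;
}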
+ */ + +#include <linux/debugfs.h> +#include "kfd_priv.h" + +static struct dentry *debugfs_root; + +static int kfd_debugfs_open(struct inode *inode, struct file *file) +{ + int (*show)(struct seq_file *, void *) = inode->i_private; + + return single_open(file, show, NULL); +} + +static const struct file_operations kfd_debugfs_fops = { + .owner = THIS_MODULE, + .open = kfd_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kfd_debugfs_init(void) +{ + struct dentry *ent; + + debugfs_root = debugfs_create_dir("kfd", NULL); + if (!debugfs_root || debugfs_root == ERR_PTR(-ENODEV)) { + pr_warn("Failed to create kfd debugfs dir\n"); + return; + } + + ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_mqds_by_process, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create mqds in kfd debugfs\n"); + + ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_hqds_by_device, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create hqds in kfd debugfs\n"); + + ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, + kfd_debugfs_rls_by_device, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create rls in kfd debugfs\n"); +} + +void kfd_debugfs_fini(void) +{ + debugfs_remove_recursive(debugfs_root); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 621a3b53a038..a8fa33a08de3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -27,6 +27,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "cwsr_trap_handler_gfx8.asm" #define MQD_SIZE_ALIGNED 768 @@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, }; struct kfd_deviceid { @@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } +static void kfd_cwsr_init(struct kfd_dev *kfd) +{ + if (cwsr_enable && kfd->device_info->supports_cwsr) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + kfd->cwsr_enabled = true; + } +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -224,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd - kfd->vm_info.first_vmid_kfd + 1; + /* Verify module parameters regarding mapped process number*/ + if ((hws_max_conc_proc < 0) + || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { + dev_err(kfd_device, + "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", + hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, + kfd->vm_info.vmid_num_kfd); + kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; + } else + kfd->max_proc_per_quantum = hws_max_conc_proc; + /* 
calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -286,6 +311,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_iommu_pasid_error; } + kfd_cwsr_init(kfd); + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e202921c150e..d0693fd8cbf8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocated_vmid) + struct qcm_process_device *qpd) { int retval; @@ -170,9 +169,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_unlock; } - *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -181,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -EINVAL; if (retval) { - if (list_empty(&qpd->queues_list)) { + if (list_empty(&qpd->queues_list)) deallocate_vmid(dqm, qpd, q); - *allocated_vmid = 0; - } goto out_unlock; } @@ -809,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, int *allocate_vmid) + struct qcm_process_device *qpd) { int retval; struct mqd_manager *mqd; retval = 0; - if (allocate_vmid) - *allocate_vmid = 0; - mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { @@ -846,6 +842,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) @@ -1110,6 +1109,26 @@ out: return retval; } +static int set_trap_handler(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr) +{ + uint64_t *tma; + + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ + tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } + + return 0; +} + static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1241,6 +1260,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: @@ -1256,6 +1276,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = 
set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; break; default: @@ -1290,3 +1311,74 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) dqm->ops.uninitialize(dqm); kfree(dqm); } + +#if defined(CONFIG_DEBUG_FS) + +static void seq_reg_dump(struct seq_file *m, + uint32_t (*dump)[2], uint32_t n_regs) +{ + uint32_t i, count; + + for (i = 0, count = 0; i < n_regs; i++) { + if (count == 0 || + dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { + seq_printf(m, "%s %08x: %08x", + i ? "\n" : "", + dump[i][0], dump[i][1]); + count = 7; + } else { + seq_printf(m, " %08x", dump[i][1]); + count--; + } + } + + seq_puts(m, "\n"); +} + +int dqm_debugfs_hqds(struct seq_file *m, void *data) +{ + struct device_queue_manager *dqm = data; + uint32_t (*dump)[2], n_regs; + int pipe, queue; + int r = 0; + + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { + int pipe_offset = pipe * get_queues_per_pipe(dqm); + + for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { + if (!test_bit(pipe_offset + queue, + dqm->dev->shared_resources.queue_bitmap)) + continue; + + r = dqm->dev->kfd2kgd->hqd_dump( + dqm->dev->kgd, pipe, queue, &dump, &n_regs); + if (r) + break; + + seq_printf(m, " CP Pipe %d, Queue %d\n", + pipe, queue); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + } + + for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) { + for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) { + r = dqm->dev->kfd2kgd->hqd_sdma_dump( + dqm->dev->kgd, pipe, queue, &dump, &n_regs); + if (r) + break; + + seq_printf(m, " SDMA Engine %d, RLC %d\n", + pipe, queue); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + } + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 5b77cb69f732..c61b693bfa8c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -84,8 +84,7 @@ struct device_process_node { struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocate_vmid); + struct qcm_process_device *qpd); int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -123,6 +122,11 @@ struct device_queue_manager_ops { void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + int (*set_trap_handler)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr); + int (*process_termination)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index feb76c235b1a..ebb4da14e3df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -116,8 +116,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) pr_debug("doorbell aperture size == 0x%08lX\n", kfd->shared_resources.doorbell_aperture_size); - pr_debug("doorbell kernel address == 0x%08lX\n", - (uintptr_t)kfd->doorbell_kernel_ptr); + pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); return 0; } @@ -194,8 +193,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" - " kernel address == 0x%08lX\n", - *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); + " 
kernel address == %p\n", + *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); return kfd->doorbell_kernel_ptr + inx; } @@ -215,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) { if (db) { writel(value, db); - pr_debug("Writing %d to doorbell address 0x%p\n", value, db); + pr_debug("Writing %d to doorbell address %p\n", value, db); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index cb92d4b72400..93aae5c1e78b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, } mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) @@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); struct mm_struct *mm; @@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, */ mm = get_task_mm(p->lead_thread); if (!mm) { - mutex_unlock(&p->mutex); + kfd_unref_process(p); return; /* Process is exiting */ } @@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, &memory_exception_data); mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } void kfd_signal_hw_exception_event(unsigned int pasid) @@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid) /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. 
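The three comment updates in kfd_events.c above all describe the same new contract, sketched here from the caller's side (the caller function is hypothetical; the lookup and unref functions are the ones introduced by this series):

/* kfd_lookup_process_by_pasid() now hands back a counted reference
 * rather than a locked process, so an exiting process stays valid
 * while a workqueue-context caller uses it.
 */
static void signal_something(unsigned int pasid)
{
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

	if (!p)
		return;	/* process is already gone */

	/* ... take p->event_mutex, deliver the event, drop it ... */

	kfd_unref_process(p);	/* pairs with the kref_get() in lookup */
}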
*/ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid) lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index c59384bbbc5f..7377513050e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && + while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && id < NUM_OF_SUPPORTED_GPUS) { + if (!dev) { + id++; /* Skip non GPU devices */ + continue; + } + pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 8b0c0645d7c0..5dc6567d4a13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -218,7 +218,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, rptr = *kq->rptr_kernel; wptr = *kq->wptr_kernel; queue_address = (unsigned int *)kq->pq_kernel_addr; - queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); + queue_size_dwords = kq->queue->properties.queue_size / 4; pr_debug("rptr: %d\n", rptr); pr_debug("wptr: %d\n", wptr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f744caeaee04..3ac72bed4f31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -50,6 +50,15 @@ module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); +int hws_max_conc_proc = 8; +module_param(hws_max_conc_proc, int, 0444); +MODULE_PARM_DESC(hws_max_conc_proc, + "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); + +int cwsr_enable = 1; +module_param(cwsr_enable, int, 0444); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); MODULE_PARM_DESC(max_num_of_queues_per_device, @@ -60,6 +69,11 @@ module_param(send_sigterm, int, 0444); MODULE_PARM_DESC(send_sigterm, "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); +int ignore_crat; +module_param(ignore_crat, int, 0444); +MODULE_PARM_DESC(ignore_crat, + "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, @@ -114,6 +128,8 @@ static int __init kfd_module_init(void) kfd_process_create_wq(); + kfd_debugfs_init(); + amdkfd_init_completed = 1; dev_info(kfd_device, "Initialized module\n"); @@ -130,6 +146,7 @@ static void __exit kfd_module_exit(void) { amdkfd_init_completed = 0; + kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_topology_shutdown(); kfd_chardev_exit(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 
1f3a6ba7eed2..8972bcfbf701 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -85,6 +85,10 @@ struct mqd_manager { uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); +#if defined(CONFIG_DEBUG_FS) + int (*debugfs_show_mqd)(struct seq_file *m, void *data); +#endif + struct mutex mqd_mutex; struct kfd_dev *dev; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4728fad3fd74..f8ef4a051e08 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd) return (struct cik_mqd *)mqd; } +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -149,7 +154,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, { /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, @@ -160,7 +165,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); } static int update_mqd(struct mqd_manager *mm, void *mqd, @@ -176,8 +183,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, * Calculating queue size which is log base 2 of actual queue size -1 * dwords and another -1 for ffs */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); @@ -202,7 +208,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | @@ -343,8 +349,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, * Calculating queue size which is log base 2 of actual queue * size -1 dwords */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); @@ -360,15 +365,25 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, return 0; } -struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) -{ - struct cik_sdma_rlc_registers *m; +#if 
defined(CONFIG_DEBUG_FS) - m = (struct cik_sdma_rlc_registers *)mqd; +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct cik_mqd), false); + return 0; +} - return m; +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct cik_sdma_rlc_registers), false); + return 0; } +#endif + + struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { @@ -392,6 +407,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; @@ -400,6 +418,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; @@ -408,6 +429,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif break; default: kfree(mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 4ea854f9007b..971aec0637dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -30,7 +30,7 @@ #include "vi_structs.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_enum.h" - +#include "oss/oss_3_0_sh_mask.h" #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8 static inline struct vi_mqd *get_mqd(void *mqd) @@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd) return (struct vi_mqd *)mqd; } +static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct vi_sdma_mqd *)mqd; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -84,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_iq_rptr = 1; + if (q->tba_addr) { + m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); + m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); + m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); + m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + *mqd = m; if (gart_addr) *gart_addr = addr; @@ -98,7 +125,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, { /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. 
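The trap-handler registers written in init_mqd() above take the address shifted right by 8, i.e. in 256-byte units, the same convention used for queue_address. A worked example with an invented, page-aligned address (function name also invented):

#include <linux/kernel.h>

static void tba_encoding_example(void)
{
	uint64_t tba_addr = 0x00007f1234560000ULL;	/* invented */
	uint32_t lo = lower_32_bits(tba_addr >> 8);	/* 0x12345600 */
	uint32_t hi = upper_32_bits(tba_addr >> 8);	/* 0x0000007f */

	/* ((u64)hi << 32 | lo) << 8 recovers tba_addr exactly because a
	 * page-aligned address has its low 8 bits clear. */
	pr_debug("tba %x:%x\n", hi, lo);
}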
*/ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, @@ -116,8 +143,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT | mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; - m->cp_hqd_pq_control |= - ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); @@ -147,7 +173,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, * is safe, giving a maximum field value of 0xA. */ m->cp_hqd_eop_control |= min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + order_base_2(q->eop_ring_buffer_size / 4) - 1); m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = @@ -163,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = + atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | + mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0); @@ -234,6 +265,117 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, return retval; } +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct vi_sdma_mqd *m; + + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct vi_sdma_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct vi_sdma_mqd)); + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); +} + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct vi_sdma_mqd *m; + + m = get_sdma_mqd(mqd); + m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_doorbell = + q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; + + m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr; + + m->sdma_engine_id = 
q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0); + + return 0; +} + +/* + * * preempt type here is ignored because there is only one way + * * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +#if defined(CONFIG_DEBUG_FS) + +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct vi_mqd), false); + return 0; +} + +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct vi_sdma_mqd), false); + return 0; +} + +#endif + struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { @@ -257,6 +399,9 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; @@ -265,8 +410,20 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif break; default: kfree(mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 16da8ad02d8b..0ecbd1f9b606 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -45,7 +45,7 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) header.u32All = 0; header.opcode = opcode; - header.count = packet_size/sizeof(uint32_t) - 2; + header.count = packet_size / 4 - 2; header.type = PM4_TYPE_3; return header.u32All; @@ -55,15 +55,27 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) { - unsigned int process_count, queue_count; + unsigned int process_count, queue_count, compute_queue_count; unsigned int map_queue_size; + unsigned int max_proc_per_quantum = 1; + struct kfd_dev *dev = pm->dqm->dev; process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count; - /* check if there is over subscription*/ + /* check if there is over subscription + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). 
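Condensed, the test this hunk builds up (and completes just below) is the following; the standalone function is a sketch mirroring the code, not part of the patch:

/* A runlist is oversubscribed when more processes are mapped than one
 * scheduling quantum allows, or when compute queues (SDMA queues no
 * longer count) exceed the HQD slots available to the HWS.
 */
static bool runlist_oversubscribed(unsigned int process_count,
				   unsigned int compute_queue_count,
				   unsigned int max_proc_per_quantum,
				   unsigned int hw_queue_slots)
{
	return process_count > max_proc_per_quantum ||
	       compute_queue_count > hw_queue_slots;
}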
+ */ *over_subscription = false; - if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { + + if (dev->max_proc_per_quantum > 1) + max_proc_per_quantum = dev->max_proc_per_quantum; + + if ((process_count > max_proc_per_quantum) || + compute_queue_count > get_queues_num(pm->dqm)) { *over_subscription = true; pr_debug("Over subscribed runlist\n"); } @@ -116,10 +128,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; if (WARN_ON(!ib)) return -EFAULT; + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + packet = (struct pm4_mes_runlist *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_runlist)); @@ -130,6 +156,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, packet->bitfields4.chain = chain ? 1 : 0; packet->bitfields4.offload_polling = 0; packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; packet->ordinal2 = lower_32_bits(ib); packet->bitfields3.ib_base_hi = upper_32_bits(ib); @@ -251,6 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; *rl_size_bytes = alloc_size_bytes; + pm->ib_size_bytes = alloc_size_bytes; pr_debug("Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->queue_count); @@ -564,3 +592,26 @@ void pm_release_ib(struct packet_manager *pm) } mutex_unlock(&pm->lock); } + +#if defined(CONFIG_DEBUG_FS) + +int pm_debugfs_runlist(struct seq_file *m, void *data) +{ + struct packet_manager *pm = data; + + mutex_lock(&pm->lock); + + if (!pm->allocated) { + seq_puts(m, " No active runlist\n"); + goto out; + } + + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); + +out: + mutex_unlock(&pm->lock); + return 0; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index d6a796144269..15fff4420e53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void) struct kfd_dev *dev = NULL; unsigned int i = 0; - while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) { + while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) { if (dev && dev->kfd2kgd) { kfd2kgd = dev->kfd2kgd; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e4134c5b481..6a48d29ada47 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -33,6 +33,8 @@ #include <linux/kfd_ioctl.h> #include <linux/idr.h> #include <linux/kfifo.h> +#include <linux/seq_file.h> +#include <linux/kref.h> #include <kgd_kfd_interface.h> #include "amd_shared.h" @@ -41,6 +43,7 @@ #define KFD_MMAP_DOORBELL_MASK 0x8000000000000 #define KFD_MMAP_EVENTS_MASK 0x4000000000000 +#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000 /* * When working with cp scheduler we should assign the HIQ manually or via @@ -63,6 +66,15 @@ #define 
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 /* + * Size of the per-process TBA+TMA buffer: 2 pages + * + * The first page is the TBA used for the CWSR ISA code. The second + * page is used as TMA for daisy chaining a user-mode trap handler. + */ +#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET PAGE_SIZE + +/* * Kernel module parameter to specify maximum number of supported queues per * device */ @@ -79,11 +91,25 @@ extern int max_num_of_queues_per_device; extern int sched_policy; /* + * Kernel module parameter to specify the maximum process + * number per HW scheduler + */ +extern int hws_max_conc_proc; + +extern int cwsr_enable; + +/* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception */ extern int send_sigterm; +/* + * Ignore CRAT table during KFD initialization, can be used to work around + * broken CRAT tables on some AMD systems + */ +extern int ignore_crat; + /** * enum kfd_sched_policy * @@ -131,6 +157,7 @@ struct kfd_device_info { size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; + bool supports_cwsr; }; struct kfd_mem_obj { @@ -200,6 +227,14 @@ struct kfd_dev { /* Debug manager */ struct kfd_dbgmgr *dbgmgr; + + /* Maximum process number mapped to HW scheduler */ + unsigned int max_proc_per_quantum; + + /* CWSR */ + bool cwsr_enabled; + const void *cwsr_isa; + unsigned int cwsr_isa_size; }; /* KGD2KFD callbacks */ @@ -332,6 +367,9 @@ struct queue_properties { uint32_t eop_ring_buffer_size; uint64_t ctx_save_restore_area_address; uint32_t ctx_save_restore_area_size; + uint32_t ctl_stack_size; + uint64_t tba_addr; + uint64_t tma_addr; }; /** @@ -439,6 +477,11 @@ struct qcm_process_device { uint32_t num_gws; uint32_t num_oac; uint32_t sh_hidden_private_base; + + /* CWSR memory */ + void *cwsr_kaddr; + uint64_t tba_addr; + uint64_t tma_addr; }; @@ -501,6 +544,9 @@ struct kfd_process { */ void *mm; + struct kref ref; + struct work_struct release_work; + struct mutex mutex; /* @@ -563,9 +609,10 @@ struct amdkfd_ioctl_desc { void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); -struct kfd_process *kfd_create_process(const struct task_struct *); +struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); +void kfd_unref_process(struct kfd_process *p); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); @@ -577,6 +624,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma); + /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p); @@ -624,9 +674,12 @@ int kfd_topology_init(void); void kfd_topology_shutdown(void); int kfd_topology_add_device(struct kfd_dev *gpu); int kfd_topology_remove_device(struct kfd_dev *gpu); +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); +int kfd_numa_node_to_apic_id(int numa_node_id); /* Interrupts
*/ int kfd_interrupt_init(struct kfd_dev *dev); @@ -643,8 +696,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd); int kfd_init_apertures(struct kfd_process *process); /* Queue Context Management */ -struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd); - int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); @@ -699,6 +750,7 @@ struct packet_manager { struct mutex lock; bool allocated; struct kfd_mem_obj *ib_buffer_obj; + unsigned int ib_size_bytes; }; int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); @@ -745,4 +797,23 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); +/* Debugfs */ +#if defined(CONFIG_DEBUG_FS) + +void kfd_debugfs_init(void); +void kfd_debugfs_fini(void); +int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data); +int pqm_debugfs_mqds(struct seq_file *m, void *data); +int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data); +int dqm_debugfs_hqds(struct seq_file *m, void *data); +int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); +int pm_debugfs_runlist(struct seq_file *m, void *data); + +#else + +static inline void kfd_debugfs_init(void) {} +static inline void kfd_debugfs_fini(void) {} + +#endif + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1f5ccd28bd41..a22fb0710f15 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -24,10 +24,12 @@ #include <linux/log2.h> #include <linux/sched.h> #include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> #include <linux/compat.h> +#include <linux/mman.h> struct mm_struct; @@ -46,13 +48,12 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu); static struct workqueue_struct *kfd_process_wq; -struct kfd_process_release_work { - struct work_struct kfd_work; - struct kfd_process *p; -}; - static struct kfd_process *find_process(const struct task_struct *thread); -static struct kfd_process *create_process(const struct task_struct *thread); +static void kfd_process_ref_release(struct kref *ref); +static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep); +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + void kfd_process_create_wq(void) { @@ -68,9 +69,10 @@ void kfd_process_destroy_wq(void) } } -struct kfd_process *kfd_create_process(const struct task_struct *thread) +struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; + struct task_struct *thread = current; if (!thread->mm) return ERR_PTR(-EINVAL); @@ -79,9 +81,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) if (thread->group_leader->mm != thread->mm) return ERR_PTR(-EINVAL); - /* Take mmap_sem because we call __mmu_notifier_register inside */ - down_write(&thread->mm->mmap_sem); - /* * take kfd processes mutex before starting of process creation * so there won't be a case where two threads of the same process @@ -93,14 +92,11 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) process = find_process(thread); if (process) pr_debug("Process already found\n"); - - if (!process) - process = create_process(thread); + else + process = create_process(thread, filep); mutex_unlock(&kfd_processes_mutex); 
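The hunks that follow replace the old "lookup returns a locked process" lifetime with reference counting. As an overview, assembled from the helpers below (a sketch of the flow, in comment form):

/*
 * create_process()              kref_init(&p->ref)    count = 1
 * kfd_lookup_process_by_pasid() kref_get(&p->ref)     caller owns a ref
 * kfd_unref_process()           kref_put(&p->ref, kfd_process_ref_release)
 * kfd_process_ref_release()     queue_work(kfd_process_wq, &p->release_work)
 * kfd_process_wq_release()      free pdds, events, pasid, doorbells, p
 */

Deferring the final teardown to a workqueue means it always runs in a known sleepable context regardless of where the last reference is dropped; that rationale is an inference from the code, not stated in the patch.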
- up_write(&thread->mm->mmap_sem); - return process; } @@ -144,63 +140,75 @@ static struct kfd_process *find_process(const struct task_struct *thread) return p; } -static void kfd_process_wq_release(struct work_struct *work) +void kfd_unref_process(struct kfd_process *p) +{ + kref_put(&p->ref, kfd_process_ref_release); +} + +static void kfd_process_destroy_pdds(struct kfd_process *p) { - struct kfd_process_release_work *my_work; struct kfd_process_device *pdd, *temp; - struct kfd_process *p; - my_work = (struct kfd_process_release_work *) work; + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", + pdd->dev->id, p->pasid); - p = my_work->p; + list_del(&pdd->per_device_list); - pr_debug("Releasing process (pasid %d) in workqueue\n", - p->pasid); + if (pdd->qpd.cwsr_kaddr) + free_pages((unsigned long)pdd->qpd.cwsr_kaddr, + get_order(KFD_CWSR_TBA_TMA_SIZE)); - mutex_lock(&p->mutex); + kfree(pdd); + } +} - list_for_each_entry_safe(pdd, temp, &p->per_device_data, - per_device_list) { - pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", - pdd->dev->id, p->pasid); +/* No process locking is needed in this function, because the process + * is not findable any more. We must assume that no other thread is + * using it any more, otherwise we couldn't safely free the process + * structure in the end. + */ +static void kfd_process_wq_release(struct work_struct *work) +{ + struct kfd_process *p = container_of(work, struct kfd_process, + release_work); + struct kfd_process_device *pdd; + + pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { if (pdd->bound == PDD_BOUND) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - - list_del(&pdd->per_device_list); - kfree(pdd); } + kfd_process_destroy_pdds(p); + kfd_event_free_process(p); kfd_pasid_free(p->pasid); kfd_free_process_doorbells(p); - mutex_unlock(&p->mutex); - mutex_destroy(&p->mutex); - kfree(p); + put_task_struct(p->lead_thread); - kfree(work); + kfree(p); } -static void kfd_process_destroy_delayed(struct rcu_head *rcu) +static void kfd_process_ref_release(struct kref *ref) { - struct kfd_process_release_work *work; - struct kfd_process *p; + struct kfd_process *p = container_of(ref, struct kfd_process, ref); - p = container_of(rcu, struct kfd_process, rcu); - - mmdrop(p->mm); + INIT_WORK(&p->release_work, kfd_process_wq_release); + queue_work(kfd_process_wq, &p->release_work); +} - work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); +static void kfd_process_destroy_delayed(struct rcu_head *rcu) +{ + struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); - if (work) { - INIT_WORK((struct work_struct *) work, kfd_process_wq_release); - work->p = p; - queue_work(kfd_process_wq, (struct work_struct *) work); - } + kfd_unref_process(p); } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -244,15 +252,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + mutex_unlock(&p->mutex); - /* - * Because we drop mm_count inside kfd_process_destroy_delayed - * and because the mmu_notifier_unregister function also drop - * mm_count we need to take an extra count here. 
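kfd_process_init_cwsr() in the next hunk maps the two CWSR pages through vm_mmap() with a specially encoded offset. Pulled out for clarity, using the KFD_MMAP_RESERVED_MEM_MASK definition added to kfd_priv.h (the wrapper function is invented):

/* One value both routes the mmap to the reserved-memory handler
 * (kfd_reserved_mem_mmap()) and identifies the target device.
 */
static unsigned long cwsr_mmap_offset(struct kfd_dev *dev)
{
	return (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
}

After vm_mmap() returns the user-space address (the TBA), the handler has recorded the kernel alias in qpd->cwsr_kaddr, so the driver can memcpy the trap-handler ISA into the first page while user space sees it at qpd->tba_addr; the TMA sits one page above, at KFD_CWSR_TMA_OFFSET.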
- */ - mmgrab(p->mm); - mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); + mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); } @@ -260,7 +265,44 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, }; -static struct kfd_process *create_process(const struct task_struct *thread) +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) +{ + unsigned long offset; + struct kfd_process_device *pdd = NULL; + struct kfd_dev *dev = NULL; + struct qcm_process_device *qpd = NULL; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + dev = pdd->dev; + qpd = &pdd->qpd; + if (!dev->cwsr_enabled || qpd->cwsr_kaddr) + continue; + offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + qpd->tba_addr = (int64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { + int err = qpd->tba_addr; + + pr_err("Failure to set tba address. error %d.\n", err); + qpd->tba_addr = 0; + qpd->cwsr_kaddr = NULL; + return err; + } + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + } + + return 0; +} + +static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep) { struct kfd_process *process; int err = -ENOMEM; @@ -277,13 +319,15 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (kfd_alloc_process_doorbells(process) < 0) goto err_alloc_doorbells; + kref_init(&process->ref); + mutex_init(&process->mutex); process->mm = thread->mm; /* register notifier */ process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = __mmu_notifier_register(&process->mmu_notifier, process->mm); + err = mmu_notifier_register(&process->mmu_notifier, process->mm); if (err) goto err_mmu_notifier; @@ -291,6 +335,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) (uintptr_t)process->mm); process->lead_thread = thread->group_leader; + get_task_struct(process->lead_thread); INIT_LIST_HEAD(&process->per_device_data); @@ -306,8 +351,14 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + err = kfd_process_init_cwsr(process, filep); + if (err) + goto err_init_cwsr; + return process; +err_init_cwsr: + kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: @@ -343,16 +394,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process_device *pdd = NULL; pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); - if (pdd != NULL) { - pdd->dev = dev; - INIT_LIST_HEAD(&pdd->qpd.queues_list); - INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); - pdd->qpd.dqm = dev->dqm; - pdd->process = p; - pdd->bound = PDD_UNBOUND; - pdd->already_dequeued = false; - list_add(&pdd->per_device_list, &p->per_device_data); - } + if (!pdd) + return NULL; + + pdd->dev = dev; + INIT_LIST_HEAD(&pdd->qpd.queues_list); + INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); + pdd->qpd.dqm = dev->dqm; + pdd->qpd.pqm = &p->pqm; + pdd->process = p; + pdd->bound = PDD_UNBOUND; + pdd->already_dequeued = false; + list_add(&pdd->per_device_list, &p->per_device_data); return pdd; } @@ -483,6 +536,8 @@ void 
kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) mutex_unlock(kfd_get_dbgmgr_mutex()); + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); if (pdd) /* For GPU relying on IOMMU, we need to dequeue here @@ -491,6 +546,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) kfd_process_dequeue_from_device(pdd); mutex_unlock(&p->mutex); + + kfd_unref_process(p); } struct kfd_process_device *kfd_get_first_process_device_data( @@ -515,22 +572,86 @@ bool kfd_has_process_device_data(struct kfd_process *p) return !(list_empty(&p->per_device_data)); } -/* This returns with process->mutex locked. */ +/* This increments the process->ref counter. */ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) { - struct kfd_process *p; + struct kfd_process *p, *ret_p = NULL; unsigned int temp; int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { if (p->pasid == pasid) { - mutex_lock(&p->mutex); + kref_get(&p->ref); + ret_p = p; break; } } srcu_read_unlock(&kfd_processes_srcu, idx); - return p; + return ret_p; } + +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma) +{ + struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); + struct kfd_process_device *pdd; + struct qcm_process_device *qpd; + + if (!dev) + return -EINVAL; + if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { + pr_err("Incorrect CWSR mapping size.\n"); + return -EINVAL; + } + + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + qpd = &pdd->qpd; + + qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + if (!qpd->cwsr_kaddr) { + pr_err("Error allocating per process CWSR buffer.\n"); + return -ENOMEM; + } + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND + | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; + /* Mapping pages to user process */ + return remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(qpd->cwsr_kaddr)), + KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); +} + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) +{ + struct kfd_process *p; + unsigned int temp; + int r = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + seq_printf(m, "Process %d PASID %d:\n", + p->lead_thread->tgid, p->pasid); + + mutex_lock(&p->mutex); + r = pqm_debugfs_mqds(m, &p->pqm); + mutex_unlock(&p->mutex); + + if (r) + break; + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index a3f1e62c60ba..876380632668 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, return retval; if (list_empty(&pdd->qpd.queues_list) && - list_empty(&pdd->qpd.priv_queue_list)) { - pdd->qpd.pqm = pqm; + list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); - } pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { @@ -203,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, - &q->properties.vmid); + retval = 
dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -224,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, - &q->properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -315,6 +311,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); + if (retval) { + pr_debug("Destroy queue failed, returned %d\n", retval); + goto err_destroy_queue; + } uninit_queue(pqn->q); } @@ -326,6 +326,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) list_empty(&pdd->qpd.priv_queue_list)) dqm->ops.unregister_process(dqm, &pdd->qpd); +err_destroy_queue: return retval; } @@ -367,4 +368,67 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +#if defined(CONFIG_DEBUG_FS) + +int pqm_debugfs_mqds(struct seq_file *m, void *data) +{ + struct process_queue_manager *pqm = data; + struct process_queue_node *pqn; + struct queue *q; + enum KFD_MQD_TYPE mqd_type; + struct mqd_manager *mqd_manager; + int r = 0; + + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { + if (pqn->q) { + q = pqn->q; + switch (q->properties.type) { + case KFD_QUEUE_TYPE_SDMA: + seq_printf(m, " SDMA queue on device %x\n", + q->device->id); + mqd_type = KFD_MQD_TYPE_SDMA; + break; + case KFD_QUEUE_TYPE_COMPUTE: + seq_printf(m, " Compute queue on device %x\n", + q->device->id); + mqd_type = KFD_MQD_TYPE_CP; + break; + default: + seq_printf(m, + " Bad user queue type %d on device %x\n", + q->properties.type, q->device->id); + continue; + } + mqd_manager = q->device->dqm->ops.get_mqd_manager( + q->device->dqm, mqd_type); + } else if (pqn->kq) { + q = pqn->kq->queue; + mqd_manager = pqn->kq->mqd; + switch (q->properties.type) { + case KFD_QUEUE_TYPE_DIQ: + seq_printf(m, " DIQ on device %x\n", + pqn->kq->dev->id); + mqd_type = KFD_MQD_TYPE_HIQ; + break; + default: + seq_printf(m, + " Bad kernel queue type %d on device %x\n", + q->properties.type, + pqn->kq->dev->id); + continue; + } + } else { + seq_printf(m, + " Weird: Queue node with neither kernel nor user queue\n"); + continue; + } + + r = mqd_manager->debugfs_show_mqd(m, q->mqd); + if (r != 0) + break; + } + + return r; +} +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 19ce59028d6b..c6a76090a725 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -28,27 +28,32 @@ #include <linux/hash.h> #include <linux/cpufreq.h> #include <linux/log2.h> +#include <linux/dmi.h> +#include <linux/atomic.h> #include "kfd_priv.h" #include "kfd_crat.h" #include "kfd_topology.h" +#include "kfd_device_queue_manager.h" +/* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; -static int topology_crat_parsed; static struct kfd_system_properties sys_props; static DECLARE_RWSEM(topology_lock); +static atomic_t topology_crat_proximity_domain; -struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; + struct 
kfd_topology_device *device = NULL; down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu_id == gpu_id) { - device = top_dev->gpu; + if (top_dev->proximity_domain == proximity_domain) { + device = top_dev; break; } @@ -57,7 +62,7 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) return device; } -struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) +struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) { struct kfd_topology_device *top_dev; struct kfd_dev *device = NULL; @@ -65,7 +70,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu->pdev == pdev) { + if (top_dev->gpu_id == gpu_id) { device = top_dev->gpu; break; } @@ -75,282 +80,31 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) -{ - struct acpi_table_header *crat_table; - acpi_status status; - - if (!size) - return -EINVAL; - - /* - * Fetch the CRAT table from ACPI - */ - status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); - if (status == AE_NOT_FOUND) { - pr_warn("CRAT table not found\n"); - return -ENODATA; - } else if (ACPI_FAILURE(status)) { - const char *err = acpi_format_exception(status); - - pr_err("CRAT table error: %s\n", err); - return -EINVAL; - } - - if (*size >= crat_table->length && crat_image != NULL) - memcpy(crat_image, crat_table, crat_table->length); - - *size = crat_table->length; - - return 0; -} - -static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, - struct crat_subtype_computeunit *cu) -{ - dev->node_props.cpu_cores_count = cu->num_cpu_cores; - dev->node_props.cpu_core_id_base = cu->processor_id_low; - if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) - dev->node_props.capability |= HSA_CAP_ATS_PRESENT; - - pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, - cu->processor_id_low); -} - -static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, - struct crat_subtype_computeunit *cu) -{ - dev->node_props.simd_id_base = cu->processor_id_low; - dev->node_props.simd_count = cu->num_simd_cores; - dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; - dev->node_props.max_waves_per_simd = cu->max_waves_simd; - dev->node_props.wave_front_size = cu->wave_front_size; - dev->node_props.mem_banks_count = cu->num_banks; - dev->node_props.array_count = cu->num_arrays; - dev->node_props.cu_per_simd_array = cu->num_cu_per_array; - dev->node_props.simd_per_cu = cu->num_simd_per_cu; - dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; - if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) - dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; - pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores, - cu->processor_id_low); -} - -/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ -static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) -{ - struct kfd_topology_device *dev; - int i = 0; - - pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", - cu->proximity_domain, cu->hsa_capability); - list_for_each_entry(dev, &topology_device_list, list) { - if (cu->proximity_domain == i) { - if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) - kfd_populated_cu_info_cpu(dev, cu); - - if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) - kfd_populated_cu_info_gpu(dev, cu); - break; - } - i++; - } - - return 0; -} - -/* - * 
kfd_parse_subtype_mem is called when the topology mutex is - * already acquired - */ -static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) -{ - struct kfd_mem_properties *props; - struct kfd_topology_device *dev; - int i = 0; - - pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", - mem->promixity_domain); - list_for_each_entry(dev, &topology_device_list, list) { - if (mem->promixity_domain == i) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - if (dev->node_props.cpu_cores_count == 0) - props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; - else - props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; - - if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) - props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; - if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) - props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; - - props->size_in_bytes = - ((uint64_t)mem->length_high << 32) + - mem->length_low; - props->width = mem->width; - - dev->mem_bank_count++; - list_add_tail(&props->list, &dev->mem_props); - - break; - } - i++; - } - - return 0; -} - -/* - * kfd_parse_subtype_cache is called when the topology mutex - * is already acquired - */ -static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) -{ - struct kfd_cache_properties *props; - struct kfd_topology_device *dev; - uint32_t id; - - id = cache->processor_id_low; - - pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); - list_for_each_entry(dev, &topology_device_list, list) - if (id == dev->node_props.cpu_core_id_base || - id == dev->node_props.simd_id_base) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - props->processor_id_low = id; - props->cache_level = cache->cache_level; - props->cache_size = cache->cache_size; - props->cacheline_size = cache->cache_line_size; - props->cachelines_per_tag = cache->lines_per_tag; - props->cache_assoc = cache->associativity; - props->cache_latency = cache->cache_latency; - - if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) - props->cache_type |= HSA_CACHE_TYPE_DATA; - if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) - props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; - if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) - props->cache_type |= HSA_CACHE_TYPE_CPU; - if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) - props->cache_type |= HSA_CACHE_TYPE_HSACU; - - dev->cache_count++; - dev->node_props.caches_count++; - list_add_tail(&props->list, &dev->cache_props); - - break; - } - - return 0; -} - -/* - * kfd_parse_subtype_iolink is called when the topology mutex - * is already acquired - */ -static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) +struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) { - struct kfd_iolink_properties *props; - struct kfd_topology_device *dev; - uint32_t i = 0; - uint32_t id_from; - uint32_t id_to; - - id_from = iolink->proximity_domain_from; - id_to = iolink->proximity_domain_to; + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; - pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from); - list_for_each_entry(dev, &topology_device_list, list) { - if (id_from == i) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - props->node_from = id_from; - props->node_to = id_to; - props->ver_maj = iolink->version_major; - props->ver_min = iolink->version_minor; - - /* - * weight factor (derived from CDIR), currently always 1 - */ - props->weight = 1; - - props->min_latency = iolink->minimum_latency; - 
props->max_latency = iolink->maximum_latency; - props->min_bandwidth = iolink->minimum_bandwidth_mbs; - props->max_bandwidth = iolink->maximum_bandwidth_mbs; - props->rec_transfer_size = - iolink->recommended_transfer_size; - - dev->io_link_count++; - dev->node_props.io_links_count++; - list_add_tail(&props->list, &dev->io_link_props); + down_read(&topology_lock); + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu->pdev == pdev) { + device = top_dev->gpu; break; } - i++; - } - return 0; -} - -static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) -{ - struct crat_subtype_computeunit *cu; - struct crat_subtype_memory *mem; - struct crat_subtype_cache *cache; - struct crat_subtype_iolink *iolink; - int ret = 0; - - switch (sub_type_hdr->type) { - case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: - cu = (struct crat_subtype_computeunit *)sub_type_hdr; - ret = kfd_parse_subtype_cu(cu); - break; - case CRAT_SUBTYPE_MEMORY_AFFINITY: - mem = (struct crat_subtype_memory *)sub_type_hdr; - ret = kfd_parse_subtype_mem(mem); - break; - case CRAT_SUBTYPE_CACHE_AFFINITY: - cache = (struct crat_subtype_cache *)sub_type_hdr; - ret = kfd_parse_subtype_cache(cache); - break; - case CRAT_SUBTYPE_TLB_AFFINITY: - /* - * For now, nothing to do here - */ - pr_info("Found TLB entry in CRAT table (not processing)\n"); - break; - case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: - /* - * For now, nothing to do here - */ - pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n"); - break; - case CRAT_SUBTYPE_IOLINK_AFFINITY: - iolink = (struct crat_subtype_iolink *)sub_type_hdr; - ret = kfd_parse_subtype_iolink(iolink); - break; - default: - pr_warn("Unknown subtype (%d) in CRAT\n", - sub_type_hdr->type); - } + up_read(&topology_lock); - return ret; + return device; } +/* Called with write topology_lock acquired */ static void kfd_release_topology_device(struct kfd_topology_device *dev) { struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_perf_properties *perf; list_del(&dev->list); @@ -375,25 +129,35 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } - kfree(dev); + while (dev->perf_props.next != &dev->perf_props) { + perf = container_of(dev->perf_props.next, + struct kfd_perf_properties, list); + list_del(&perf->list); + kfree(perf); + } - sys_props.num_devices--; + kfree(dev); } -static void kfd_release_live_view(void) +void kfd_release_topology_device_list(struct list_head *device_list) { struct kfd_topology_device *dev; - while (topology_device_list.next != &topology_device_list) { - dev = container_of(topology_device_list.next, - struct kfd_topology_device, list); + while (!list_empty(device_list)) { + dev = list_first_entry(device_list, + struct kfd_topology_device, list); kfd_release_topology_device(dev); + } } +static void kfd_release_live_view(void) +{ + kfd_release_topology_device_list(&topology_device_list); memset(&sys_props, 0, sizeof(sys_props)); } -static struct kfd_topology_device *kfd_create_topology_device(void) +struct kfd_topology_device *kfd_create_topology_device( + struct list_head *device_list) { struct kfd_topology_device *dev; @@ -406,65 +170,13 @@ static struct kfd_topology_device *kfd_create_topology_device(void) INIT_LIST_HEAD(&dev->mem_props); INIT_LIST_HEAD(&dev->cache_props); INIT_LIST_HEAD(&dev->io_link_props); + INIT_LIST_HEAD(&dev->perf_props); - list_add_tail(&dev->list, &topology_device_list); - sys_props.num_devices++; + 
list_add_tail(&dev->list, device_list); return dev; } -static int kfd_parse_crat_table(void *crat_image) -{ - struct kfd_topology_device *top_dev; - struct crat_subtype_generic *sub_type_hdr; - uint16_t node_id; - int ret; - struct crat_header *crat_table = (struct crat_header *)crat_image; - uint16_t num_nodes; - uint32_t image_len; - - if (!crat_image) - return -EINVAL; - - num_nodes = crat_table->num_domains; - image_len = crat_table->length; - - pr_info("Parsing CRAT table with %d nodes\n", num_nodes); - - for (node_id = 0; node_id < num_nodes; node_id++) { - top_dev = kfd_create_topology_device(); - if (!top_dev) { - kfd_release_live_view(); - return -ENOMEM; - } - } - - sys_props.platform_id = - (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; - sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); - sys_props.platform_rev = crat_table->revision; - - sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); - while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < - ((char *)crat_image) + image_len) { - if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { - ret = kfd_parse_subtype(sub_type_hdr); - if (ret != 0) { - kfd_release_live_view(); - return ret; - } - } - - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length); - } - - sys_props.generation_count++; - topology_crat_parsed = 1; - - return 0; -} - #define sysfs_show_gen_prop(buffer, fmt, ...) \ snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) @@ -501,11 +213,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, return ret; } +static void kfd_topology_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + static const struct sysfs_ops sysprops_ops = { .show = sysprops_show, }; static struct kobj_type sysprops_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &sysprops_ops, }; @@ -541,6 +259,7 @@ static const struct sysfs_ops iolink_ops = { }; static struct kobj_type iolink_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &iolink_ops, }; @@ -568,6 +287,7 @@ static const struct sysfs_ops mem_ops = { }; static struct kobj_type mem_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &mem_ops, }; @@ -575,7 +295,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, char *buffer) { ssize_t ret; - uint32_t i; + uint32_t i, j; struct kfd_cache_properties *cache; /* Making sure that the buffer is an empty string */ @@ -593,12 +313,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency); sysfs_show_32bit_prop(buffer, "type", cache->cache_type); snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer); - for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++) - ret = snprintf(buffer, PAGE_SIZE, "%s%d%s", - buffer, cache->sibling_map[i], - (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ? 
- "\n" : ","); - + for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) { + /* Check each bit */ + if (cache->sibling_map[i] & (1 << j)) + ret = snprintf(buffer, PAGE_SIZE, + "%s%d%s", buffer, 1, ","); + else + ret = snprintf(buffer, PAGE_SIZE, + "%s%d%s", buffer, 0, ","); + } + /* Replace the last "," with end of line */ + *(buffer + strlen(buffer) - 1) = 0xA; return ret; } @@ -607,9 +333,43 @@ static const struct sysfs_ops cache_ops = { }; static struct kobj_type cache_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &cache_ops, }; +/****** Sysfs of Performance Counters ******/ + +struct kfd_perf_attr { + struct kobj_attribute attr; + uint32_t data; +}; + +static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, + char *buf) +{ + struct kfd_perf_attr *attr; + + buf[0] = 0; + attr = container_of(attrs, struct kfd_perf_attr, attr); + if (!attr->data) /* invalid data for PMC */ + return 0; + else + return sysfs_show_32bit_val(buf, attr->data); +} + +#define KFD_PERF_DESC(_name, _data) \ +{ \ + .attr = __ATTR(_name, 0444, perf_show, NULL), \ + .data = _data, \ +} + +static struct kfd_perf_attr perf_attr_iommu[] = { + KFD_PERF_DESC(max_concurrent, 0), + KFD_PERF_DESC(num_counters, 0), + KFD_PERF_DESC(counter_ids, 0), +}; +/****************************************/ + static ssize_t node_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -646,18 +406,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, "simd_count", dev->node_props.simd_count); - - if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_info_once("mem_banks_count truncated from %d to %d\n", - dev->node_props.mem_banks_count, - dev->mem_bank_count); - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->mem_bank_count); - } else { - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->node_props.mem_banks_count); - } - + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, "caches_count", dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, "io_links_count", @@ -705,9 +455,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } + if (dev->gpu->device_info->asic_family == CHIP_TONGA) + dev->node_props.capability |= + HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", - dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz( - dev->gpu->kgd)); + dev->node_props.max_engine_clk_fcompute); sysfs_show_64bit_prop(buffer, "local_mem_size", (unsigned long long int) 0); @@ -729,6 +482,7 @@ static const struct sysfs_ops node_ops = { }; static struct kobj_type node_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &node_ops, }; @@ -744,6 +498,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) @@ -780,6 +535,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_mem = NULL; } + if (dev->kobj_perf) { + list_for_each_entry(perf, &dev->perf_props, list) { + kfree(perf->attr_group); + perf->attr_group = NULL; + } + kobject_del(dev->kobj_perf); + kobject_put(dev->kobj_perf); + dev->kobj_perf = NULL; + } 
+ if (dev->kobj_node) { sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); sysfs_remove_file(dev->kobj_node, &dev->attr_name); @@ -796,8 +561,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; int ret; - uint32_t i; + uint32_t i, num_attrs; + struct attribute **attrs; if (WARN_ON(dev->kobj_node)) return -EEXIST; @@ -826,6 +593,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (!dev->kobj_iolink) return -ENOMEM; + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); + if (!dev->kobj_perf) + return -ENOMEM; + /* * Creating sysfs files for node properties */ @@ -903,11 +674,38 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (ret < 0) return ret; i++; -} + } + + /* All hardware blocks have the same number of attributes. */ + num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); + list_for_each_entry(perf, &dev->perf_props, list) { + perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) + * num_attrs + sizeof(struct attribute_group), + GFP_KERNEL); + if (!perf->attr_group) + return -ENOMEM; + + attrs = (struct attribute **)(perf->attr_group + 1); + if (!strcmp(perf->block_name, "iommu")) { + /* Information of IOMMU's num_counters and counter_ids is shown + * under /sys/bus/event_source/devices/amd_iommu. We don't + * duplicate it here. + */ + perf_attr_iommu[0].data = perf->max_concurrent; + for (i = 0; i < num_attrs; i++) + attrs[i] = &perf_attr_iommu[i].attr.attr; + } + perf->attr_group->name = perf->block_name; + perf->attr_group->attrs = attrs; + ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); + if (ret < 0) + return ret; + } return 0; } +/* Called with write topology lock acquired */ static int kfd_build_sysfs_node_tree(void) { struct kfd_topology_device *dev; @@ -924,6 +722,7 @@ static int kfd_build_sysfs_node_tree(void) return 0; } +/* Called with write topology lock acquired */ static void kfd_remove_sysfs_node_tree(void) { struct kfd_topology_device *dev; @@ -995,75 +794,246 @@ static void kfd_topology_release_sysfs(void) } } +/* Called with write topology_lock acquired */ +static void kfd_topology_update_device_list(struct list_head *temp_list, + struct list_head *master_list) +{ + while (!list_empty(temp_list)) { + list_move_tail(temp_list->next, master_list); + sys_props.num_devices++; + } +} + +static void kfd_debug_print_topology(void) +{ + struct kfd_topology_device *dev; + + down_read(&topology_lock); + + dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); + if (dev) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) { + pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", + dev->node_props.device_id, + dev->node_props.vendor_id); + } else if (dev->node_props.cpu_cores_count) + pr_info("Topology: Add CPU node\n"); + else if (dev->node_props.simd_count) + pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", + dev->node_props.device_id, + dev->node_props.vendor_id); + } + up_read(&topology_lock); +} + +/* Helper function for initializing platform_xx members of + * kfd_system_properties. Uses OEM info from the last CPU/APU node.
+ */ +static void kfd_update_system_properties(void) +{ + struct kfd_topology_device *dev; + + down_read(&topology_lock); + dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); + if (dev) { + sys_props.platform_id = + (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; + sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); + sys_props.platform_rev = dev->oem_revision; + } + up_read(&topology_lock); +} + +static void find_system_memory(const struct dmi_header *dm, + void *private) +{ + struct kfd_mem_properties *mem; + u16 mem_width, mem_clock; + struct kfd_topology_device *kdev = + (struct kfd_topology_device *)private; + const u8 *dmi_data = (const u8 *)(dm + 1); + + if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { + mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); + mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); + list_for_each_entry(mem, &kdev->mem_props, list) { + if (mem_width != 0xFFFF && mem_width != 0) + mem->width = mem_width; + if (mem_clock != 0) + mem->mem_clk_max = mem_clock; + } + } +} + +/* + * Performance counter information is not part of CRAT, but we would like to + * put it in sysfs under the topology directory for the Thunk to get the data. + * This function is called before updating the sysfs. + */ +static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) +{ + struct kfd_perf_properties *props; + + if (amd_iommu_pc_supported()) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + strcpy(props->block_name, "iommu"); + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ + list_add_tail(&props->list, &kdev->perf_props); + } + + return 0; +} + +/* kfd_add_non_crat_information - Add information that is not currently + * defined in CRAT but is necessary for KFD topology + * @dev - topology device to which additional info is added + */ +static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) +{ + /* Check if CPU only node. */ + if (!kdev->gpu) { + /* Add system memory information */ + dmi_walk(find_system_memory, kdev); + } + /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ +} + +/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. + * Ignore CRAT for all other devices. AMD APU is identified if both CPU + * and GPU cores are present. + * @device_list - topology device list created by parsing ACPI CRAT table. + * @return - TRUE if invalid, FALSE if valid. + */ +static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) +{ + struct kfd_topology_device *dev; + + list_for_each_entry(dev, device_list, list) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) + return false; + } + pr_info("Ignoring ACPI CRAT on non-APU system\n"); + return true; +} + int kfd_topology_init(void) { void *crat_image = NULL; size_t image_size = 0; int ret; - - /* - * Initialize the head for the topology device list + struct list_head temp_topology_device_list; + int cpu_only_node = 0; + struct kfd_topology_device *kdev; + int proximity_domain; + + /* topology_device_list - Master list of all topology devices + * temp_topology_device_list - temporary list created while parsing CRAT + * or VCRAT.
Once parsing is complete, the contents of the list are moved to + * topology_device_list */ + + /* Initialize the heads for both lists */ INIT_LIST_HEAD(&topology_device_list); + INIT_LIST_HEAD(&temp_topology_device_list); init_rwsem(&topology_lock); - topology_crat_parsed = 0; memset(&sys_props, 0, sizeof(sys_props)); + /* Proximity domains in ACPI CRAT tables start counting at + * 0. The same should be true for virtual CRAT tables created + * at this stage. GPUs added later in kfd_topology_add_device + * use a counter. + */ + proximity_domain = 0; + /* - * Get the CRAT image from the ACPI + * Get the CRAT image from the ACPI. If ACPI doesn't have one, + * or if the ACPI CRAT is invalid, create a virtual CRAT. + * NOTE: The current implementation expects all AMD APUs to have + * CRAT. If no CRAT is available, it is assumed to be a CPU */ - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - if (ret == 0 && image_size > 0) { - pr_info("Found CRAT image with size=%zd\n", image_size); - crat_image = kmalloc(image_size, GFP_KERNEL); - if (!crat_image) { - ret = -ENOMEM; - pr_err("No memory for allocating CRAT image\n"); - goto err; + ret = kfd_create_crat_image_acpi(&crat_image, &image_size); + if (!ret) { + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret || + kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { + kfd_release_topology_device_list( + &temp_topology_device_list); + kfd_destroy_crat_image(crat_image); + crat_image = NULL; } - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - - if (ret == 0) { - down_write(&topology_lock); - ret = kfd_parse_crat_table(crat_image); - if (ret == 0) - ret = kfd_topology_update_sysfs(); - up_write(&topology_lock); - } else { - pr_err("Couldn't get CRAT table size from ACPI\n"); + } + + if (!crat_image) { + ret = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_CPU, NULL, + proximity_domain); + cpu_only_node = 1; + if (ret) { + pr_err("Error creating VCRAT table for CPU\n"); + return ret; } - kfree(crat_image); - } else if (ret == -ENODATA) { - ret = 0; - } else { - pr_err("Couldn't get CRAT table size from ACPI\n"); + + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret) { + pr_err("Error parsing VCRAT table for CPU\n"); + goto err; + } + } + + kdev = list_first_entry(&temp_topology_device_list, + struct kfd_topology_device, list); + kfd_add_perf_to_topology(kdev); + + down_write(&topology_lock); + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + ret = kfd_topology_update_sysfs(); + up_write(&topology_lock); + + if (!ret) { + sys_props.generation_count++; + kfd_update_system_properties(); + kfd_debug_print_topology(); + pr_info("Finished initializing topology\n"); + } else + pr_err("Failed to update topology in sysfs ret=%d\n", ret); + + /* For nodes with GPU, this information gets added + * when GPU is detected (kfd_topology_add_device).
+ */ + if (cpu_only_node) { + /* Add additional information to CPU only node created above */ + down_write(&topology_lock); + kdev = list_first_entry(&topology_device_list, + struct kfd_topology_device, list); + up_write(&topology_lock); + kfd_add_non_crat_information(kdev); } err: - pr_info("Finished initializing topology ret=%d\n", ret); + kfd_destroy_crat_image(crat_image); return ret; } void kfd_topology_shutdown(void) { + down_write(&topology_lock); kfd_topology_release_sysfs(); kfd_release_live_view(); -} - -static void kfd_debug_print_topology(void) -{ - struct kfd_topology_device *dev; - uint32_t i = 0; - - pr_info("DEBUG PRINT OF TOPOLOGY:"); - list_for_each_entry(dev, &topology_device_list, list) { - pr_info("Node: %d\n", i); - pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no")); - pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count); - pr_info("\tSIMD count: %d", dev->node_props.simd_count); - i++; - } + up_write(&topology_lock); } static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) @@ -1072,11 +1042,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; + struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd); + gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info); + + local_mem_size = local_mem_info.local_mem_size_private + + local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor; @@ -1091,19 +1065,26 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) return hashout; } - +/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If + * the GPU device is not already present in the topology device + * list then return NULL. This means a new topology device has to + * be created for this GPU. + * TODO: Rather than assigning @gpu to the first topology device without + * a GPU attached, it would be better to have a more stringent check. + */ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; + down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; } - + up_write(&topology_lock); return out_dev; } @@ -1115,84 +1096,196 @@ static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) */ } +/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, + * patch this after CRAT parsing. + */ +static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) +{ + struct kfd_mem_properties *mem; + struct kfd_local_mem_info local_mem_info; + + if (!dev) + return; + + /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with + * a single bank of VRAM local memory.
+ * for dGPUs - VCRAT reports only one bank of Local Memory + * for APUs - If CRAT from ACPI reports more than one bank, then + * all the banks will report the same mem_clk_max information + */ + dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd, + &local_mem_info); + + list_for_each_entry(mem, &dev->mem_props, list) + mem->mem_clk_max = local_mem_info.mem_clk_max; +} + +static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) +{ + struct kfd_iolink_properties *link; + + if (!dev || !dev->gpu) + return; + + /* GPU only creates direct links, so apply the flags setting to all */ + if (dev->gpu->device_info->asic_family == CHIP_HAWAII) + list_for_each_entry(link, &dev->io_link_props, list) + link->flags = CRAT_IOLINK_FLAGS_ENABLED | + CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; +} + int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; - int res; + struct kfd_cu_info cu_info; + int res = 0; + struct list_head temp_topology_device_list; + void *crat_image = NULL; + size_t image_size = 0; + int proximity_domain; + + INIT_LIST_HEAD(&temp_topology_device_list); gpu_id = kfd_generate_gpu_id(gpu); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - down_write(&topology_lock); - /* - * Try to assign the GPU to existing topology device (generated from - * CRAT table + proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); + + /* Check to see if this gpu device exists in the topology_device_list. + * If so, assign the gpu to that device, + * else create a Virtual CRAT for this gpu device and then parse that + * CRAT to create a new topology device. Once created, assign the gpu to + * that topology device */ dev = kfd_assign_gpu(gpu); if (!dev) { - pr_info("GPU was not found in the current topology. Extending.\n"); - kfd_debug_print_topology(); - dev = kfd_create_topology_device(); - if (!dev) { - res = -ENOMEM; + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, gpu, + proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + return res; + } + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); goto err; } - dev->gpu = gpu; - /* - * TODO: Make a call to retrieve topology information from the - * GPU vBIOS - */ + down_write(&topology_lock); + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); /* Update the SYSFS tree, since we added another topology * device */ - if (kfd_topology_update_sysfs() < 0) - kfd_topology_release_sysfs(); - + res = kfd_topology_update_sysfs(); + up_write(&topology_lock); + + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology.
res=%d\n", + gpu_id, res); + dev = kfd_assign_gpu(gpu); + if (WARN_ON(!dev)) { + res = -ENODEV; + goto err; + } } dev->gpu_id = gpu_id; gpu->id = gpu_id; + + /* TODO: Move the following lines to function + * kfd_add_non_crat_information + */ + + /* Fill-in additional information that is not available in CRAT but + * needed for the topology + */ + + dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); + dev->node_props.simd_arrays_per_engine = + cu_info.num_shader_arrays_per_engine; + dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; - dev->node_props.location_id = (gpu->pdev->bus->number << 24) + - (gpu->pdev->devfn & 0xffffff); - /* - * TODO: Retrieve max engine clock values from KGD - */ + dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, + gpu->pdev->devfn); + dev->node_props.max_engine_clk_fcompute = + dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); + dev->node_props.max_engine_clk_ccompute = + cpufreq_quick_get_max(0) / 1000; + + kfd_fill_mem_clk_max_info(dev); + kfd_fill_iolink_non_crat_info(dev); + + switch (dev->gpu->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_TONGA: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pr_debug("Adding doorbell packet type capability\n"); + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->gpu->device_info->asic_family); + } + /* Fix errors in CZ CRAT. + * simd_count: Carrizo CRAT reports wrong simd_count, probably + * because it doesn't consider masked out CUs + * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd + * capability flag: Carrizo CRAT doesn't report IOMMU flags + */ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { - dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; - pr_info("Adding doorbell packet type capability\n"); + dev->node_props.simd_count = + cu_info.simd_per_cu * cu_info.cu_active_number; + dev->node_props.max_waves_per_simd = 10; + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; } - res = 0; - -err: - up_write(&topology_lock); + kfd_debug_print_topology(); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 1); - +err: + kfd_destroy_crat_image(crat_image); return res; } int kfd_topology_remove_device(struct kfd_dev *gpu) { - struct kfd_topology_device *dev; + struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; int res = -ENODEV; down_write(&topology_lock); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry_safe(dev, tmp, &topology_device_list, list) if (dev->gpu == gpu) { gpu_id = dev->gpu_id; kfd_remove_sysfs_node_entry(dev); kfd_release_topology_device(dev); + sys_props.num_devices--; res = 0; if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); @@ -1201,28 +1294,32 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) up_write(&topology_lock); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 0); return res; } -/* - * When idx is out of bounds, the function will return NULL +/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD + * topology. 
If a GPU device is found at @idx, then a valid kfd_dev pointer is + * returned through @kdev + * Return - 0: On success (@kdev will be NULL for non-GPU nodes) + * -1: If end of list */ -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; uint8_t device_idx = 0; + *kdev = NULL; down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) { if (device_idx == idx) { - device = top_dev->gpu; - break; + *kdev = top_dev->gpu; + up_read(&topology_lock); + return 0; } device_idx++; @@ -1230,6 +1327,88 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) up_read(&topology_lock); - return device; + return -1; + +} + +static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) +{ + const struct cpuinfo_x86 *cpuinfo; + int first_cpu_of_numa_node; + + if (!cpumask || cpumask == cpu_none_mask) + return -1; + first_cpu_of_numa_node = cpumask_first(cpumask); + if (first_cpu_of_numa_node >= nr_cpu_ids) + return -1; + cpuinfo = &cpu_data(first_cpu_of_numa_node); + return cpuinfo->apicid; } + +/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor + * of the given NUMA node (numa_node_id) + * Return -1 on failure + */ +int kfd_numa_node_to_apic_id(int numa_node_id) +{ + if (numa_node_id == -1) { + pr_warn("Invalid NUMA Node. Use online CPU mask\n"); + return kfd_cpumask_to_apic_id(cpu_online_mask); + } + return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); +} + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) +{ + struct kfd_topology_device *dev; + unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); + + list_for_each_entry(dev, &topology_device_list, list) { + if (!dev->gpu) { + i++; + continue; + } + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = dqm_debugfs_hqds(m, dev->gpu->dqm); + if (r) + break; + } + + up_read(&topology_lock); + + return r; +} + +int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) +{ + struct kfd_topology_device *dev; + unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); + + list_for_each_entry(dev, &topology_device_list, list) { + if (!dev->gpu) { + i++; + continue; + } + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); + if (r) + break; + } + + up_read(&topology_lock); + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index c3ddb9b95ff8..53fca1f45401 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -39,8 +39,13 @@ #define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_RESERVED 0xfffff000 -#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 +#define HSA_CAP_RESERVED 0xffffc000 + +#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 +#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { uint32_t cpu_cores_count; @@ -91,8 +96,6 @@ struct kfd_mem_properties { struct attribute attr; }; -#define KFD_TOPOLOGY_CPU_SIBLINGS 256 - #define HSA_CACHE_TYPE_DATA 0x00000001 #define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 #define
HSA_CACHE_TYPE_CPU 0x00000004 @@ -109,7 +112,7 @@ struct kfd_cache_properties { uint32_t cache_assoc; uint32_t cache_latency; uint32_t cache_type; - uint8_t sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS]; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; struct kobject *kobj; struct attribute attr; }; @@ -132,24 +135,36 @@ struct kfd_iolink_properties { struct attribute attr; }; +struct kfd_perf_properties { + struct list_head list; + char block_name[16]; + uint32_t max_concurrent; + struct attribute_group *attr_group; +}; + struct kfd_topology_device { struct list_head list; uint32_t gpu_id; + uint32_t proximity_domain; struct kfd_node_properties node_props; - uint32_t mem_bank_count; struct list_head mem_props; uint32_t cache_count; struct list_head cache_props; uint32_t io_link_count; struct list_head io_link_props; + struct list_head perf_props; struct kfd_dev *gpu; struct kobject *kobj_node; struct kobject *kobj_mem; struct kobject *kobj_cache; struct kobject *kobj_iolink; + struct kobject *kobj_perf; struct attribute attr_gpuid; struct attribute attr_name; struct attribute attr_props; + uint8_t oem_id[CRAT_OEMID_LENGTH]; + uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; + uint32_t oem_revision; }; struct kfd_system_properties { @@ -164,6 +179,12 @@ struct kfd_system_properties { struct attribute attr_props; }; +struct kfd_topology_device *kfd_create_topology_device( + struct list_head *device_list); +void kfd_release_topology_device_list(struct list_head *device_list); +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); #endif /* __KFD_TOPOLOGY_H__ */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index f516fd10e6ba..a6752bd0c871 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -46,6 +46,28 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_cu_info { + uint32_t num_shader_engines; + uint32_t num_shader_arrays_per_engine; + uint32_t num_cu_per_sh; + uint32_t cu_active_number; + uint32_t cu_ao_mask; + uint32_t simd_per_cu; + uint32_t max_waves_per_simd; + uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + uint32_t cu_bitmap[4][4]; +}; + +/* For getting GPU local memory information from KGD */ +struct kfd_local_mem_info { + uint64_t local_mem_size_private; + uint64_t local_mem_size_public; + uint32_t vram_width; + uint32_t mem_clk_max; +}; + enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -106,7 +128,7 @@ struct tile_config { * * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture * - * @get_vmem_size: Retrieves (physical) size of VRAM + * @get_local_mem_info: Retrieves information about GPU local memory * * @get_gpu_clock_counter: Retrieves GPU clock counter * @@ -131,6 +153,12 @@ struct tile_config { * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. * used only for no HWS mode. * + * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. + * Array is allocated with kmalloc and needs to be freed with kfree by the caller. + * + * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs. + * Array is allocated with kmalloc and needs to be freed with kfree by the caller. + * * @hqd_is_occupied: Checks if a hqd slot is occupied. * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
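The HSA_CAP_DOORBELL_TYPE_* defines in the kfd_topology.h hunk above carve a two-bit field (bits 13:12) out of the capability word; kfd_topology_add_device() fills it with the shift-and-mask pattern shown earlier in this patch. A compile-time sketch of that encode, plus an assumed decode helper such as a consumer of the capability word might use (the macro values are copied from the hunk, the helper names are illustrative):

#include <stdint.h>
#include <stdio.h>

#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK  0x00003000
#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
#define HSA_CAP_DOORBELL_TYPE_PRE_1_0         0x0
#define HSA_CAP_DOORBELL_TYPE_1_0             0x1

static uint32_t cap_set_doorbell_type(uint32_t cap, uint32_t type)
{
	/* Same shift-then-mask pattern used in kfd_topology_add_device() */
	return cap | ((type << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
		      HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
}

static uint32_t cap_get_doorbell_type(uint32_t cap)
{
	/* Assumed decode: mask the field, then shift it back down */
	return (cap & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK) >>
	       HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT;
}

int main(void)
{
	uint32_t cap = cap_set_doorbell_type(0, HSA_CAP_DOORBELL_TYPE_1_0);

	printf("doorbell type = %u\n", cap_get_doorbell_type(cap));
	return 0;
}

Masking after the shift keeps a bogus type value from spilling into neighbouring capability bits, which is why the kernel code applies the mask rather than trusting the input.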
@@ -147,6 +175,10 @@ struct tile_config { * * @get_tile_config: Returns GPU-specific tiling mode information * + * @get_cu_info: Retrieves activated cu info + * + * @get_vram_usage: Returns current VRAM usage + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -158,7 +190,8 @@ struct kfd2kgd_calls { void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); - uint64_t (*get_vmem_size)(struct kgd_dev *kgd); + void (*get_local_mem_info)(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); @@ -184,7 +217,16 @@ struct kfd2kgd_calls { uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); - int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); + + int (*hqd_dump)(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); + + int (*hqd_sdma_dump)(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); @@ -224,6 +266,10 @@ struct kfd2kgd_calls { void (*set_scratch_backing_va)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); + + void (*get_cu_info)(struct kgd_dev *kgd, + struct kfd_cu_info *cu_info); + uint64_t (*get_vram_usage)(struct kgd_dev *kgd); }; /** diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h index 20234820194b..717fbae1d362 100644 --- a/drivers/gpu/drm/amd/include/vi_structs.h +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -153,6 +153,8 @@ struct vi_sdma_mqd { uint32_t reserved_125; uint32_t reserved_126; uint32_t reserved_127; + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; }; struct vi_mqd { diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2e065facdce7..e2adfbef7d6b 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -13,6 +13,7 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_atomic_helper.h> #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" @@ -20,13 +21,6 @@ #include "armada_hw.h" #include "armada_trace.h" -struct armada_frame_work { - struct armada_plane_work work; - struct drm_pending_vblank_event *event; - struct armada_regs regs[4]; - struct drm_framebuffer *old_fb; -}; - enum csc_mode { CSC_AUTO = 0, CSC_YUV_CCIR601 = 1, @@ -168,16 +162,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc) void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y) { + const struct drm_format_info *format = fb->format; + unsigned int num_planes = format->num_planes; u32 addr = drm_fb_obj(fb)->dev_addr; - int num_planes = fb->format->num_planes; int i; if (num_planes > 3) num_planes = 3; - for (i = 0; i < num_planes; i++) + addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] + + x * format->cpp[0]; + + y /= format->vsub; + x /= format->hsub; + + for (i = 1; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + - x * fb->format->cpp[i]; + x * format->cpp[i]; for (; i < 3; i++) addrs[i] = 0; } @@ -209,6 +210,38 @@ static unsigned 
armada_drm_crtc_calc_fb(struct drm_framebuffer *fb, return i; } +static void armada_drm_plane_work_call(struct armada_crtc *dcrtc, + struct armada_plane_work *work, + void (*fn)(struct armada_crtc *, struct armada_plane_work *)) +{ + struct armada_plane *dplane = drm_to_armada_plane(work->plane); + struct drm_pending_vblank_event *event; + struct drm_framebuffer *fb; + + if (fn) + fn(dcrtc, work); + drm_crtc_vblank_put(&dcrtc->crtc); + + event = work->event; + fb = work->old_fb; + if (event || fb) { + struct drm_device *dev = dcrtc->crtc.dev; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + if (event) + drm_crtc_send_vblank_event(&dcrtc->crtc, event); + if (fb) + __armada_drm_queue_unref_work(dev, fb); + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + if (work->need_kfree) + kfree(work); + + wake_up(&dplane->frame_wait); +} + static void armada_drm_plane_work_run(struct armada_crtc *dcrtc, struct drm_plane *plane) { @@ -216,24 +249,19 @@ static void armada_drm_plane_work_run(struct armada_crtc *dcrtc, struct armada_plane_work *work = xchg(&dplane->work, NULL); /* Handle any pending frame work. */ - if (work) { - work->fn(dcrtc, dplane, work); - drm_crtc_vblank_put(&dcrtc->crtc); - } - - wake_up(&dplane->frame_wait); + if (work) + armada_drm_plane_work_call(dcrtc, work, work->fn); } int armada_drm_plane_work_queue(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) + struct armada_plane_work *work) { + struct armada_plane *plane = drm_to_armada_plane(work->plane); int ret; ret = drm_crtc_vblank_get(&dcrtc->crtc); - if (ret) { - DRM_ERROR("failed to acquire vblank counter\n"); + if (ret) return ret; - } ret = cmpxchg(&plane->work, NULL, work) ? -EBUSY : 0; if (ret) @@ -247,51 +275,60 @@ int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout) return wait_event_timeout(plane->frame_wait, !plane->work, timeout); } -struct armada_plane_work *armada_drm_plane_work_cancel( - struct armada_crtc *dcrtc, struct armada_plane *plane) +void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc, + struct armada_plane *dplane) { - struct armada_plane_work *work = xchg(&plane->work, NULL); + struct armada_plane_work *work = xchg(&dplane->work, NULL); if (work) - drm_crtc_vblank_put(&dcrtc->crtc); - - return work; + armada_drm_plane_work_call(dcrtc, work, work->cancel); } -static int armada_drm_crtc_queue_frame_work(struct armada_crtc *dcrtc, - struct armada_frame_work *work) +static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc, + struct armada_plane_work *work) { - struct armada_plane *plane = drm_to_armada_plane(dcrtc->crtc.primary); + unsigned long flags; - return armada_drm_plane_work_queue(dcrtc, plane, &work->work); + spin_lock_irqsave(&dcrtc->irq_lock, flags); + armada_drm_crtc_update_regs(dcrtc, work->regs); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); } -static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) +static void armada_drm_crtc_complete_disable_work(struct armada_crtc *dcrtc, + struct armada_plane_work *work) { - struct armada_frame_work *fwork = container_of(work, struct armada_frame_work, work); - struct drm_device *dev = dcrtc->crtc.dev; unsigned long flags; + if (dcrtc->plane == work->plane) + dcrtc->plane = NULL; + spin_lock_irqsave(&dcrtc->irq_lock, flags); - armada_drm_crtc_update_regs(dcrtc, fwork->regs); + armada_drm_crtc_update_regs(dcrtc, work->regs); 
spin_unlock_irqrestore(&dcrtc->irq_lock, flags); +} - if (fwork->event) { - spin_lock_irqsave(&dev->event_lock, flags); - drm_crtc_send_vblank_event(&dcrtc->crtc, fwork->event); - spin_unlock_irqrestore(&dev->event_lock, flags); - } +static struct armada_plane_work * +armada_drm_crtc_alloc_plane_work(struct drm_plane *plane) +{ + struct armada_plane_work *work; + int i = 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return NULL; - /* Finally, queue the process-half of the cleanup. */ - __armada_drm_queue_unref_work(dcrtc->crtc.dev, fwork->old_fb); - kfree(fwork); + work->plane = plane; + work->fn = armada_drm_crtc_complete_frame_work; + work->need_kfree = true; + armada_reg_queue_end(work->regs, i); + + return work; } static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, struct drm_framebuffer *fb, bool force) { - struct armada_frame_work *work; + struct armada_plane_work *work; if (!fb) return; @@ -302,15 +339,11 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, return; } - work = kmalloc(sizeof(*work), GFP_KERNEL); + work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary); if (work) { - int i = 0; - work->work.fn = armada_drm_crtc_complete_frame_work; - work->event = NULL; work->old_fb = fb; - armada_reg_queue_end(work->regs, i); - if (armada_drm_crtc_queue_frame_work(dcrtc, work) == 0) + if (armada_drm_plane_work_queue(dcrtc, work) == 0) return; kfree(work); @@ -373,8 +406,11 @@ static void armada_drm_crtc_prepare(struct drm_crtc *crtc) * the new mode parameters. */ plane = dcrtc->plane; - if (plane) + if (plane) { drm_plane_force_disable(plane); + WARN_ON(!armada_drm_plane_work_wait(drm_to_armada_plane(plane), + HZ)); + } } /* The mode_config.mutex will be held for this call */ @@ -440,11 +476,11 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat) if (stat & VSYNC_IRQ) drm_crtc_handle_vblank(&dcrtc->crtc); - spin_lock(&dcrtc->irq_lock); ovl_plane = dcrtc->plane; if (ovl_plane) armada_drm_plane_work_run(dcrtc, ovl_plane); + spin_lock(&dcrtc->irq_lock); if (stat & GRA_FRAME_IRQ && dcrtc->interlaced) { int i = stat & GRA_FRAME_IRQ0 ? 
0 : 1; uint32_t val; @@ -536,18 +572,14 @@ static uint32_t armada_drm_crtc_calculate_csc(struct armada_crtc *dcrtc) return val; } -static void armada_drm_primary_set(struct drm_crtc *crtc, - struct drm_plane *plane, int x, int y) +static void armada_drm_gra_plane_regs(struct armada_regs *regs, + struct drm_framebuffer *fb, struct armada_plane_state *state, + int x, int y, bool interlaced) { - struct armada_plane_state *state = &drm_to_armada_plane(plane)->state; - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct armada_regs regs[8]; - bool interlaced = dcrtc->interlaced; - unsigned i; + unsigned int i; u32 ctrl0; - i = armada_drm_crtc_calc_fb(plane->fb, x, y, regs, interlaced); - + i = armada_drm_crtc_calc_fb(fb, x, y, regs, interlaced); armada_reg_queue_set(regs, i, state->dst_yx, LCD_SPU_GRA_OVSA_HPXL_VLN); armada_reg_queue_set(regs, i, state->src_hw, LCD_SPU_GRA_HPXL_VLN); armada_reg_queue_set(regs, i, state->dst_hw, LCD_SPU_GZM_HPXL_VLN); @@ -559,9 +591,21 @@ static void armada_drm_primary_set(struct drm_crtc *crtc, armada_reg_queue_mod(regs, i, ctrl0, CFG_GRAFORMAT | CFG_GRA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU | CFG_YUV2RGB) | - CFG_PALETTE_ENA | CFG_GRA_FTOGGLE, + CFG_PALETTE_ENA | CFG_GRA_FTOGGLE | + CFG_GRA_HSMOOTH | CFG_GRA_ENA, LCD_SPU_DMA_CTRL0); armada_reg_queue_end(regs, i); +} + +static void armada_drm_primary_set(struct drm_crtc *crtc, + struct drm_plane *plane, int x, int y) +{ + struct armada_plane_state *state = &drm_to_armada_plane(plane)->state; + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_regs regs[8]; + bool interlaced = dcrtc->interlaced; + + armada_drm_gra_plane_regs(regs, plane->fb, state, x, y, interlaced); armada_drm_crtc_update_regs(dcrtc, regs); } @@ -581,7 +625,7 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, interlaced = !!(adj->flags & DRM_MODE_FLAG_INTERLACE); - val = CFG_GRA_ENA | CFG_GRA_HSMOOTH; + val = CFG_GRA_ENA; val |= CFG_GRA_FMT(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->fmt); val |= CFG_GRA_MOD(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->mod); @@ -633,8 +677,6 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, /* Now compute the divider for real */ dcrtc->variant->compute_clock(dcrtc, adj, &sclk); - /* Ensure graphic fifo is enabled */ - armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); armada_reg_queue_set(regs, i, sclk, LCD_CFG_SCLK_DIV); if (interlaced ^ dcrtc->interlaced) { @@ -647,6 +689,9 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, spin_lock_irqsave(&dcrtc->irq_lock, flags); + /* Ensure graphic fifo is enabled */ + armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); + /* Even interlaced/progressive frame */ dcrtc->v[1].spu_v_h_total = adj->crtc_vtotal << 16 | adj->crtc_htotal; @@ -729,47 +774,13 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, - struct drm_plane *plane) -{ - u32 sram_para1, dma_ctrl0_mask; - - /* - * Drop our reference on any framebuffer attached to this plane. - * We don't need to NULL this out as drm_plane_force_disable(), - * and __setplane_internal() will do so for an overlay plane, and - * __drm_helper_disable_unused_functions() will do so for the - * primary plane. 
- */ - if (plane->fb) - drm_framebuffer_put(plane->fb); - - /* Power down the Y/U/V FIFOs */ - sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; - - /* Power down most RAMs and FIFOs if this is the primary plane */ - if (plane->type == DRM_PLANE_TYPE_PRIMARY) { - sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | - CFG_PDWN32x32 | CFG_PDWN64x66; - dma_ctrl0_mask = CFG_GRA_ENA; - } else { - dma_ctrl0_mask = CFG_DMA_ENA; - } - - spin_lock_irq(&dcrtc->irq_lock); - armada_updatel(0, dma_ctrl0_mask, dcrtc->base + LCD_SPU_DMA_CTRL0); - spin_unlock_irq(&dcrtc->irq_lock); - - armada_updatel(sram_para1, 0, dcrtc->base + LCD_SPU_SRAM_PARA1); -} - /* The mode_config.mutex will be held for this call */ static void armada_drm_crtc_disable(struct drm_crtc *crtc) { - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - armada_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - armada_drm_crtc_plane_disable(dcrtc, crtc->primary); + + /* Disable our primary plane when we disable the CRTC. */ + crtc->primary->funcs->disable_plane(crtc->primary, NULL); } static const struct drm_crtc_helper_funcs armada_crtc_helper_funcs = { @@ -879,9 +890,11 @@ static int armada_drm_crtc_cursor_update(struct armada_crtc *dcrtc, bool reload) return 0; } + spin_lock_irq(&dcrtc->irq_lock); para1 = readl_relaxed(dcrtc->base + LCD_SPU_SRAM_PARA1); armada_updatel(CFG_CSB_256x32, CFG_CSB_256x32 | CFG_PDWN256x32, dcrtc->base + LCD_SPU_SRAM_PARA1); + spin_unlock_irq(&dcrtc->irq_lock); /* * Initialize the transparency if the SRAM was powered down. @@ -1021,7 +1034,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct armada_frame_work *work; + struct armada_plane_work *work; unsigned i; int ret; @@ -1029,11 +1042,10 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, if (fb->format != crtc->primary->fb->format) return -EINVAL; - work = kmalloc(sizeof(*work), GFP_KERNEL); + work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary); if (!work) return -ENOMEM; - work->work.fn = armada_drm_crtc_complete_frame_work; work->event = event; work->old_fb = dcrtc->crtc.primary->fb; @@ -1047,7 +1059,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, */ drm_framebuffer_get(fb); - ret = armada_drm_crtc_queue_frame_work(dcrtc, work); + ret = armada_drm_plane_work_queue(dcrtc, work); if (ret) { /* Undo our reference above */ drm_framebuffer_put(fb); @@ -1127,14 +1139,195 @@ static const struct drm_crtc_funcs armada_crtc_funcs = { .disable_vblank = armada_drm_crtc_disable_vblank, }; +static void armada_drm_primary_update_state(struct drm_plane_state *state, + struct armada_regs *regs) +{ + struct armada_plane *dplane = drm_to_armada_plane(state->plane); + struct armada_crtc *dcrtc = drm_to_armada_crtc(state->crtc); + struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb); + bool was_disabled; + unsigned int idx = 0; + u32 val; + + val = CFG_GRA_FMT(dfb->fmt) | CFG_GRA_MOD(dfb->mod); + if (dfb->fmt > CFG_420) + val |= CFG_PALETTE_ENA; + if (state->visible) + val |= CFG_GRA_ENA; + if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst)) + val |= CFG_GRA_HSMOOTH; + + was_disabled = !(dplane->state.ctrl0 & CFG_GRA_ENA); + if (was_disabled) + armada_reg_queue_mod(regs, idx, + 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); + + dplane->state.ctrl0 = val; + dplane->state.src_hw = (drm_rect_height(&state->src) & 0xffff0000) | + drm_rect_width(&state->src) >> 16; + dplane->state.dst_hw = 
drm_rect_height(&state->dst) << 16 | + drm_rect_width(&state->dst); + dplane->state.dst_yx = state->dst.y1 << 16 | state->dst.x1; + + armada_drm_gra_plane_regs(regs + idx, &dfb->fb, &dplane->state, + state->src.x1 >> 16, state->src.y1 >> 16, + dcrtc->interlaced); + + dplane->state.vsync_update = !was_disabled; + dplane->state.changed = true; +} + +static int armada_drm_primary_update(struct drm_plane *plane, + struct drm_crtc *crtc, struct drm_framebuffer *fb, + int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, + uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) +{ + struct armada_plane *dplane = drm_to_armada_plane(plane); + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_plane_work *work; + struct drm_plane_state state = { + .plane = plane, + .crtc = crtc, + .fb = fb, + .src_x = src_x, + .src_y = src_y, + .src_w = src_w, + .src_h = src_h, + .crtc_x = crtc_x, + .crtc_y = crtc_y, + .crtc_w = crtc_w, + .crtc_h = crtc_h, + .rotation = DRM_MODE_ROTATE_0, + }; + const struct drm_rect clip = { + .x2 = crtc->mode.hdisplay, + .y2 = crtc->mode.vdisplay, + }; + int ret; + + ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0, + INT_MAX, true, false); + if (ret) + return ret; + + work = &dplane->works[dplane->next_work]; + work->fn = armada_drm_crtc_complete_frame_work; + + if (plane->fb != fb) { + /* + * Take a reference on the new framebuffer - we want to + * hold on to it while the hardware is displaying it. + */ + drm_framebuffer_reference(fb); + + work->old_fb = plane->fb; + } else { + work->old_fb = NULL; + } + + armada_drm_primary_update_state(&state, work->regs); + + if (!dplane->state.changed) + return 0; + + /* Wait for pending work to complete */ + if (armada_drm_plane_work_wait(dplane, HZ / 10) == 0) + armada_drm_plane_work_cancel(dcrtc, dplane); + + if (!dplane->state.vsync_update) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + return 0; + } + + /* Queue it for update on the next interrupt if we are enabled */ + ret = armada_drm_plane_work_queue(dcrtc, work); + if (ret) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + } + + dplane->next_work = !dplane->next_work; + + return 0; +} + +int armada_drm_plane_disable(struct drm_plane *plane, + struct drm_modeset_acquire_ctx *ctx) +{ + struct armada_plane *dplane = drm_to_armada_plane(plane); + struct armada_crtc *dcrtc; + struct armada_plane_work *work; + unsigned int idx = 0; + u32 sram_para1, enable_mask; + + if (!plane->crtc) + return 0; + + /* + * Arrange to power down most RAMs and FIFOs if this is the primary + * plane, otherwise just the YUV FIFOs for the overlay plane. + */ + if (plane->type == DRM_PLANE_TYPE_PRIMARY) { + sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | + CFG_PDWN32x32 | CFG_PDWN64x66; + enable_mask = CFG_GRA_ENA; + } else { + sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; + enable_mask = CFG_DMA_ENA; + } + + dplane->state.ctrl0 &= ~enable_mask; + + dcrtc = drm_to_armada_crtc(plane->crtc); + + /* + * Try to disable the plane and drop our ref on the framebuffer + * at the next frame update. If we fail for any reason, disable + * the plane immediately. 
+ */ + work = &dplane->works[dplane->next_work]; + work->fn = armada_drm_crtc_complete_disable_work; + work->cancel = armada_drm_crtc_complete_disable_work; + work->old_fb = plane->fb; + + armada_reg_queue_mod(work->regs, idx, + 0, enable_mask, LCD_SPU_DMA_CTRL0); + armada_reg_queue_mod(work->regs, idx, + sram_para1, 0, LCD_SPU_SRAM_PARA1); + armada_reg_queue_end(work->regs, idx); + + /* Wait for any preceding work to complete, but don't wedge */ + if (WARN_ON(!armada_drm_plane_work_wait(dplane, HZ))) + armada_drm_plane_work_cancel(dcrtc, dplane); + + if (armada_drm_plane_work_queue(dcrtc, work)) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + } + + dplane->next_work = !dplane->next_work; + + return 0; +} + static const struct drm_plane_funcs armada_primary_plane_funcs = { - .update_plane = drm_primary_helper_update, - .disable_plane = drm_primary_helper_disable, + .update_plane = armada_drm_primary_update, + .disable_plane = armada_drm_plane_disable, .destroy = drm_primary_helper_destroy, }; int armada_drm_plane_init(struct armada_plane *plane) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(plane->works); i++) + plane->works[i].plane = &plane->base; + init_waitqueue_head(&plane->frame_wait); return 0; @@ -1225,17 +1418,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", dcrtc); - if (ret < 0) { - kfree(dcrtc); - return ret; - } + if (ret < 0) + goto err_crtc; if (dcrtc->variant->init) { ret = dcrtc->variant->init(dcrtc, dev); - if (ret) { - kfree(dcrtc); - return ret; - } + if (ret) + goto err_crtc; } /* Ensure AXI pipeline is enabled */ @@ -1246,13 +1435,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, dcrtc->crtc.port = port; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; + if (!primary) { + ret = -ENOMEM; + goto err_crtc; + } ret = armada_drm_plane_init(primary); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_universal_plane_init(drm, &primary->base, 0, @@ -1263,7 +1454,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, @@ -1282,6 +1473,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, err_crtc_init: primary->base.funcs->destroy(&primary->base); +err_crtc: + kfree(dcrtc); + return ret; } diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index bab11f483575..445829b8877a 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -36,21 +36,31 @@ struct armada_plane; struct armada_variant; struct armada_plane_work { - void (*fn)(struct armada_crtc *, - struct armada_plane *, - struct armada_plane_work *); + void (*fn)(struct armada_crtc *, struct armada_plane_work *); + void (*cancel)(struct armada_crtc *, struct armada_plane_work *); + bool need_kfree; + struct drm_plane *plane; + struct drm_framebuffer *old_fb; + struct drm_pending_vblank_event *event; + struct armada_regs regs[14]; }; struct armada_plane_state { + u16 src_x; + u16 src_y; u32 src_hw; u32 dst_hw; u32 dst_yx; u32 ctrl0; + bool changed; + bool vsync_update; }; struct armada_plane { struct drm_plane base; wait_queue_head_t frame_wait; + bool next_work; + struct armada_plane_work 
works[2]; struct armada_plane_work *work; struct armada_plane_state state; }; @@ -58,10 +68,10 @@ struct armada_plane { int armada_drm_plane_init(struct armada_plane *plane); int armada_drm_plane_work_queue(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work); + struct armada_plane_work *work); int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout); -struct armada_plane_work *armada_drm_plane_work_cancel( - struct armada_crtc *dcrtc, struct armada_plane *plane); +void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc, + struct armada_plane *plane); void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y); @@ -104,8 +114,8 @@ struct armada_crtc { void armada_drm_crtc_update_regs(struct armada_crtc *, struct armada_regs *); -void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, - struct drm_plane *plane); +int armada_drm_plane_disable(struct drm_plane *plane, + struct drm_modeset_acquire_ctx *ctx); extern struct platform_driver armada_lcd_platform_driver; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index b411b608821a..77b55adaa2ac 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -7,7 +7,7 @@ * published by the Free Software Foundation. */ #include <drm/drmP.h> -#include <drm/drm_plane_helper.h> +#include <drm/drm_atomic_helper.h> #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" @@ -32,11 +32,6 @@ struct armada_ovl_plane_properties { struct armada_ovl_plane { struct armada_plane base; - struct drm_framebuffer *old_fb; - struct { - struct armada_plane_work work; - struct armada_regs regs[13]; - } vbl; struct armada_ovl_plane_properties prop; }; #define drm_to_armada_ovl_plane(p) \ @@ -67,218 +62,204 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop, spin_unlock_irq(&dcrtc->irq_lock); } -static void armada_ovl_retire_fb(struct armada_ovl_plane *dplane, - struct drm_framebuffer *fb) -{ - struct drm_framebuffer *old_fb; - - old_fb = xchg(&dplane->old_fb, fb); - - if (old_fb) - armada_drm_queue_unref_work(dplane->base.base.dev, old_fb); -} - /* === Plane support === */ static void armada_ovl_plane_work(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) + struct armada_plane_work *work) { - struct armada_ovl_plane *dplane = container_of(plane, struct armada_ovl_plane, base); + unsigned long flags; - trace_armada_ovl_plane_work(&dcrtc->crtc, &plane->base); + trace_armada_ovl_plane_work(&dcrtc->crtc, work->plane); - armada_drm_crtc_update_regs(dcrtc, dplane->vbl.regs); - armada_ovl_retire_fb(dplane, NULL); + spin_lock_irqsave(&dcrtc->irq_lock, flags); + armada_drm_crtc_update_regs(dcrtc, work->regs); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); } -static int -armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h, - uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) +static void armada_ovl_plane_update_state(struct drm_plane_state *state, + struct armada_regs *regs) { - struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct drm_rect src = { - .x1 = src_x, - .y1 = src_y, - .x2 = src_x + src_w, - .y2 = src_y + src_h, - }; - struct drm_rect dest = { - .x1 = crtc_x, - .y1 = crtc_y, - .x2 = 
crtc_x + crtc_w, - .y2 = crtc_y + crtc_h, - }; - const struct drm_rect clip = { - .x2 = crtc->mode.hdisplay, - .y2 = crtc->mode.vdisplay, - }; - uint32_t val, ctrl0; - unsigned idx = 0; - bool visible; - int ret; - - trace_armada_ovl_plane_update(plane, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h); - - ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_MODE_ROTATE_0, - 0, INT_MAX, true, false, &visible); - if (ret) - return ret; - - ctrl0 = CFG_DMA_FMT(drm_fb_to_armada_fb(fb)->fmt) | - CFG_DMA_MOD(drm_fb_to_armada_fb(fb)->mod) | - CFG_CBSH_ENA | CFG_DMA_HSMOOTH | CFG_DMA_ENA; - - /* Does the position/size result in nothing to display? */ - if (!visible) - ctrl0 &= ~CFG_DMA_ENA; - - if (!dcrtc->plane) { - dcrtc->plane = plane; - armada_ovl_update_attr(&dplane->prop, dcrtc); - } - - /* FIXME: overlay on an interlaced display */ - /* Just updating the position/size? */ - if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) { - val = (drm_rect_height(&src) & 0xffff0000) | - drm_rect_width(&src) >> 16; - dplane->base.state.src_hw = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_HPXL_VLN); - - val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); - dplane->base.state.dst_hw = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DZM_HPXL_VLN); - - val = dest.y1 << 16 | dest.x1; - dplane->base.state.dst_yx = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_OVSA_HPXL_VLN); - - return 0; - } else if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) { + struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(state->plane); + struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb); + const struct drm_format_info *format; + unsigned int idx = 0; + bool fb_changed; + u32 val, ctrl0; + u16 src_x, src_y; + + ctrl0 = CFG_DMA_FMT(dfb->fmt) | CFG_DMA_MOD(dfb->mod) | CFG_CBSH_ENA; + if (state->visible) + ctrl0 |= CFG_DMA_ENA; + if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst)) + ctrl0 |= CFG_DMA_HSMOOTH; + + /* + * Shifting a YUV packed format image by one pixel causes the U/V + * planes to swap. Compensate for it by also toggling the UV swap. + */ + format = dfb->fb.format; + if (format->num_planes == 1 && state->src.x1 >> 16 & (format->hsub - 1)) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) { /* Power up the Y/U/V FIFOs on ENA 0->1 transitions */ - armada_updatel(0, CFG_PDWN16x66 | CFG_PDWN32x66, - dcrtc->base + LCD_SPU_SRAM_PARA1); + armada_reg_queue_mod(regs, idx, + 0, CFG_PDWN16x66 | CFG_PDWN32x66, + LCD_SPU_SRAM_PARA1); } - if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) - armada_drm_plane_work_cancel(dcrtc, &dplane->base); - - if (plane->fb != fb) { - u32 addrs[3], pixel_format; - int num_planes, hsub; - - /* - * Take a reference on the new framebuffer - we want to - * hold on to it while the hardware is displaying it. 
- */ - drm_framebuffer_get(fb); - - if (plane->fb) - armada_ovl_retire_fb(dplane, plane->fb); + fb_changed = dplane->base.base.fb != &dfb->fb || + dplane->base.state.src_x != state->src.x1 >> 16 || + dplane->base.state.src_y != state->src.y1 >> 16; - src_y = src.y1 >> 16; - src_x = src.x1 >> 16; + dplane->base.state.vsync_update = fb_changed; - armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y); + /* FIXME: overlay on an interlaced display */ + if (fb_changed) { + u32 addrs[3]; - pixel_format = fb->format->format; - hsub = drm_format_horz_chroma_subsampling(pixel_format); - num_planes = fb->format->num_planes; + dplane->base.state.src_y = src_y = state->src.y1 >> 16; + dplane->base.state.src_x = src_x = state->src.x1 >> 16; - /* - * Annoyingly, shifting a YUYV-format image by one pixel - * causes the U/V planes to toggle. Toggle the UV swap. - * (Unfortunately, this causes momentary colour flickering.) - */ - if (src_x & (hsub - 1) && num_planes == 1) - ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + armada_drm_plane_calc_addrs(addrs, &dfb->fb, src_x, src_y); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], + armada_reg_queue_set(regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], + armada_reg_queue_set(regs, idx, addrs[1], LCD_SPU_DMA_START_ADDR_U0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2], + armada_reg_queue_set(regs, idx, addrs[2], LCD_SPU_DMA_START_ADDR_V0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], + armada_reg_queue_set(regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y1); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], + armada_reg_queue_set(regs, idx, addrs[1], LCD_SPU_DMA_START_ADDR_U1); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2], + armada_reg_queue_set(regs, idx, addrs[2], LCD_SPU_DMA_START_ADDR_V1); - val = fb->pitches[0] << 16 | fb->pitches[0]; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + val = dfb->fb.pitches[0] << 16 | dfb->fb.pitches[0]; + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_PITCH_YC); - val = fb->pitches[1] << 16 | fb->pitches[2]; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + val = dfb->fb.pitches[1] << 16 | dfb->fb.pitches[2]; + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_PITCH_UV); } - val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; + val = (drm_rect_height(&state->src) & 0xffff0000) | + drm_rect_width(&state->src) >> 16; if (dplane->base.state.src_hw != val) { dplane->base.state.src_hw = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_HPXL_VLN); } - val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); + val = drm_rect_height(&state->dst) << 16 | drm_rect_width(&state->dst); if (dplane->base.state.dst_hw != val) { dplane->base.state.dst_hw = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DZM_HPXL_VLN); } - val = dest.y1 << 16 | dest.x1; + val = state->dst.y1 << 16 | state->dst.x1; if (dplane->base.state.dst_yx != val) { dplane->base.state.dst_yx = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_OVSA_HPXL_VLN); } if (dplane->base.state.ctrl0 != ctrl0) { dplane->base.state.ctrl0 = ctrl0; - armada_reg_queue_mod(dplane->vbl.regs, idx, ctrl0, + armada_reg_queue_mod(regs, idx, ctrl0, CFG_CBSH_ENA | CFG_DMAFORMAT | CFG_DMA_FTOGGLE | CFG_DMA_HSMOOTH | CFG_DMA_TSTMODE | CFG_DMA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU | CFG_YUV2RGB) | CFG_DMA_ENA, 
LCD_SPU_DMA_CTRL0); + dplane->base.state.vsync_update = true; } - if (idx) { - armada_reg_queue_end(dplane->vbl.regs, idx); - armada_drm_plane_work_queue(dcrtc, &dplane->base, - &dplane->vbl.work); - } - return 0; + + dplane->base.state.changed = idx != 0; + + armada_reg_queue_end(regs, idx); } -static int armada_ovl_plane_disable(struct drm_plane *plane, - struct drm_modeset_acquire_ctx *ctx) +static int +armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h, + uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) { struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); - struct drm_framebuffer *fb; - struct armada_crtc *dcrtc; + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_plane_work *work; + struct drm_plane_state state = { + .plane = plane, + .crtc = crtc, + .fb = fb, + .src_x = src_x, + .src_y = src_y, + .src_w = src_w, + .src_h = src_h, + .crtc_x = crtc_x, + .crtc_y = crtc_y, + .crtc_w = crtc_w, + .crtc_h = crtc_h, + .rotation = DRM_MODE_ROTATE_0, + }; + const struct drm_rect clip = { + .x2 = crtc->mode.hdisplay, + .y2 = crtc->mode.vdisplay, + }; + int ret; - if (!dplane->base.base.crtc) + trace_armada_ovl_plane_update(plane, crtc, fb, + crtc_x, crtc_y, crtc_w, crtc_h, + src_x, src_y, src_w, src_h); + + ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0, + INT_MAX, true, false); + if (ret) + return ret; + + work = &dplane->base.works[dplane->base.next_work]; + + if (plane->fb != fb) { + /* + * Take a reference on the new framebuffer - we want to + * hold on to it while the hardware is displaying it. + */ + drm_framebuffer_reference(fb); + + work->old_fb = plane->fb; + } else { + work->old_fb = NULL; + } + + armada_ovl_plane_update_state(&state, work->regs); + + if (!dplane->base.state.changed) return 0; - dcrtc = drm_to_armada_crtc(dplane->base.base.crtc); + /* Wait for pending work to complete */ + if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) + armada_drm_plane_work_cancel(dcrtc, &dplane->base); + + /* Just updating the position/size? 
*/ + if (!dplane->base.state.vsync_update) { + armada_ovl_plane_work(dcrtc, work); + return 0; + } - armada_drm_plane_work_cancel(dcrtc, &dplane->base); - armada_drm_crtc_plane_disable(dcrtc, plane); + if (!dcrtc->plane) { + dcrtc->plane = plane; + armada_ovl_update_attr(&dplane->prop, dcrtc); + } - dcrtc->plane = NULL; - dplane->base.state.ctrl0 = 0; + /* Queue it for update on the next interrupt if we are enabled */ + ret = armada_drm_plane_work_queue(dcrtc, work); + if (ret) + DRM_ERROR("failed to queue plane work: %d\n", ret); - fb = xchg(&dplane->old_fb, NULL); - if (fb) - drm_framebuffer_put(fb); + dplane->base.next_work = !dplane->base.next_work; return 0; } @@ -362,7 +343,7 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane, static const struct drm_plane_funcs armada_ovl_plane_funcs = { .update_plane = armada_ovl_plane_update, - .disable_plane = armada_ovl_plane_disable, + .disable_plane = armada_drm_plane_disable, .destroy = armada_ovl_plane_destroy, .set_property = armada_ovl_plane_set_property, }; @@ -454,7 +435,8 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) return ret; } - dplane->vbl.work.fn = armada_ovl_plane_work; + dplane->base.works[0].fn = armada_ovl_plane_work; + dplane->base.works[1].fn = armada_ovl_plane_work; ret = drm_universal_plane_init(dev, &dplane->base.base, crtcs, &armada_ovl_plane_funcs, diff --git a/drivers/gpu/drm/armada/armada_trace.h b/drivers/gpu/drm/armada/armada_trace.h index 8dbfea7a00fe..f03a56bda596 100644 --- a/drivers/gpu/drm/armada/armada_trace.h +++ b/drivers/gpu/drm/armada/armada_trace.h @@ -34,14 +34,34 @@ TRACE_EVENT(armada_ovl_plane_update, __field(struct drm_plane *, plane) __field(struct drm_crtc *, crtc) __field(struct drm_framebuffer *, fb) + __field(int, crtc_x) + __field(int, crtc_y) + __field(unsigned int, crtc_w) + __field(unsigned int, crtc_h) + __field(u32, src_x) + __field(u32, src_y) + __field(u32, src_w) + __field(u32, src_h) ), TP_fast_assign( __entry->plane = plane; __entry->crtc = crtc; __entry->fb = fb; + __entry->crtc_x = crtc_x; + __entry->crtc_y = crtc_y; + __entry->crtc_w = crtc_w; + __entry->crtc_h = crtc_h; + __entry->src_x = src_x; + __entry->src_y = src_y; + __entry->src_w = src_w; + __entry->src_h = src_h; ), - TP_printk("plane %p crtc %p fb %p", - __entry->plane, __entry->crtc, __entry->fb) + TP_printk("plane %p crtc %p fb %p crtc @ (%d,%d, %ux%u) src @ (%u,%u, %ux%u)", + __entry->plane, __entry->crtc, __entry->fb, + __entry->crtc_x, __entry->crtc_y, + __entry->crtc_w, __entry->crtc_h, + __entry->src_x >> 16, __entry->src_y >> 16, + __entry->src_w >> 16, __entry->src_h >> 16) ); TRACE_EVENT(armada_ovl_plane_work, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 37445d50816a..b76d49218cf1 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -50,7 +50,8 @@ EXPORT_SYMBOL(__drm_crtc_commit_free); * @state: atomic state * * Free all the memory allocated by drm_atomic_state_init. - * This is useful for drivers that subclass the atomic state. + * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ void drm_atomic_state_default_release(struct drm_atomic_state *state) { @@ -67,7 +68,8 @@ EXPORT_SYMBOL(drm_atomic_state_default_release); * @state: atomic state * * Default implementation for filling in a new atomic state. - * This is useful for drivers that subclass the atomic state. 
+ * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ int drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) @@ -132,7 +134,8 @@ EXPORT_SYMBOL(drm_atomic_state_alloc); * @state: atomic state * * Default implementation for clearing atomic state. - * This is useful for drivers that subclass the atomic state. + * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ void drm_atomic_state_default_clear(struct drm_atomic_state *state) { @@ -947,6 +950,42 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, } /** + * DOC: handling driver private state + * + * Very often the DRM objects exposed to userspace in the atomic modeset API + * (&drm_connector, &drm_crtc and &drm_plane) do not map neatly to the + * underlying hardware. Especially for any kind of shared resources (e.g. shared + * clocks, scaler units, bandwidth and fifo limits shared among a group of + * planes or CRTCs, and so on) it makes sense to model these as independent + * objects. Drivers then need to do similar state tracking and commit ordering for + * such private (since not exposed to userspace) objects as the atomic core and + * helpers already provide for connectors, planes and CRTCs. + * + * To make this easier on drivers the atomic core provides some support to track + * driver private state objects using struct &drm_private_obj, with the + * associated state struct &drm_private_state. + * + * Similar to userspace-exposed objects, private state structures can be + * acquired by calling drm_atomic_get_private_obj_state(). Since this function + * does not take care of locking, drivers should wrap it for each type of + * private state object they have with the required call to drm_modeset_lock() + * for the corresponding &drm_modeset_lock. + * + * All private state structures contained in a &drm_atomic_state update can be + * iterated using for_each_oldnew_private_obj_in_state(), + * for_each_new_private_obj_in_state() and for_each_old_private_obj_in_state(). + * Drivers are recommended to wrap these for each type of driver private state + * object they have, filtering on &drm_private_obj.funcs using for_each_if(), at + * least if they want to iterate over all objects of a given type. + * + * An earlier way to handle driver private state was by subclassing struct + * &drm_atomic_state. But since that encourages non-standard ways to implement + * the check/commit split atomic requires (by using e.g. "check and rollback or + * commit" instead of "duplicate state, check, then either commit or release the + * duplicated state") it is deprecated in favour of using &drm_private_state.
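A minimal sketch of the per-type wrapper pattern recommended above; the foo_* structures and fields are hypothetical, only drm_atomic_get_private_obj_state(), drm_modeset_lock() and the &drm_private_obj/&drm_private_state pairing come from the API itself:

/* Hypothetical driver object embedding the core private object. */
struct foo_shared_resource {
	struct drm_private_obj base;
	struct drm_modeset_lock lock;
};

struct foo_shared_state {
	struct drm_private_state base;
	unsigned int bandwidth;		/* example of shared, non-uapi state */
};

/* Wrapper: take the corresponding modeset lock, then fetch the state. */
static struct foo_shared_state *
foo_get_shared_state(struct drm_atomic_state *state,
		     struct foo_shared_resource *res)
{
	struct drm_private_state *priv;
	int ret;

	ret = drm_modeset_lock(&res->lock, state->acquire_ctx);
	if (ret)
		return ERR_PTR(ret);

	priv = drm_atomic_get_private_obj_state(state, &res->base);
	if (IS_ERR(priv))
		return ERR_CAST(priv);

	return container_of(priv, struct foo_shared_state, base);
}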
+ */ + +/** * drm_atomic_private_obj_init - initialize private object * @obj: private object * @state: initial private object state diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 365901c1c33c..ddd537914575 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4871,6 +4871,11 @@ EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode); * @mode: DRM display mode * @rgb_quant_range: RGB quantization range (Q) * @rgb_quant_range_selectable: Sink support selectable RGB quantization range (QS) + * @is_hdmi2_sink: HDMI 2.0 sink, which has different default recommendations + * + * Note that @is_hdmi2_sink can be derived by looking at the + * &drm_scdc.supported flag stored in &drm_hdmi_info.scdc, + * &drm_display_info.hdmi, which can be found in &drm_connector.display_info. */ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 04a3a5ce370a..035784ddd133 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -66,19 +66,23 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * helper functions used by many drivers to implement the kernel mode setting * interfaces. * - * Initialization is done as a four-step process with drm_fb_helper_prepare(), - * drm_fb_helper_init(), drm_fb_helper_single_add_all_connectors() and - * drm_fb_helper_initial_config(). Drivers with fancier requirements than the - * default behaviour can override the third step with their own code. - * Teardown is done with drm_fb_helper_fini() after the fbdev device is - * unregisters using drm_fb_helper_unregister_fbi(). - * - * At runtime drivers should restore the fbdev console by calling - * drm_fb_helper_restore_fbdev_mode_unlocked() from their &drm_driver.lastclose - * callback. They should also notify the fb helper code from updates to the - * output configuration by calling drm_fb_helper_hotplug_event(). For easier - * integration with the output polling code in drm_crtc_helper.c the modeset - * code provides a &drm_mode_config_funcs.output_poll_changed callback. + * Set up fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it + * down by calling drm_fb_helper_fbdev_teardown(). + * + * Drivers that need to handle connector hotplugging (e.g. dp mst) can't use + * the setup helper and will need to do the whole four-step setup process with + * drm_fb_helper_prepare(), drm_fb_helper_init(), + * drm_fb_helper_single_add_all_connectors() and + * drm_fb_helper_initial_config(), enabling hotplugging in between to avoid a + * possible race window. + * + * At runtime drivers should restore the fbdev console by using + * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback. + * They should also notify the fb helper code of updates to the output + * configuration by using drm_fb_helper_output_poll_changed() as their + * &drm_mode_config_funcs.output_poll_changed callback. + * + * For suspend/resume consider using drm_mode_config_helper_suspend() and + * drm_mode_config_helper_resume() which take care of fbdev as well. * * All other functions exported by the fb helper library can be used to * implement the fbdev driver interface by the driver. @@ -103,7 +107,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * always run in process context since the fb_*() function could be running in * atomic context.
If drm_fb_helper_deferred_io() is used as the deferred_io * callback it will also schedule dirty_work with the damage collected from the - * mmap page writes. + * mmap page writes. Drivers can use drm_fb_helper_defio_init() to set up + * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()). */ #define drm_fb_helper_for_each_connector(fbh, i__) \ @@ -1025,6 +1030,49 @@ void drm_fb_helper_deferred_io(struct fb_info *info, EXPORT_SYMBOL(drm_fb_helper_deferred_io); /** + * drm_fb_helper_defio_init - fbdev deferred I/O initialization + * @fb_helper: driver-allocated fbdev helper + * + * This function allocates a &fb_deferred_io, sets its callback to + * drm_fb_helper_deferred_io() and its delay to 50 ms, and calls + * fb_deferred_io_init(). It should be called from the + * &drm_fb_helper_funcs->fb_probe callback. + * drm_fb_helper_fbdev_teardown() cleans up deferred I/O. + * + * NOTE: A copy of &fb_ops is made and assigned to &info->fbops. This is done + * because fb_deferred_io_cleanup() clears &fbops->fb_mmap and would thereby + * affect other instances of that &fb_ops. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper) +{ + struct fb_info *info = fb_helper->fbdev; + struct fb_deferred_io *fbdefio; + struct fb_ops *fbops; + + fbdefio = kzalloc(sizeof(*fbdefio), GFP_KERNEL); + fbops = kzalloc(sizeof(*fbops), GFP_KERNEL); + if (!fbdefio || !fbops) { + kfree(fbdefio); + kfree(fbops); + return -ENOMEM; + } + + info->fbdefio = fbdefio; + fbdefio->delay = msecs_to_jiffies(50); + fbdefio->deferred_io = drm_fb_helper_deferred_io; + + *fbops = *info->fbops; + info->fbops = fbops; + + fb_deferred_io_init(info); + + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_defio_init); + +/** * drm_fb_helper_sys_read - wrapper around fb_sys_read * @info: fb_info struct pointer * @buf: userspace buffer to read from framebuffer memory @@ -1848,6 +1896,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, if (ret < 0) return ret; + strcpy(fb_helper->fb->comm, "[fbcon]"); return 0; } @@ -2730,6 +2779,120 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) EXPORT_SYMBOL(drm_fb_helper_hotplug_event); /** + * drm_fb_helper_fbdev_setup() - Set up fbdev emulation + * @dev: DRM device + * @fb_helper: fbdev helper structure to set up + * @funcs: fbdev helper functions + * @preferred_bpp: Preferred bits per pixel for the device. + * @dev->mode_config.preferred_depth is used if this is zero. + * @max_conn_count: Maximum number of connectors. + * @dev->mode_config.num_connector is used if this is zero. + * + * This function sets up fbdev emulation and registers fbdev for access by + * userspace. If all connectors are disconnected, setup is deferred to the next + * time drm_fb_helper_hotplug_event() is called. + * The caller must provide a &drm_fb_helper_funcs->fb_probe callback + * function. + * + * See also: drm_fb_helper_initial_config() + * + * Returns: + * Zero on success or negative error code on failure.
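As a usage sketch of the setup helper documented above (the foo_* names and the fb_probe implementation are hypothetical; passing 0 for @preferred_bpp and @max_conn_count picks the mode_config defaults as described):

static const struct drm_fb_helper_funcs foo_fb_helper_funcs = {
	.fb_probe = foo_fbdev_probe,	/* hypothetical, driver-provided */
};

static int foo_fbdev_init(struct drm_device *dev)
{
	struct drm_fb_helper *fb_helper;
	int ret;

	fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
	if (!fb_helper)
		return -ENOMEM;

	ret = drm_fb_helper_fbdev_setup(dev, fb_helper,
					&foo_fb_helper_funcs, 0, 0);
	if (ret)
		kfree(fb_helper);

	return ret;
}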
+ */ +int drm_fb_helper_fbdev_setup(struct drm_device *dev, + struct drm_fb_helper *fb_helper, + const struct drm_fb_helper_funcs *funcs, + unsigned int preferred_bpp, + unsigned int max_conn_count) +{ + int ret; + + if (!preferred_bpp) + preferred_bpp = dev->mode_config.preferred_depth; + if (!preferred_bpp) + preferred_bpp = 32; + + if (!max_conn_count) + max_conn_count = dev->mode_config.num_connector; + if (!max_conn_count) { + DRM_DEV_ERROR(dev->dev, "No connectors\n"); + return -EINVAL; + } + + drm_fb_helper_prepare(dev, fb_helper, funcs); + + ret = drm_fb_helper_init(dev, fb_helper, max_conn_count); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper\n"); + return ret; + } + + ret = drm_fb_helper_single_add_all_connectors(fb_helper); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to add connectors\n"); + goto err_drm_fb_helper_fini; + } + + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); + + ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration\n"); + goto err_drm_fb_helper_fini; + } + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); + + return ret; +} +EXPORT_SYMBOL(drm_fb_helper_fbdev_setup); + +/** + * drm_fb_helper_fbdev_teardown - Tear down fbdev emulation + * @dev: DRM device + * + * This function unregisters fbdev if not already done and cleans up the + * associated resources including the &drm_framebuffer. + * The driver is responsible for freeing the &drm_fb_helper structure which is + * stored in &drm_device->fb_helper. Do note that this pointer has been cleared + * when this function returns. + * + * In order to support device removal/unplug while file handles are still open, + * drm_fb_helper_unregister_fbi() should be called on device removal and + * drm_fb_helper_fbdev_teardown() in the &drm_driver->release callback when + * file handles are closed. 
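A sketch of the removal/release split described above (foo_* names hypothetical). The helper pointer is saved before the call because drm_fb_helper_fbdev_teardown() clears &drm_device->fb_helper, while freeing the structure remains the driver's job:

static void foo_unplug(struct drm_device *dev)
{
	/* Device gone but file handles may still be open: unregister only. */
	drm_fb_helper_unregister_fbi(dev->fb_helper);
}

static void foo_release(struct drm_device *dev)
{
	struct drm_fb_helper *fb_helper = dev->fb_helper;

	/* Last file handle closed: tear down fbdev, then free the helper. */
	drm_fb_helper_fbdev_teardown(dev);
	kfree(fb_helper);
}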
+ */ +void drm_fb_helper_fbdev_teardown(struct drm_device *dev) +{ + struct drm_fb_helper *fb_helper = dev->fb_helper; + struct fb_ops *fbops = NULL; + + if (!fb_helper) + return; + + /* Unregister if it hasn't been done already */ + if (fb_helper->fbdev && fb_helper->fbdev->dev) + drm_fb_helper_unregister_fbi(fb_helper); + + if (fb_helper->fbdev && fb_helper->fbdev->fbdefio) { + fb_deferred_io_cleanup(fb_helper->fbdev); + kfree(fb_helper->fbdev->fbdefio); + fbops = fb_helper->fbdev->fbops; + } + + drm_fb_helper_fini(fb_helper); + kfree(fbops); + + if (fb_helper->fb) + drm_framebuffer_remove(fb_helper->fb); +} +EXPORT_SYMBOL(drm_fb_helper_fbdev_teardown); + +/** * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation * @dev: DRM device * diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index d63d4c2ac4c8..5a13ff29f4f0 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -664,6 +664,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; + strcpy(fb->comm, current->comm); ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); @@ -978,6 +979,7 @@ void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent, struct drm_format_name_buf format_name; unsigned int i; + drm_printf_indent(p, indent, "allocated by = %s\n", fb->comm); drm_printf_indent(p, indent, "refcount=%u\n", drm_framebuffer_read_refcount(fb)); drm_printf_indent(p, indent, "format=%s\n", diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 9b733c510cbf..131695915acd 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -29,9 +29,9 @@ /** * DOC: Overview * - * DRM synchronisation objects (syncobj) are a persistent objects, - * that contain an optional fence. The fence can be updated with a new - * fence, or be NULL. + * DRM synchronisation objects (syncobj, see struct &drm_syncobj) are + * persistent objects that contain an optional fence. The fence can be updated + * with a new fence, or be NULL. * * syncobj's can be waited upon, where it will wait for the underlying * fence. @@ -61,7 +61,8 @@ * @file_private: drm file private pointer * @handle: sync object handle to lookup. * - * Returns a reference to the syncobj pointed to by handle or NULL. + * Returns a reference to the syncobj pointed to by handle or NULL. The + * reference must be released by calling drm_syncobj_put(). */ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle) @@ -229,6 +230,19 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) return 0; } +/** + * drm_syncobj_find_fence - lookup and reference the fence in a sync object + * @file_private: drm file private pointer + * @handle: sync object handle to lookup. + * @fence: out parameter for the fence + * + * This is just a convenience function that combines drm_syncobj_find() and + * drm_syncobj_fence_get(). + * + * Returns 0 on success or a negative error value on failure. On success @fence + * contains a reference to the fence, which must be released by calling + * dma_fence_put(). 
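A minimal sketch of the find/wait/put pattern this documentation implies (foo_wait_syncobj is hypothetical; drm_syncobj_find_fence(), dma_fence_wait() and dma_fence_put() are the real calls):

static int foo_wait_syncobj(struct drm_file *file_private, u32 handle)
{
	struct dma_fence *fence;
	long lret;
	int ret;

	ret = drm_syncobj_find_fence(file_private, handle, &fence);
	if (ret)
		return ret;

	/* Interruptible wait on the fence the syncobj currently carries. */
	lret = dma_fence_wait(fence, true);

	/* drm_syncobj_find_fence() returned a reference; drop it here. */
	dma_fence_put(fence);

	return lret < 0 ? lret : 0;
}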
+ */ int drm_syncobj_find_fence(struct drm_file *file_private, u32 handle, struct dma_fence **fence) @@ -269,6 +283,12 @@ EXPORT_SYMBOL(drm_syncobj_free); * @out_syncobj: returned syncobj * @flags: DRM_SYNCOBJ_* flags * @fence: if non-NULL, the syncobj will represent this fence + * + * This is the first function to create a sync object. After creating, drivers + * probably want to make it available to userspace, either through + * drm_syncobj_get_handle() or drm_syncobj_get_fd(). + * + * Returns 0 on success or a negative error value on failure. */ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) @@ -302,6 +322,14 @@ EXPORT_SYMBOL(drm_syncobj_create); /** * drm_syncobj_get_handle - get a handle from a syncobj + * @file_private: drm file private pointer + * @syncobj: Sync object to export + * @handle: out parameter with the new handle + * + * Exports a sync object created with drm_syncobj_create() as a handle on + * @file_private to userspace. + * + * Returns 0 on success or a negative error value on failure. */ int drm_syncobj_get_handle(struct drm_file *file_private, struct drm_syncobj *syncobj, u32 *handle) @@ -388,6 +416,15 @@ static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) return 0; } +/** + * drm_syncobj_get_fd - get a file descriptor from a syncobj + * @syncobj: Sync object to export + * @p_fd: out parameter with the new file descriptor + * + * Exports a sync object created with drm_syncobj_create() as a file descriptor. + * + * Returns 0 on success or a negative error value on failure. + */ int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { int ret; diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig index a29b8f59eb15..3f58b4077767 100644 --- a/drivers/gpu/drm/etnaviv/Kconfig +++ b/drivers/gpu/drm/etnaviv/Kconfig @@ -6,6 +6,7 @@ config DRM_ETNAVIV depends on MMU select SHMEM select SYNC_FILE + select THERMAL if DRM_ETNAVIV_THERMAL select TMPFS select WANT_DEV_COREDUMP select CMA if HAVE_DMA_CONTIGUOUS @@ -13,6 +14,14 @@ config DRM_ETNAVIV help DRM driver for Vivante GPUs. +config DRM_ETNAVIV_THERMAL + bool "enable ETNAVIV thermal throttling" + depends on DRM_ETNAVIV + default y + help + Compile in support for thermal throttling. + Say Y unless you want to risk burning your SoC. + config DRM_ETNAVIV_REGISTER_LOGGING bool "enable ETNAVIV register logging" depends on DRM_ETNAVIV diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index 9e7098e3207f..99ad2f073c6e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -100,6 +100,8 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu, { u32 flush = 0; + lockdep_assert_held(&gpu->lock); + /* * This assumes that if we're switching to 2D, we're switching * away from 3D, and vice versa. 
Hence, if we're switching to @@ -164,7 +166,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu, u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; + + lockdep_assert_held(&gpu->lock); /* initialize buffer */ buffer->user_size = 0; @@ -178,7 +182,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu) u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; + + lockdep_assert_held(&gpu->lock); buffer->user_size = 0; @@ -211,10 +217,12 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe void etnaviv_buffer_end(struct etnaviv_gpu *gpu) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 link_target, flush = 0; + lockdep_assert_held(&gpu->lock); + if (gpu->exec_state == ETNA_PIPE_2D) flush = VIVS_GL_FLUSH_CACHE_PE2D; else if (gpu->exec_state == ETNA_PIPE_3D) @@ -253,10 +261,12 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu) /* Append a 'sync point' to the ring buffer. */ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 dwords, target; + lockdep_assert_held(&gpu->lock); + /* * We need at most 3 dwords in the return target: * 1 event + 1 end + 1 wait + 1 link. @@ -287,13 +297,16 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event) } /* Append a command buffer to the ring buffer. */ -void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, - struct etnaviv_cmdbuf *cmdbuf) +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, + unsigned int event, struct etnaviv_cmdbuf *cmdbuf) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 return_target, return_dwords; u32 link_target, link_dwords; + bool switch_context = gpu->exec_state != exec_state; + + lockdep_assert_held(&gpu->lock); if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); @@ -306,7 +319,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, * need to append a mmu flush load state, followed by a new * link to this buffer - a total of four additional words. 
*/ - if (gpu->mmu->need_flush || gpu->switch_context) { + if (gpu->mmu->need_flush || switch_context) { u32 target, extra_dwords; /* link command */ @@ -321,7 +334,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, } /* pipe switch commands */ - if (gpu->switch_context) + if (switch_context) extra_dwords += 4; target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords); @@ -349,10 +362,9 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, gpu->mmu->need_flush = false; } - if (gpu->switch_context) { - etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state); - gpu->exec_state = cmdbuf->exec_state; - gpu->switch_context = false; + if (switch_context) { + etnaviv_cmd_select_pipe(gpu, buffer, exec_state); + gpu->exec_state = exec_state; } /* And the link to the submitted buffer */ @@ -421,4 +433,6 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); + + gpu->lastctx = cmdbuf->ctx; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c index 6e3bbcf24160..68e6d3772ad8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c @@ -78,6 +78,7 @@ static const struct { ST(0x17c0, 8), ST(0x17e0, 8), ST(0x2400, 14 * 16), + ST(0x3824, 1), ST(0x10800, 32 * 16), ST(0x14600, 16), ST(0x14800, 8 * 8), diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c index 66ac79558bbd..3746827f45eb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c @@ -86,26 +86,11 @@ void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc) kfree(suballoc); } -struct etnaviv_cmdbuf * -etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, - size_t nr_bos, size_t nr_pmrs) +int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc, + struct etnaviv_cmdbuf *cmdbuf, u32 size) { - struct etnaviv_cmdbuf *cmdbuf; - struct etnaviv_perfmon_request *pmrs; - size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]), - sizeof(*cmdbuf)); int granule_offs, order, ret; - cmdbuf = kzalloc(sz, GFP_KERNEL); - if (!cmdbuf) - return NULL; - - sz = sizeof(*pmrs) * nr_pmrs; - pmrs = kzalloc(sz, GFP_KERNEL); - if (!pmrs) - goto out_free_cmdbuf; - - cmdbuf->pmrs = pmrs; cmdbuf->suballoc = suballoc; cmdbuf->size = size; @@ -123,7 +108,7 @@ retry: if (!ret) { dev_err(suballoc->gpu->dev, "Timeout waiting for cmdbuf space\n"); - return NULL; + return -ETIMEDOUT; } goto retry; } @@ -131,11 +116,7 @@ retry: cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE; cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset; - return cmdbuf; - -out_free_cmdbuf: - kfree(cmdbuf); - return NULL; + return 0; } void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf) @@ -151,8 +132,6 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf) suballoc->free_space = 1; mutex_unlock(&suballoc->lock); wake_up_all(&suballoc->free_event); - kfree(cmdbuf->pmrs); - kfree(cmdbuf); } u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h index b6348b9f2a9d..ddc3f7ea169c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h @@ -33,27 +33,15 @@ struct etnaviv_cmdbuf { void *vaddr; u32 size; u32 user_size; - /* fence after which this buffer is to be disposed */ - struct 
dma_fence *fence; - /* target exec state */ - u32 exec_state; - /* per GPU in-flight list */ - struct list_head node; - /* perfmon requests */ - unsigned int nr_pmrs; - struct etnaviv_perfmon_request *pmrs; - /* BOs attached to this command buffer */ - unsigned int nr_bos; - struct etnaviv_vram_mapping *bo_map[0]; }; struct etnaviv_cmdbuf_suballoc * etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu); void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc); -struct etnaviv_cmdbuf * -etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, - size_t nr_bos, size_t nr_pmrs); + +int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc, + struct etnaviv_cmdbuf *cmdbuf, u32 size); void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf); u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 491eddf9b150..6faf4042db23 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -172,7 +172,7 @@ static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m) static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m) { - struct etnaviv_cmdbuf *buf = gpu->buffer; + struct etnaviv_cmdbuf *buf = &gpu->buffer; u32 size = buf->size; u32 *ptr = buf->vaddr; u32 i; @@ -459,9 +459,6 @@ static int etnaviv_ioctl_pm_query_dom(struct drm_device *dev, void *data, struct drm_etnaviv_pm_domain *args = data; struct etnaviv_gpu *gpu; - /* reject as long as the feature isn't stable */ - return -EINVAL; - if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -479,9 +476,6 @@ static int etnaviv_ioctl_pm_query_sig(struct drm_device *dev, void *data, struct drm_etnaviv_pm_signal *args = data; struct etnaviv_gpu *gpu; - /* reject as long as the feature isn't stable */ - return -EINVAL; - if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -556,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 1, + .minor = 2, }; /* @@ -580,12 +574,6 @@ static int etnaviv_bind(struct device *dev) } drm->dev_private = priv; - priv->wq = alloc_ordered_workqueue("etnaviv", 0); - if (!priv->wq) { - ret = -ENOMEM; - goto out_wq; - } - mutex_init(&priv->gem_lock); INIT_LIST_HEAD(&priv->gem_list); priv->num_gpus = 0; @@ -607,9 +595,6 @@ static int etnaviv_bind(struct device *dev) out_register: component_unbind_all(dev, drm); out_bind: - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); -out_wq: kfree(priv); out_unref: drm_dev_unref(drm); @@ -624,9 +609,6 @@ static void etnaviv_unbind(struct device *dev) drm_dev_unregister(drm); - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); - component_unbind_all(dev, drm); drm->dev_private = NULL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index d249acb6da08..a54f0b758a5c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -56,18 +56,8 @@ struct etnaviv_drm_private { /* list of GEM objects: */ struct mutex gem_lock; struct list_head gem_list; - - struct workqueue_struct *wq; }; -static inline void etnaviv_queue_work(struct drm_device *dev, - struct work_struct *w) -{ - struct etnaviv_drm_private *priv = dev->dev_private; - - queue_work(priv->wq, w); -} - int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file); @@ -97,8 +87,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu); u16 
etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr); void etnaviv_buffer_end(struct etnaviv_gpu *gpu); void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event); -void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, - struct etnaviv_cmdbuf *cmdbuf); +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, + unsigned int event, struct etnaviv_cmdbuf *cmdbuf); void etnaviv_validate_init(void); bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu, u32 *stream, unsigned int size, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 2d955d7d7b6d..6d0909c589d1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -120,7 +120,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) struct core_dump_iterator iter; struct etnaviv_vram_mapping *vram; struct etnaviv_gem_object *obj; - struct etnaviv_cmdbuf *cmd; + struct etnaviv_gem_submit *submit; unsigned int n_obj, n_bomap_pages; size_t file_size, mmu_size; __le64 *bomap, *bomap_start; @@ -132,11 +132,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) n_bomap_pages = 0; file_size = ARRAY_SIZE(etnaviv_dump_registers) * sizeof(struct etnaviv_dump_registers) + - mmu_size + gpu->buffer->size; + mmu_size + gpu->buffer.size; /* Add in the active command buffers */ - list_for_each_entry(cmd, &gpu->active_cmd_list, node) { - file_size += cmd->size; + list_for_each_entry(submit, &gpu->active_submit_list, node) { + file_size += submit->cmdbuf.size; n_obj++; } @@ -176,13 +176,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) etnaviv_core_dump_registers(&iter, gpu); etnaviv_core_dump_mmu(&iter, gpu, mmu_size); - etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr, - gpu->buffer->size, - etnaviv_cmdbuf_get_va(gpu->buffer)); - - list_for_each_entry(cmd, &gpu->active_cmd_list, node) - etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr, - cmd->size, etnaviv_cmdbuf_get_va(cmd)); + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr, + gpu->buffer.size, + etnaviv_cmdbuf_get_va(&gpu->buffer)); + + list_for_each_entry(submit, &gpu->active_submit_list, node) + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, + submit->cmdbuf.vaddr, submit->cmdbuf.size, + etnaviv_cmdbuf_get_va(&submit->cmdbuf)); /* Reserve space for the bomap */ if (n_bomap_pages) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index daee3f1196df..fcc969fa0e69 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -24,6 +24,9 @@ #include "etnaviv_gpu.h" #include "etnaviv_mmu.h" +static struct lock_class_key etnaviv_shm_lock_class; +static struct lock_class_key etnaviv_userptr_lock_class; + static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj) { struct drm_device *dev = etnaviv_obj->base.dev; @@ -583,7 +586,7 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj) kfree(etnaviv_obj); } -int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) +void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) { struct etnaviv_drm_private *priv = dev->dev_private; struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); @@ -591,8 +594,6 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) mutex_lock(&priv->gem_lock); list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list); mutex_unlock(&priv->gem_lock); - - return 0; } static int 
etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags, @@ -640,8 +641,9 @@ static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags, return 0; } -static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, - u32 size, u32 flags) +/* convenience method to construct a GEM buffer object, and userspace handle */ +int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file, + u32 size, u32 flags, u32 *handle) { struct drm_gem_object *obj = NULL; int ret; @@ -653,6 +655,8 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, if (ret) goto fail; + lockdep_set_class(&to_etnaviv_bo(obj)->lock, &etnaviv_shm_lock_class); + ret = drm_gem_object_init(dev, obj, size); if (ret == 0) { struct address_space *mapping; @@ -660,7 +664,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, /* * Our buffers are kept pinned, so allocating them * from the MOVABLE zone is a really bad idea, and - * conflicts with CMA. See coments above new_inode() + * conflicts with CMA. See comments above new_inode() * why this is required _and_ expected if you're * going to pin these pages. */ @@ -672,33 +676,12 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, if (ret) goto fail; - return obj; - -fail: - drm_gem_object_put_unlocked(obj); - return ERR_PTR(ret); -} - -/* convenience method to construct a GEM buffer object, and userspace handle */ -int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file, - u32 size, u32 flags, u32 *handle) -{ - struct drm_gem_object *obj; - int ret; - - obj = __etnaviv_gem_new(dev, size, flags); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - ret = etnaviv_gem_obj_add(dev, obj); - if (ret < 0) { - drm_gem_object_put_unlocked(obj); - return ret; - } + etnaviv_gem_obj_add(dev, obj); ret = drm_gem_handle_create(file, obj, handle); /* drop reference from allocate - handle holds it now */ +fail: drm_gem_object_put_unlocked(obj); return ret; @@ -722,139 +705,41 @@ int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags, return 0; } -struct get_pages_work { - struct work_struct work; - struct mm_struct *mm; - struct task_struct *task; - struct etnaviv_gem_object *etnaviv_obj; -}; - -static struct page **etnaviv_gem_userptr_do_get_pages( - struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task) -{ - int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - struct page **pvec; - uintptr_t ptr; - unsigned int flags = 0; - - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pvec) - return ERR_PTR(-ENOMEM); - - if (!etnaviv_obj->userptr.ro) - flags |= FOLL_WRITE; - - pinned = 0; - ptr = etnaviv_obj->userptr.ptr; - - down_read(&mm->mmap_sem); - while (pinned < npages) { - ret = get_user_pages_remote(task, mm, ptr, npages - pinned, - flags, pvec + pinned, NULL, NULL); - if (ret < 0) - break; - - ptr += ret * PAGE_SIZE; - pinned += ret; - } - up_read(&mm->mmap_sem); - - if (ret < 0) { - release_pages(pvec, pinned); - kvfree(pvec); - return ERR_PTR(ret); - } - - return pvec; -} - -static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj; - struct page **pvec; - - pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task); - - mutex_lock(&etnaviv_obj->lock); - if (IS_ERR(pvec)) { - etnaviv_obj->userptr.work = ERR_CAST(pvec); - } 
else { - etnaviv_obj->userptr.work = NULL; - etnaviv_obj->pages = pvec; - } - - mutex_unlock(&etnaviv_obj->lock); - drm_gem_object_put_unlocked(&etnaviv_obj->base); - - mmput(work->mm); - put_task_struct(work->task); - kfree(work); -} - static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) { struct page **pvec = NULL; - struct get_pages_work *work; - struct mm_struct *mm; - int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - - if (etnaviv_obj->userptr.work) { - if (IS_ERR(etnaviv_obj->userptr.work)) { - ret = PTR_ERR(etnaviv_obj->userptr.work); - etnaviv_obj->userptr.work = NULL; - } else { - ret = -EAGAIN; - } - return ret; - } + struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr; + int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - mm = get_task_mm(etnaviv_obj->userptr.task); - pinned = 0; - if (mm == current->mm) { - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pvec) { - mmput(mm); - return -ENOMEM; - } - - pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages, - !etnaviv_obj->userptr.ro, pvec); - if (pinned < 0) { - kvfree(pvec); - mmput(mm); - return pinned; - } - - if (pinned == npages) { - etnaviv_obj->pages = pvec; - mmput(mm); - return 0; - } - } + might_lock_read(&current->mm->mmap_sem); - release_pages(pvec, pinned); - kvfree(pvec); + if (userptr->mm != current->mm) - return -EPERM; - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - mmput(mm); + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (!pvec) return -ENOMEM; - } - get_task_struct(current); - drm_gem_object_get(&etnaviv_obj->base); + do { + unsigned num_pages = npages - pinned; + uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE; + struct page **pages = pvec + pinned; - work->mm = mm; - work->task = current; - work->etnaviv_obj = etnaviv_obj; + ret = get_user_pages_fast(ptr, num_pages, - !userptr->ro ? 
FOLL_WRITE : 0, pages); + if (ret < 0) { + release_pages(pvec, pinned); + kvfree(pvec); + return ret; + } + + pinned += ret; - etnaviv_obj->userptr.work = &work->work; - INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages); + } while (pinned < npages); - etnaviv_queue_work(etnaviv_obj->base.dev, &work->work); + etnaviv_obj->pages = pvec; - return -EAGAIN; + return 0; } static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) @@ -870,7 +755,6 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) release_pages(etnaviv_obj->pages, npages); kvfree(etnaviv_obj->pages); } - put_task_struct(etnaviv_obj->userptr.task); } static int etnaviv_gem_userptr_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, @@ -897,17 +781,16 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file, if (ret) return ret; + lockdep_set_class(&etnaviv_obj->lock, &etnaviv_userptr_lock_class); + etnaviv_obj->userptr.ptr = ptr; - etnaviv_obj->userptr.task = current; + etnaviv_obj->userptr.mm = current->mm; etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE); - get_task_struct(current); - ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); - if (ret) - goto unreference; + etnaviv_gem_obj_add(dev, &etnaviv_obj->base); ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle); -unreference: + /* drop reference from allocate - handle holds it now */ drm_gem_object_put_unlocked(&etnaviv_obj->base); return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index e437fba1209d..be72a9833f2b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -18,6 +18,7 @@ #define __ETNAVIV_GEM_H__ #include <linux/reservation.h> +#include "etnaviv_cmdbuf.h" #include "etnaviv_drv.h" struct dma_fence; @@ -26,8 +27,7 @@ struct etnaviv_gem_object; struct etnaviv_gem_userptr { uintptr_t ptr; - struct task_struct *task; - struct work_struct *work; + struct mm_struct *mm; bool ro; }; @@ -98,26 +98,32 @@ struct etnaviv_gem_submit_bo { /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and - * make it easier to unwind when things go wrong, etc). This only - * lasts for the duration of the submit-ioctl. + * make it easier to unwind when things go wrong, etc). */ struct etnaviv_gem_submit { - struct drm_device *dev; + struct kref refcount; struct etnaviv_gpu *gpu; - struct ww_acquire_ctx ticket; - struct dma_fence *fence; + struct dma_fence *out_fence, *in_fence; + struct list_head node; /* GPU active submit list */ + struct etnaviv_cmdbuf cmdbuf; + bool runtime_resumed; + u32 exec_state; u32 flags; + unsigned int nr_pmrs; + struct etnaviv_perfmon_request *pmrs; unsigned int nr_bos; struct etnaviv_gem_submit_bo bos[0]; /* No new members here, the previous one is variable-length! 
*/ }; +void etnaviv_submit_put(struct etnaviv_gem_submit * submit); + int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, struct timespec *timeout); int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags, struct reservation_object *robj, const struct etnaviv_gem_ops *ops, struct etnaviv_gem_object **res); -int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj); +void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj); struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj); void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index ae884723e9b1..5704305d41e6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -19,6 +19,7 @@ #include "etnaviv_drv.h" #include "etnaviv_gem.h" +static struct lock_class_key etnaviv_prime_lock_class; struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) { @@ -125,6 +126,8 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, if (ret < 0) return ERR_PTR(ret); + lockdep_set_class(&etnaviv_obj->lock, &etnaviv_prime_lock_class); + npages = size / PAGE_SIZE; etnaviv_obj->sgt = sgt; @@ -139,9 +142,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, if (ret) goto fail; - ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); - if (ret) - goto fail; + etnaviv_gem_obj_add(dev, &etnaviv_obj->base); return &etnaviv_obj->base; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index ff911541a190..1f8202bca061 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -33,22 +33,25 @@ #define BO_PINNED 0x2000 static struct etnaviv_gem_submit *submit_create(struct drm_device *dev, - struct etnaviv_gpu *gpu, size_t nr) + struct etnaviv_gpu *gpu, size_t nr_bos, size_t nr_pmrs) { struct etnaviv_gem_submit *submit; - size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit)); + size_t sz = size_vstruct(nr_bos, sizeof(submit->bos[0]), sizeof(*submit)); - submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (submit) { - submit->dev = dev; - submit->gpu = gpu; + submit = kzalloc(sz, GFP_KERNEL); + if (!submit) + return NULL; - /* initially, until copy_from_user() and bo lookup succeeds: */ - submit->nr_bos = 0; - submit->fence = NULL; - - ww_acquire_init(&submit->ticket, &reservation_ww_class); + submit->pmrs = kcalloc(nr_pmrs, sizeof(struct etnaviv_perfmon_request), + GFP_KERNEL); + if (!submit->pmrs) { + kfree(submit); + return NULL; } + submit->nr_pmrs = nr_pmrs; + + submit->gpu = gpu; + kref_init(&submit->refcount); return submit; } @@ -111,7 +114,8 @@ static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i) } } -static int submit_lock_objects(struct etnaviv_gem_submit *submit) +static int submit_lock_objects(struct etnaviv_gem_submit *submit, + struct ww_acquire_ctx *ticket) { int contended, slow_locked = -1, i, ret = 0; @@ -126,7 +130,7 @@ retry: if (!(submit->bos[i].flags & BO_LOCKED)) { ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock, - &submit->ticket); + ticket); if (ret == -EALREADY) DRM_ERROR("BO at index %u already on submit list\n", i); @@ -136,7 +140,7 @@ retry: } } - ww_acquire_done(&submit->ticket); + ww_acquire_done(ticket); return 0; @@ -154,7 +158,7 @@ fail: 
/* we lost out in a seqno race, lock and retry.. */ ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock, - &submit->ticket); + ticket); if (!ret) { submit->bos[contended].flags |= BO_LOCKED; slow_locked = contended; @@ -181,19 +185,33 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit) break; } + if (submit->flags & ETNA_SUBMIT_FENCE_FD_IN) { + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. + */ + if (!dma_fence_match_context(submit->in_fence, context)) + ret = dma_fence_wait(submit->in_fence, true); + } + return ret; } -static void submit_unpin_objects(struct etnaviv_gem_submit *submit) +static void submit_attach_object_fences(struct etnaviv_gem_submit *submit) { int i; for (i = 0; i < submit->nr_bos; i++) { - if (submit->bos[i].flags & BO_PINNED) - etnaviv_gem_mapping_unreference(submit->bos[i].mapping); + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) + reservation_object_add_excl_fence(etnaviv_obj->resv, + submit->out_fence); + else + reservation_object_add_shared_fence(etnaviv_obj->resv, + submit->out_fence); - submit->bos[i].mapping = NULL; - submit->bos[i].flags &= ~BO_PINNED; + submit_unlock_object(submit, i); } } @@ -211,6 +229,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit) ret = PTR_ERR(mapping); break; } + atomic_inc(&etnaviv_obj->gpu_active); submit->bos[i].flags |= BO_PINNED; submit->bos[i].mapping = mapping; @@ -285,13 +304,11 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream, } static int submit_perfmon_validate(struct etnaviv_gem_submit *submit, - struct etnaviv_cmdbuf *cmdbuf, - const struct drm_etnaviv_gem_submit_pmr *pmrs, - u32 nr_pms) + u32 exec_state, const struct drm_etnaviv_gem_submit_pmr *pmrs) { u32 i; - for (i = 0; i < nr_pms; i++) { + for (i = 0; i < submit->nr_pmrs; i++) { const struct drm_etnaviv_gem_submit_pmr *r = pmrs + i; struct etnaviv_gem_submit_bo *bo; int ret; @@ -316,39 +333,65 @@ static int submit_perfmon_validate(struct etnaviv_gem_submit *submit, return -EINVAL; } - if (etnaviv_pm_req_validate(r, cmdbuf->exec_state)) { + if (etnaviv_pm_req_validate(r, exec_state)) { DRM_ERROR("perfmon request: domain or signal not valid"); return -EINVAL; } - cmdbuf->pmrs[i].flags = r->flags; - cmdbuf->pmrs[i].domain = r->domain; - cmdbuf->pmrs[i].signal = r->signal; - cmdbuf->pmrs[i].sequence = r->sequence; - cmdbuf->pmrs[i].offset = r->read_offset; - cmdbuf->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base); + submit->pmrs[i].flags = r->flags; + submit->pmrs[i].domain = r->domain; + submit->pmrs[i].signal = r->signal; + submit->pmrs[i].sequence = r->sequence; + submit->pmrs[i].offset = r->read_offset; + submit->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base); } return 0; } -static void submit_cleanup(struct etnaviv_gem_submit *submit) +static void submit_cleanup(struct kref *kref) { + struct etnaviv_gem_submit *submit = + container_of(kref, struct etnaviv_gem_submit, refcount); unsigned i; + if (submit->runtime_resumed) + pm_runtime_put_autosuspend(submit->gpu->dev); + + if (submit->cmdbuf.suballoc) + etnaviv_cmdbuf_free(&submit->cmdbuf); + for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + /* unpin all objects */ + if (submit->bos[i].flags & BO_PINNED) { + etnaviv_gem_mapping_unreference(submit->bos[i].mapping); + atomic_dec(&etnaviv_obj->gpu_active); + submit->bos[i].mapping = NULL; + 
submit->bos[i].flags &= ~BO_PINNED; + } + + /* if the GPU submit failed, objects might still be locked */ submit_unlock_object(submit, i); drm_gem_object_put_unlocked(&etnaviv_obj->base); } - ww_acquire_fini(&submit->ticket); - if (submit->fence) - dma_fence_put(submit->fence); + wake_up_all(&submit->gpu->fence_event); + + if (submit->in_fence) + dma_fence_put(submit->in_fence); + if (submit->out_fence) + dma_fence_put(submit->out_fence); + kfree(submit->pmrs); kfree(submit); } +void etnaviv_submit_put(struct etnaviv_gem_submit *submit) +{ + kref_put(&submit->refcount, submit_cleanup); +} + int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file) { @@ -358,10 +401,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_etnaviv_gem_submit_pmr *pmrs; struct drm_etnaviv_gem_submit_bo *bos; struct etnaviv_gem_submit *submit; - struct etnaviv_cmdbuf *cmdbuf; struct etnaviv_gpu *gpu; - struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct ww_acquire_ctx ticket; int out_fence_fd = -1; void *stream; int ret; @@ -399,17 +441,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, relocs = kvmalloc_array(args->nr_relocs, sizeof(*relocs), GFP_KERNEL); pmrs = kvmalloc_array(args->nr_pmrs, sizeof(*pmrs), GFP_KERNEL); stream = kvmalloc_array(1, args->stream_size, GFP_KERNEL); - cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, - ALIGN(args->stream_size, 8) + 8, - args->nr_bos, args->nr_pmrs); - if (!bos || !relocs || !pmrs || !stream || !cmdbuf) { + if (!bos || !relocs || !pmrs || !stream) { ret = -ENOMEM; goto err_submit_cmds; } - cmdbuf->exec_state = args->exec_state; - cmdbuf->ctx = file->driver_priv; - ret = copy_from_user(bos, u64_to_user_ptr(args->bos), args->nr_bos * sizeof(*bos)); if (ret) { @@ -430,7 +466,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = -EFAULT; goto err_submit_cmds; } - cmdbuf->nr_pmrs = args->nr_pmrs; ret = copy_from_user(stream, u64_to_user_ptr(args->stream), args->stream_size); @@ -447,19 +482,28 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, } } - submit = submit_create(dev, gpu, args->nr_bos); + ww_acquire_init(&ticket, &reservation_ww_class); + + submit = submit_create(dev, gpu, args->nr_bos, args->nr_pmrs); if (!submit) { ret = -ENOMEM; - goto err_submit_cmds; + goto err_submit_ww_acquire; } + ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf, + ALIGN(args->stream_size, 8) + 8); + if (ret) + goto err_submit_objects; + + submit->cmdbuf.ctx = file->driver_priv; + submit->exec_state = args->exec_state; submit->flags = args->flags; ret = submit_lookup_objects(submit, file, bos, args->nr_bos); if (ret) goto err_submit_objects; - ret = submit_lock_objects(submit); + ret = submit_lock_objects(submit, &ticket); if (ret) goto err_submit_objects; @@ -470,21 +514,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, } if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) { - in_fence = sync_file_get_fence(args->fence_fd); - if (!in_fence) { + submit->in_fence = sync_file_get_fence(args->fence_fd); + if (!submit->in_fence) { ret = -EINVAL; goto err_submit_objects; } - - /* - * Wait if the fence is from a foreign context, or if the fence - * array contains any fence from a foreign context. 
- */ - if (!dma_fence_match_context(in_fence, gpu->fence_context)) { - ret = dma_fence_wait(in_fence, true); - if (ret) - goto err_submit_objects; - } } ret = submit_fence_sync(submit); @@ -493,25 +527,25 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = submit_pin_objects(submit); if (ret) - goto out; + goto err_submit_objects; ret = submit_reloc(submit, stream, args->stream_size / 4, relocs, args->nr_relocs); if (ret) - goto out; + goto err_submit_objects; - ret = submit_perfmon_validate(submit, cmdbuf, pmrs, args->nr_pmrs); + ret = submit_perfmon_validate(submit, args->exec_state, pmrs); if (ret) - goto out; + goto err_submit_objects; - memcpy(cmdbuf->vaddr, stream, args->stream_size); - cmdbuf->user_size = ALIGN(args->stream_size, 8); + memcpy(submit->cmdbuf.vaddr, stream, args->stream_size); + submit->cmdbuf.user_size = ALIGN(args->stream_size, 8); - ret = etnaviv_gpu_submit(gpu, submit, cmdbuf); + ret = etnaviv_gpu_submit(gpu, submit); if (ret) - goto out; + goto err_submit_objects; - cmdbuf = NULL; + submit_attach_object_fences(submit); if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { /* @@ -520,39 +554,26 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, * fence to the sync file here, eliminating the ENOMEM * possibility at this stage. */ - sync_file = sync_file_create(submit->fence); + sync_file = sync_file_create(submit->out_fence); if (!sync_file) { ret = -ENOMEM; - goto out; + goto err_submit_objects; } fd_install(out_fence_fd, sync_file->file); } args->fence_fd = out_fence_fd; - args->fence = submit->fence->seqno; - -out: - submit_unpin_objects(submit); - - /* - * If we're returning -EAGAIN, it may be due to the userptr code - * wanting to run its workqueue outside of any locks. Flush our - * workqueue to ensure that it is run in a timely manner. 
- */ - if (ret == -EAGAIN) - flush_workqueue(priv->wq); + args->fence = submit->out_fence->seqno; err_submit_objects: - if (in_fence) - dma_fence_put(in_fence); - submit_cleanup(submit); + etnaviv_submit_put(submit); + +err_submit_ww_acquire: + ww_acquire_fini(&ticket); err_submit_cmds: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - /* if we still own the cmdbuf */ - if (cmdbuf) - etnaviv_cmdbuf_free(cmdbuf); if (stream) kvfree(stream); if (bos) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index e19cbe05da2a..21d0d22f1168 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -644,7 +644,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) prefetch = etnaviv_buffer_init(gpu); gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U); - etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer), + etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer), prefetch); } @@ -717,15 +717,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) } /* Create buffer: */ - gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0); - if (!gpu->buffer) { - ret = -ENOMEM; + ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer, + PAGE_SIZE); + if (ret) { dev_err(gpu->dev, "could not create command buffer\n"); goto destroy_iommu; } if (gpu->mmu->version == ETNAVIV_IOMMU_V1 && - etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) { + etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) { ret = -EINVAL; dev_err(gpu->dev, "command buffer outside valid memory window\n"); @@ -751,8 +751,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) return 0; free_buffer: - etnaviv_cmdbuf_free(gpu->buffer); - gpu->buffer = NULL; + etnaviv_cmdbuf_free(&gpu->buffer); destroy_iommu: etnaviv_iommu_destroy(gpu->mmu); gpu->mmu = NULL; @@ -958,7 +957,7 @@ static void recover_worker(struct work_struct *work) pm_runtime_put_autosuspend(gpu->dev); /* Retire the buffer objects in a work */ - etnaviv_queue_work(gpu->drm, &gpu->retire_work); + queue_work(gpu->wq, &gpu->retire_work); } static void hangcheck_timer_reset(struct etnaviv_gpu *gpu) @@ -994,7 +993,7 @@ static void hangcheck_handler(struct timer_list *t) dev_err(gpu->dev, " completed fence: %u\n", fence); dev_err(gpu->dev, " active fence: %u\n", gpu->active_fence); - etnaviv_queue_work(gpu->drm, &gpu->recover_work); + queue_work(gpu->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ @@ -1201,42 +1200,23 @@ static void retire_worker(struct work_struct *work) struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, retire_work); u32 fence = gpu->completed_fence; - struct etnaviv_cmdbuf *cmdbuf, *tmp; - unsigned int i; + struct etnaviv_gem_submit *submit, *tmp; + LIST_HEAD(retire_list); mutex_lock(&gpu->lock); - list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) { - if (!dma_fence_is_signaled(cmdbuf->fence)) + list_for_each_entry_safe(submit, tmp, &gpu->active_submit_list, node) { + if (!dma_fence_is_signaled(submit->out_fence)) break; - list_del(&cmdbuf->node); - dma_fence_put(cmdbuf->fence); - - for (i = 0; i < cmdbuf->nr_bos; i++) { - struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i]; - struct etnaviv_gem_object *etnaviv_obj = mapping->object; - - atomic_dec(&etnaviv_obj->gpu_active); - /* drop the refcount taken in etnaviv_gpu_submit */ - etnaviv_gem_mapping_unreference(mapping); - } - - etnaviv_cmdbuf_free(cmdbuf); - /* - * We need to balance the runtime PM count caused by - * each submission. 
Upon submission, we increment - * the runtime PM counter, and allocate one event. - * So here, we put the runtime PM count for each - * completed event. - */ - pm_runtime_put_autosuspend(gpu->dev); + list_move(&submit->node, &retire_list); } gpu->retired_fence = fence; mutex_unlock(&gpu->lock); - wake_up_all(&gpu->fence_event); + list_for_each_entry_safe(submit, tmp, &retire_list, node) + etnaviv_submit_put(submit); } int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, @@ -1295,41 +1275,25 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, ret = wait_event_interruptible_timeout(gpu->fence_event, !is_active(etnaviv_obj), remaining); - if (ret > 0) { - struct etnaviv_drm_private *priv = gpu->drm->dev_private; - - /* Synchronise with the retire worker */ - flush_workqueue(priv->wq); + if (ret > 0) return 0; - } else if (ret == -ERESTARTSYS) { + else if (ret == -ERESTARTSYS) return -ERESTARTSYS; - } else { + else return -ETIMEDOUT; - } -} - -int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu) -{ - return pm_runtime_get_sync(gpu->dev); -} - -void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) -{ - pm_runtime_mark_last_busy(gpu->dev); - pm_runtime_put_autosuspend(gpu->dev); } static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, struct etnaviv_event *event, unsigned int flags) { - const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + const struct etnaviv_gem_submit *submit = event->submit; unsigned int i; - for (i = 0; i < cmdbuf->nr_pmrs; i++) { - const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + for (i = 0; i < submit->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = submit->pmrs + i; if (pmr->flags == flags) - etnaviv_perfmon_process(gpu, pmr); + etnaviv_perfmon_process(gpu, pmr, submit->exec_state); } } @@ -1354,14 +1318,14 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, struct etnaviv_event *event) { - const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + const struct etnaviv_gem_submit *submit = event->submit; unsigned int i; u32 val; sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); - for (i = 0; i < cmdbuf->nr_pmrs; i++) { - const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + for (i = 0; i < submit->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = submit->pmrs + i; *pmr->bo_vma = pmr->sequence; } @@ -1380,24 +1344,15 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, /* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, - struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) + struct etnaviv_gem_submit *submit) { - struct dma_fence *fence; unsigned int i, nr_events = 1, event[3]; int ret; - ret = etnaviv_gpu_pm_get_sync(gpu); + ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) return ret; - - /* - * TODO - * - * - flush - * - data endian - * - prefetch - * - */ + submit->runtime_resumed = true; /* * if there are performance monitor requests we need to have @@ -1406,19 +1361,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests * and update the sequence number for userspace. 
*/ - if (cmdbuf->nr_pmrs) + if (submit->nr_pmrs) nr_events = 3; ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n"); - goto out_pm_put; + return ret; } mutex_lock(&gpu->lock); - fence = etnaviv_gpu_fence_alloc(gpu); - if (!fence) { + submit->out_fence = etnaviv_gpu_fence_alloc(gpu); + if (!submit->out_fence) { for (i = 0; i < nr_events; i++) event_free(gpu, event[i]); @@ -1426,80 +1381,51 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_unlock; } - gpu->event[event[0]].fence = fence; - submit->fence = dma_fence_get(fence); - gpu->active_fence = submit->fence->seqno; + gpu->active_fence = submit->out_fence->seqno; - if (gpu->lastctx != cmdbuf->ctx) { - gpu->mmu->need_flush = true; - gpu->switch_context = true; - gpu->lastctx = cmdbuf->ctx; - } - - if (cmdbuf->nr_pmrs) { + if (submit->nr_pmrs) { gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; - gpu->event[event[1]].cmdbuf = cmdbuf; + kref_get(&submit->refcount); + gpu->event[event[1]].submit = submit; etnaviv_sync_point_queue(gpu, event[1]); } - etnaviv_buffer_queue(gpu, event[0], cmdbuf); + kref_get(&submit->refcount); + gpu->event[event[0]].fence = submit->out_fence; + etnaviv_buffer_queue(gpu, submit->exec_state, event[0], + &submit->cmdbuf); - if (cmdbuf->nr_pmrs) { + if (submit->nr_pmrs) { gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; - gpu->event[event[2]].cmdbuf = cmdbuf; + kref_get(&submit->refcount); + gpu->event[event[2]].submit = submit; etnaviv_sync_point_queue(gpu, event[2]); } - cmdbuf->fence = fence; - list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); - - /* We're committed to adding this command buffer, hold a PM reference */ - pm_runtime_get_noresume(gpu->dev); - - for (i = 0; i < submit->nr_bos; i++) { - struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + list_add_tail(&submit->node, &gpu->active_submit_list); - /* Each cmdbuf takes a refcount on the mapping */ - etnaviv_gem_mapping_reference(submit->bos[i].mapping); - cmdbuf->bo_map[i] = submit->bos[i].mapping; - atomic_inc(&etnaviv_obj->gpu_active); - - if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) - reservation_object_add_excl_fence(etnaviv_obj->resv, - fence); - else - reservation_object_add_shared_fence(etnaviv_obj->resv, - fence); - } - cmdbuf->nr_bos = submit->nr_bos; hangcheck_timer_reset(gpu); ret = 0; out_unlock: mutex_unlock(&gpu->lock); -out_pm_put: - etnaviv_gpu_pm_put(gpu); - return ret; } -static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu, - struct etnaviv_event *event) -{ - u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); - - event->sync_point(gpu, event); - etnaviv_gpu_start_fe(gpu, addr + 2, 2); -} - static void sync_point_worker(struct work_struct *work) { struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, sync_point_work); + struct etnaviv_event *event = &gpu->event[gpu->sync_point_event]; + u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); - etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]); + event->sync_point(gpu, event); + etnaviv_submit_put(event->submit); event_free(gpu, gpu->sync_point_event); + + /* restart FE last to avoid GPU and IRQ racing against this worker */ + etnaviv_gpu_start_fe(gpu, addr + 2, 2); } /* @@ -1550,7 +1476,7 @@ static irqreturn_t irq_handler(int irq, void *data) if (gpu->event[event].sync_point) { gpu->sync_point_event = event; - etnaviv_queue_work(gpu->drm, &gpu->sync_point_work); + queue_work(gpu->wq, &gpu->sync_point_work); } fence = gpu->event[event].fence; @@ -1576,7 +1502,7 @@ static 
irqreturn_t irq_handler(int irq, void *data) } /* Retire the buffer objects in a work */ - etnaviv_queue_work(gpu->drm, &gpu->retire_work); + queue_work(gpu->wq, &gpu->retire_work); ret = IRQ_HANDLED; } @@ -1653,9 +1579,11 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms) static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) { - if (gpu->buffer) { + if (gpu->buffer.suballoc) { /* Replace the last WAIT with END */ + mutex_lock(&gpu->lock); etnaviv_buffer_end(gpu); + mutex_unlock(&gpu->lock); /* * We know that only the FE is busy here, this should @@ -1680,7 +1608,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu) etnaviv_gpu_update_clock(gpu); etnaviv_gpu_hw_init(gpu); - gpu->switch_context = true; + gpu->lastctx = NULL; gpu->exec_state = -1; mutex_unlock(&gpu->lock); @@ -1738,20 +1666,29 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, struct etnaviv_gpu *gpu = dev_get_drvdata(dev); int ret; - if (IS_ENABLED(CONFIG_THERMAL)) { + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) { gpu->cooling = thermal_of_cooling_device_register(dev->of_node, (char *)dev_name(dev), gpu, &cooling_ops); if (IS_ERR(gpu->cooling)) return PTR_ERR(gpu->cooling); } + gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0); + if (!gpu->wq) { + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); + return -ENOMEM; + } + #ifdef CONFIG_PM ret = pm_runtime_get_sync(gpu->dev); #else ret = etnaviv_gpu_clk_enable(gpu); #endif if (ret < 0) { - thermal_cooling_device_unregister(gpu->cooling); + destroy_workqueue(gpu->wq); + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); return ret; } @@ -1759,7 +1696,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, gpu->fence_context = dma_fence_context_alloc(1); spin_lock_init(&gpu->fence_spinlock); - INIT_LIST_HEAD(&gpu->active_cmd_list); + INIT_LIST_HEAD(&gpu->active_submit_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->sync_point_work, sync_point_worker); INIT_WORK(&gpu->recover_work, recover_worker); @@ -1784,6 +1721,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, hangcheck_disable(gpu); + flush_workqueue(gpu->wq); + destroy_workqueue(gpu->wq); + #ifdef CONFIG_PM pm_runtime_get_sync(gpu->dev); pm_runtime_put_sync_suspend(gpu->dev); @@ -1791,10 +1731,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, etnaviv_gpu_hw_suspend(gpu); #endif - if (gpu->buffer) { - etnaviv_cmdbuf_free(gpu->buffer); - gpu->buffer = NULL; - } + if (gpu->buffer.suballoc) + etnaviv_cmdbuf_free(&gpu->buffer); if (gpu->cmdbuf_suballoc) { etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc); @@ -1808,7 +1746,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, gpu->drm = NULL; - thermal_cooling_device_unregister(gpu->cooling); + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); gpu->cooling = NULL; } @@ -1931,7 +1870,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev) return ret; /* Re-initialise the basic hardware state */ - if (gpu->drm && gpu->buffer) { + if (gpu->drm && gpu->buffer.suballoc) { ret = etnaviv_gpu_hw_resume(gpu); if (ret) { etnaviv_gpu_clk_disable(gpu); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 4f10f147297a..7623905210dc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h 
@@ -20,6 +20,7 @@ #include <linux/clk.h> #include <linux/regulator/consumer.h> +#include "etnaviv_cmdbuf.h" #include "etnaviv_drv.h" struct etnaviv_gem_submit; @@ -89,7 +90,7 @@ struct etnaviv_chip_identity { struct etnaviv_event { struct dma_fence *fence; - struct etnaviv_cmdbuf *cmdbuf; + struct etnaviv_gem_submit *submit; void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); }; @@ -106,10 +107,10 @@ struct etnaviv_gpu { struct mutex lock; struct etnaviv_chip_identity identity; struct etnaviv_file_private *lastctx; - bool switch_context; + struct workqueue_struct *wq; /* 'ring'-buffer: */ - struct etnaviv_cmdbuf *buffer; + struct etnaviv_cmdbuf buffer; int exec_state; /* bus base address of memory */ @@ -122,7 +123,7 @@ struct etnaviv_gpu { spinlock_t event_spinlock; /* list of currently in-flight command buffers */ - struct list_head active_cmd_list; + struct list_head active_submit_list; u32 idle_mask; @@ -202,7 +203,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout); int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, - struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf); + struct etnaviv_gem_submit *submit); int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu); void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu); int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 14e24ac6573f..7a8c94731748 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -70,9 +70,8 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain) return -ENOMEM; } - for (i = 0; i < PT_ENTRIES; i++) - etnaviv_domain->pgtable_cpu[i] = - etnaviv_domain->base.bad_page_dma; + memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma, + PT_ENTRIES); return 0; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index fc60fc8ddbf0..1e956e266aa3 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -229,7 +229,7 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu) prefetch = etnaviv_buffer_config_mmuv2(gpu, (u32)etnaviv_domain->mtlb_dma, (u32)etnaviv_domain->base.bad_page_dma); - etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer), + etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer), prefetch); etnaviv_gpu_wait_idle(gpu, 100); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 35074b944778..d113fe06e6b5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -263,18 +263,16 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) { mapping->iova = iova; list_add_tail(&mapping->mmu_node, &mmu->mappings); - mutex_unlock(&mmu->lock); - return 0; + ret = 0; + goto unlock; } } node = &mapping->vram_node; ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size); - if (ret < 0) { - mutex_unlock(&mmu->lock); - return ret; - } + if (ret < 0) + goto unlock; mmu->last_iova = node->start + etnaviv_obj->base.size; mapping->iova = node->start; @@ -283,12 +281,12 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, if (ret < 0) { drm_mm_remove_node(node); - mutex_unlock(&mmu->lock); - 
return ret; + goto unlock; } list_add_tail(&mapping->mmu_node, &mmu->mappings); mmu->need_flush = true; +unlock: mutex_unlock(&mmu->lock); return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 768f5aafdd18..26dddfc41aac 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -479,9 +479,9 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r, } void etnaviv_perfmon_process(struct etnaviv_gpu *gpu, - const struct etnaviv_perfmon_request *pmr) + const struct etnaviv_perfmon_request *pmr, u32 exec_state) { - const struct etnaviv_pm_domain_meta *meta = &doms_meta[gpu->exec_state]; + const struct etnaviv_pm_domain_meta *meta = &doms_meta[exec_state]; const struct etnaviv_pm_domain *dom; const struct etnaviv_pm_signal *sig; u32 *bo = pmr->bo_vma; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h index 35dce194cb00..c1653c64ab6b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h @@ -44,6 +44,6 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r, u32 exec_state); void etnaviv_perfmon_process(struct etnaviv_gpu *gpu, - const struct etnaviv_perfmon_request *pmr); + const struct etnaviv_perfmon_request *pmr, u32 exec_state); #endif /* __ETNAVIV_PERFMON_H__ */ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 5a7c9d8abd6b..735ce47688f9 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -95,26 +95,21 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. -config DRM_EXYNOS_IPP - bool "Image Post Processor" - help - Choose this option if you want to use IPP feature for DRM. - config DRM_EXYNOS_FIMC bool "FIMC" - depends on DRM_EXYNOS_IPP && MFD_SYSCON + depends on BROKEN && MFD_SYSCON help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on DRM_EXYNOS_IPP + depends on BROKEN help Choose this option if you want to use Exynos Rotator for DRM. config DRM_EXYNOS_GSC bool "GScaler" - depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n help Choose this option if you want to use Exynos GSC for DRM. 
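
The central change in the etnaviv hunks above is the lifetime model: the short-lived submit struct and the separately tracked cmdbuf/perfmon allocations are merged into one refcounted etnaviv_gem_submit, with submit_cleanup() becoming the kref release function and every consumer that can outlive the ioctl (the perfmon sync points, the fence event, the GPU's active list) taking its own reference. A minimal sketch of that pattern, using hypothetical example_* names rather than the driver's exact code:

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct example_submit {
		struct kref refcount;
		/* bos, cmdbuf, in/out fences, perfmon requests, ... */
	};

	static struct example_submit *example_submit_create(void)
	{
		struct example_submit *submit = kzalloc(sizeof(*submit), GFP_KERNEL);

		if (submit)
			kref_init(&submit->refcount);	/* refcount starts at 1 */
		return submit;
	}

	/* Runs exactly once, when the last reference is dropped:
	 * unpin BOs, free the cmdbuf suballocation, put the fences, free. */
	static void example_submit_release(struct kref *kref)
	{
		struct example_submit *submit =
			container_of(kref, struct example_submit, refcount);

		kfree(submit);
	}

	static void example_submit_put(struct example_submit *submit)
	{
		kref_put(&submit->refcount, example_submit_release);
	}

Each kref_get() in etnaviv_gpu_submit() is balanced by an etnaviv_submit_put() in the sync point or retire worker, so whichever user finishes last performs the cleanup.
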
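A second recurring change is the move from the driver-global workqueue (the now-deleted etnaviv_queue_work() helper) to a per-GPU ordered workqueue, so retire, recover and sync-point work for one GPU serializes against itself and can be drained at unbind time without touching the DRM device or other GPUs. A sketch of the bind/unbind pairing, again with hypothetical names:

	#include <linux/device.h>
	#include <linux/workqueue.h>

	struct example_gpu {
		struct workqueue_struct *wq;
		struct work_struct retire_work;
	};

	static int example_gpu_bind(struct example_gpu *gpu, struct device *dev)
	{
		/* Ordered: at most one work item runs at a time, in queue order. */
		gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0);
		if (!gpu->wq)
			return -ENOMEM;
		return 0;
	}

	static void example_gpu_retire(struct example_gpu *gpu)
	{
		queue_work(gpu->wq, &gpu->retire_work);
	}

	static void example_gpu_unbind(struct example_gpu *gpu)
	{
		flush_workqueue(gpu->wq);	/* drain pending retire/recover work */
		destroy_workqueue(gpu->wq);
	}
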
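The submit path also folds the ETNA_SUBMIT_FENCE_FD_IN handling into submit_fence_sync(): an explicit in-fence only needs an actual CPU-side wait when it comes from a foreign fence context, since fences on the GPU's own timeline are already ordered by the ring. A sketch of that check, assuming a per-GPU fence context as in the driver:

	#include <linux/dma-fence.h>
	#include <linux/sync_file.h>

	/* Resolve an explicit-fencing fd; wait only if the fence (or any
	 * fence inside a fence array) was not created on our own timeline. */
	static int example_wait_in_fence(u64 own_context, int fence_fd)
	{
		struct dma_fence *in_fence = sync_file_get_fence(fence_fd);
		int ret = 0;

		if (!in_fence)
			return -EINVAL;

		if (!dma_fence_match_context(in_fence, own_context))
			ret = dma_fence_wait(in_fence, true);	/* interruptible */

		dma_fence_put(in_fence);
		return ret;
	}
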
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index bdf4212dde7b..a51c5459bb13 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,7 +18,6 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o -exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6be5b53c3b27..1c330f2a7a5d 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -21,13 +21,12 @@ #include <linux/pm_runtime.h> #include <linux/regmap.h> -#include <video/exynos5433_decon.h> - #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" #include "exynos_drm_fb.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" +#include "regs-decon5433.h" #define DSD_CFG_MUX 0x1004 #define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13) @@ -744,11 +743,6 @@ static int exynos5433_decon_probe(struct platform_device *pdev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "cannot find IO resource\n"); - return -ENXIO; - } - ctx->addr = devm_ioremap_resource(dev, res); if (IS_ERR(ctx->addr)) { dev_err(dev, "ioremap failed\n"); diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 615efcf7782a..3931d5e33fe0 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -25,13 +25,13 @@ #include <video/of_display_timing.h> #include <video/of_videomode.h> -#include <video/exynos7_decon.h> #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" #include "exynos_drm_iommu.h" +#include "regs-decon7.h" /* * DECON stands for Display and Enhancement controller. 
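The exynos5433 DECON hunk above can drop the explicit NULL check on the platform resource because devm_ioremap_resource() validates its resource argument itself, returning ERR_PTR(-EINVAL) and printing its own error message when the resource is NULL. The resulting probe idiom, sketched with a hypothetical driver:

	#include <linux/err.h>
	#include <linux/io.h>
	#include <linux/platform_device.h>

	static int example_probe(struct platform_device *pdev)
	{
		struct resource *res;
		void __iomem *base;

		/* No separate !res check needed: devm_ioremap_resource()
		 * handles a NULL resource and reports the error. */
		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		base = devm_ioremap_resource(&pdev->dev, res);
		if (IS_ERR(base))
			return PTR_ERR(base);

		/* ... use base ... */
		return 0;
	}
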
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index b96bd5a781b2..a518e9c6d6cc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -29,7 +29,6 @@ #include "exynos_drm_plane.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" -#include "exynos_drm_ipp.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" @@ -109,14 +108,6 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_PROPERTY, exynos_drm_ipp_get_property, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_SET_PROPERTY, exynos_drm_ipp_set_property, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_QUEUE_BUF, exynos_drm_ipp_queue_buf, - DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(EXYNOS_IPP_CMD_CTRL, exynos_drm_ipp_cmd_ctrl, - DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -257,9 +248,6 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { }, { DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), }, { - DRV_PTR(ipp_driver, CONFIG_DRM_EXYNOS_IPP), - DRM_VIRTUAL_DEVICE - }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE } diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 589d465a7f88..df2262f70d91 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -188,7 +188,6 @@ struct exynos_drm_g2d_private { struct drm_exynos_file_private { struct exynos_drm_g2d_private *g2d_priv; - struct device *ipp_dev; }; /* @@ -291,6 +290,5 @@ extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; extern struct platform_driver gsc_driver; -extern struct platform_driver ipp_driver; extern struct platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c deleted file mode 100644 index 3edda18cc2d2..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ /dev/null @@ -1,1806 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Authors: - * Eunchul Kim <[email protected]> - * Jinyoung Jeon <[email protected]> - * Sangmin Lee <[email protected]> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#include <linux/kernel.h> -#include <linux/platform_device.h> -#include <linux/types.h> -#include <linux/clk.h> -#include <linux/pm_runtime.h> - -#include <drm/drmP.h> -#include <drm/exynos_drm.h> -#include "exynos_drm_drv.h" -#include "exynos_drm_gem.h" -#include "exynos_drm_ipp.h" -#include "exynos_drm_iommu.h" - -/* - * IPP stands for Image Post Processing and - * supports image scaler/rotator and input/output DMA operations. - * using FIMC, GSC, Rotator, so on. - * IPP is integration device driver of same attribute h/w - */ - -/* - * TODO - * 1. expand command control id. - * 2. integrate property and config. - * 3. removed send_event id check routine. - * 4. compare send_event id if needed. - * 5. free subdrv_remove notifier callback list if needed. - * 6. need to check subdrv_open about multi-open. - * 7. 
need to power_on implement power and sysmmu ctrl. - */ - -#define get_ipp_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define ipp_is_m2m_cmd(c) (c == IPP_CMD_M2M) - -/* - * A structure of event. - * - * @base: base of event. - * @event: ipp event. - */ -struct drm_exynos_ipp_send_event { - struct drm_pending_event base; - struct drm_exynos_ipp_event event; -}; - -/* - * A structure of memory node. - * - * @list: list head to memory queue information. - * @ops_id: id of operations. - * @prop_id: id of property. - * @buf_id: id of buffer. - * @buf_info: gem objects and dma address, size. - * @filp: a pointer to drm_file. - */ -struct drm_exynos_ipp_mem_node { - struct list_head list; - enum drm_exynos_ops_id ops_id; - u32 prop_id; - u32 buf_id; - struct drm_exynos_ipp_buf_info buf_info; -}; - -/* - * A structure of ipp context. - * - * @subdrv: prepare initialization using subdrv. - * @ipp_lock: lock for synchronization of access to ipp_idr. - * @prop_lock: lock for synchronization of access to prop_idr. - * @ipp_idr: ipp driver idr. - * @prop_idr: property idr. - * @event_workq: event work queue. - * @cmd_workq: command work queue. - */ -struct ipp_context { - struct exynos_drm_subdrv subdrv; - struct mutex ipp_lock; - struct mutex prop_lock; - struct idr ipp_idr; - struct idr prop_idr; - struct workqueue_struct *event_workq; - struct workqueue_struct *cmd_workq; -}; - -static LIST_HEAD(exynos_drm_ippdrv_list); -static DEFINE_MUTEX(exynos_drm_ippdrv_lock); -static BLOCKING_NOTIFIER_HEAD(exynos_drm_ippnb_list); - -int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv) -{ - mutex_lock(&exynos_drm_ippdrv_lock); - list_add_tail(&ippdrv->drv_list, &exynos_drm_ippdrv_list); - mutex_unlock(&exynos_drm_ippdrv_lock); - - return 0; -} - -int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv) -{ - mutex_lock(&exynos_drm_ippdrv_lock); - list_del(&ippdrv->drv_list); - mutex_unlock(&exynos_drm_ippdrv_lock); - - return 0; -} - -static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj) -{ - int ret; - - mutex_lock(lock); - ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL); - mutex_unlock(lock); - - return ret; -} - -static void ipp_remove_id(struct idr *id_idr, struct mutex *lock, u32 id) -{ - mutex_lock(lock); - idr_remove(id_idr, id); - mutex_unlock(lock); -} - -static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id) -{ - void *obj; - - mutex_lock(lock); - obj = idr_find(id_idr, id); - mutex_unlock(lock); - - return obj; -} - -static int ipp_check_driver(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_property *property) -{ - if (ippdrv->dedicated || (!ipp_is_m2m_cmd(property->cmd) && - !pm_runtime_suspended(ippdrv->dev))) - return -EBUSY; - - if (ippdrv->check_property && - ippdrv->check_property(ippdrv->dev, property)) - return -EINVAL; - - return 0; -} - -static struct exynos_drm_ippdrv *ipp_find_driver(struct ipp_context *ctx, - struct drm_exynos_ipp_property *property) -{ - struct exynos_drm_ippdrv *ippdrv; - u32 ipp_id = property->ipp_id; - int ret; - - if (ipp_id) { - ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, ipp_id); - if (!ippdrv) { - DRM_DEBUG("ipp%d driver not found\n", ipp_id); - return ERR_PTR(-ENODEV); - } - - ret = ipp_check_driver(ippdrv, property); - if (ret < 0) { - DRM_DEBUG("ipp%d driver check error %d\n", ipp_id, ret); - return ERR_PTR(ret); - } - - return ippdrv; - } else { - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - ret = ipp_check_driver(ippdrv, property); - if (ret 
== 0) - return ippdrv; - } - - DRM_DEBUG("cannot find driver suitable for given property.\n"); - } - - return ERR_PTR(-ENODEV); -} - -static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id) -{ - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - int count = 0; - - DRM_DEBUG_KMS("prop_id[%d]\n", prop_id); - - /* - * This case is search ipp driver by prop_id handle. - * sometimes, ipp subsystem find driver by prop_id. - * e.g PAUSE state, queue buf, command control. - */ - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); - - mutex_lock(&ippdrv->cmd_lock); - list_for_each_entry(c_node, &ippdrv->cmd_list, list) { - if (c_node->property.prop_id == prop_id) { - mutex_unlock(&ippdrv->cmd_lock); - return ippdrv; - } - } - mutex_unlock(&ippdrv->cmd_lock); - } - - return ERR_PTR(-ENODEV); -} - -int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_prop_list *prop_list = data; - struct exynos_drm_ippdrv *ippdrv; - int count = 0; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!prop_list) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("ipp_id[%d]\n", prop_list->ipp_id); - - if (!prop_list->ipp_id) { - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) - count++; - - /* - * Supports ippdrv list count for user application. - * First step user application getting ippdrv count. - * and second step getting ippdrv capability using ipp_id. - */ - prop_list->count = count; - } else { - /* - * Getting ippdrv capability by ipp_id. - * some device not supported wb, output interface. - * so, user application detect correct ipp driver - * using this ioctl. - */ - ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, - prop_list->ipp_id); - if (!ippdrv) { - DRM_ERROR("not found ipp%d driver.\n", - prop_list->ipp_id); - return -ENODEV; - } - - *prop_list = ippdrv->prop_list; - } - - return 0; -} - -static void ipp_print_property(struct drm_exynos_ipp_property *property, - int idx) -{ - struct drm_exynos_ipp_config *config = &property->config[idx]; - struct drm_exynos_pos *pos = &config->pos; - struct drm_exynos_sz *sz = &config->sz; - - DRM_DEBUG_KMS("prop_id[%d]ops[%s]fmt[0x%x]\n", - property->prop_id, idx ? 
"dst" : "src", config->fmt); - - DRM_DEBUG_KMS("pos[%d %d %d %d]sz[%d %d]f[%d]r[%d]\n", - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize, config->flip, config->degree); -} - -static struct drm_exynos_ipp_cmd_work *ipp_create_cmd_work(void) -{ - struct drm_exynos_ipp_cmd_work *cmd_work; - - cmd_work = kzalloc(sizeof(*cmd_work), GFP_KERNEL); - if (!cmd_work) - return ERR_PTR(-ENOMEM); - - INIT_WORK((struct work_struct *)cmd_work, ipp_sched_cmd); - - return cmd_work; -} - -static struct drm_exynos_ipp_event_work *ipp_create_event_work(void) -{ - struct drm_exynos_ipp_event_work *event_work; - - event_work = kzalloc(sizeof(*event_work), GFP_KERNEL); - if (!event_work) - return ERR_PTR(-ENOMEM); - - INIT_WORK(&event_work->work, ipp_sched_event); - - return event_work; -} - -int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_property *property = data; - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - u32 prop_id; - int ret, i; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!property) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - prop_id = property->prop_id; - - /* - * This is log print for user application property. - * user application set various property. - */ - for_each_ipp_ops(i) - ipp_print_property(property, i); - - /* - * In case prop_id is not zero try to set existing property. - */ - if (prop_id) { - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, prop_id); - - if (!c_node || c_node->filp != file) { - DRM_DEBUG_KMS("prop_id[%d] not found\n", prop_id); - return -EINVAL; - } - - if (c_node->state != IPP_STATE_STOP) { - DRM_DEBUG_KMS("prop_id[%d] not stopped\n", prop_id); - return -EINVAL; - } - - c_node->property = *property; - - return 0; - } - - /* find ipp driver using ipp id */ - ippdrv = ipp_find_driver(ctx, property); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return -EINVAL; - } - - /* allocate command node */ - c_node = kzalloc(sizeof(*c_node), GFP_KERNEL); - if (!c_node) - return -ENOMEM; - - ret = ipp_create_id(&ctx->prop_idr, &ctx->prop_lock, c_node); - if (ret < 0) { - DRM_ERROR("failed to create id.\n"); - goto err_clear; - } - property->prop_id = ret; - - DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n", - property->prop_id, property->cmd, ippdrv); - - /* stored property information and ippdrv in private data */ - c_node->property = *property; - c_node->state = IPP_STATE_IDLE; - c_node->filp = file; - - c_node->start_work = ipp_create_cmd_work(); - if (IS_ERR(c_node->start_work)) { - DRM_ERROR("failed to create start work.\n"); - ret = PTR_ERR(c_node->start_work); - goto err_remove_id; - } - - c_node->stop_work = ipp_create_cmd_work(); - if (IS_ERR(c_node->stop_work)) { - DRM_ERROR("failed to create stop work.\n"); - ret = PTR_ERR(c_node->stop_work); - goto err_free_start; - } - - c_node->event_work = ipp_create_event_work(); - if (IS_ERR(c_node->event_work)) { - DRM_ERROR("failed to create event work.\n"); - ret = PTR_ERR(c_node->event_work); - goto err_free_stop; - } - - mutex_init(&c_node->lock); - mutex_init(&c_node->mem_lock); - mutex_init(&c_node->event_lock); - - init_completion(&c_node->start_complete); - init_completion(&c_node->stop_complete); - - for_each_ipp_ops(i) - 
INIT_LIST_HEAD(&c_node->mem_list[i]); - - INIT_LIST_HEAD(&c_node->event_list); - mutex_lock(&ippdrv->cmd_lock); - list_add_tail(&c_node->list, &ippdrv->cmd_list); - mutex_unlock(&ippdrv->cmd_lock); - - /* make dedicated state without m2m */ - if (!ipp_is_m2m_cmd(property->cmd)) - ippdrv->dedicated = true; - - return 0; - -err_free_stop: - kfree(c_node->stop_work); -err_free_start: - kfree(c_node->start_work); -err_remove_id: - ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, property->prop_id); -err_clear: - kfree(c_node); - return ret; -} - -static int ipp_validate_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_mem_node *m_node, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_config *ipp_cfg; - unsigned int num_plane; - unsigned long size, buf_size = 0, plane_size, img_size = 0; - unsigned int bpp, width, height; - int i; - - ipp_cfg = &c_node->property.config[m_node->ops_id]; - num_plane = drm_format_num_planes(ipp_cfg->fmt); - - /** - * This is a rather simplified validation of a memory node. - * It basically verifies provided gem object handles - * and the buffer sizes with respect to current configuration. - * This is not the best that can be done - * but it seems more than enough - */ - for (i = 0; i < num_plane; ++i) { - width = ipp_cfg->sz.hsize; - height = ipp_cfg->sz.vsize; - bpp = drm_format_plane_cpp(ipp_cfg->fmt, i); - - /* - * The result of drm_format_plane_cpp() for chroma planes must - * be used with drm_format_xxxx_chroma_subsampling() for - * correct result. - */ - if (i > 0) { - width /= drm_format_horz_chroma_subsampling( - ipp_cfg->fmt); - height /= drm_format_vert_chroma_subsampling( - ipp_cfg->fmt); - } - plane_size = width * height * bpp; - img_size += plane_size; - - if (m_node->buf_info.handles[i]) { - size = exynos_drm_gem_get_size(drm_dev, - m_node->buf_info.handles[i], - c_node->filp); - if (plane_size > size) { - DRM_ERROR( - "buffer %d is smaller than required\n", - i); - return -EINVAL; - } - - buf_size += size; - } - } - - if (buf_size < img_size) { - DRM_ERROR("size of buffers(%lu) is smaller than image(%lu)\n", - buf_size, img_size); - return -EINVAL; - } - - return 0; -} - -static int ipp_put_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node) -{ - int i; - - DRM_DEBUG_KMS("node[%pK]\n", m_node); - - if (!m_node) { - DRM_ERROR("invalid dequeue node.\n"); - return -EFAULT; - } - - DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id); - - /* put gem buffer */ - for_each_ipp_planar(i) { - unsigned long handle = m_node->buf_info.handles[i]; - if (handle) - exynos_drm_gem_put_dma_addr(drm_dev, handle, - c_node->filp); - } - - list_del(&m_node->list); - kfree(m_node); - - return 0; -} - -static struct drm_exynos_ipp_mem_node - *ipp_get_mem_node(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct drm_exynos_ipp_buf_info *buf_info; - int i; - - m_node = kzalloc(sizeof(*m_node), GFP_KERNEL); - if (!m_node) - return ERR_PTR(-ENOMEM); - - buf_info = &m_node->buf_info; - - /* operations, buffer id */ - m_node->ops_id = qbuf->ops_id; - m_node->prop_id = qbuf->prop_id; - m_node->buf_id = qbuf->buf_id; - INIT_LIST_HEAD(&m_node->list); - - DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id); - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id); - - for_each_ipp_planar(i) { - DRM_DEBUG_KMS("i[%d]handle[0x%x]\n", i, qbuf->handle[i]); - 
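The per-plane size rule enforced by ipp_validate_mem_node() above is worth spelling out: for planar YUV formats the chroma planes are smaller than the luma plane by the format's subsampling factors, so the minimum size of plane i is (width / hsub) * (height / vsub) * cpp. A minimal sketch of that computation, using the drm_fourcc helpers as they existed at the time of this patch (min_plane_size() itself is an illustrative name, not a kernel function):

    #include <drm/drm_fourcc.h>

    /* Minimum byte size plane @plane of format @fmt needs for a
     * @hsize x @vsize image; chroma planes (plane > 0) are scaled
     * down by the format's horizontal/vertical subsampling. */
    static unsigned long min_plane_size(u32 fmt, int plane,
                                        u32 hsize, u32 vsize)
    {
            u32 width = hsize, height = vsize;

            if (plane > 0) {
                    width /= drm_format_horz_chroma_subsampling(fmt);
                    height /= drm_format_vert_chroma_subsampling(fmt);
            }

            return (unsigned long)width * height *
                   drm_format_plane_cpp(fmt, plane);
    }

For NV12, for example, plane 1 (interleaved CbCr) is subsampled 2x2 with a cpp of 2, so a 64x64 image needs 64 * 64 = 4096 bytes of luma and 32 * 32 * 2 = 2048 bytes of chroma, which is exactly what the validation above checks each gem buffer against.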
- /* get dma address by handle */ - if (qbuf->handle[i]) { - dma_addr_t *addr; - - addr = exynos_drm_gem_get_dma_addr(drm_dev, - qbuf->handle[i], c_node->filp); - if (IS_ERR(addr)) { - DRM_ERROR("failed to get addr.\n"); - ipp_put_mem_node(drm_dev, c_node, m_node); - return ERR_PTR(-EFAULT); - } - - buf_info->handles[i] = qbuf->handle[i]; - buf_info->base[i] = *addr; - DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i, - &buf_info->base[i], buf_info->handles[i]); - } - } - - mutex_lock(&c_node->mem_lock); - if (ipp_validate_mem_node(drm_dev, m_node, c_node)) { - ipp_put_mem_node(drm_dev, c_node, m_node); - mutex_unlock(&c_node->mem_lock); - return ERR_PTR(-EFAULT); - } - list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]); - mutex_unlock(&c_node->mem_lock); - - return m_node; -} - -static void ipp_clean_mem_nodes(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, int ops) -{ - struct drm_exynos_ipp_mem_node *m_node, *tm_node; - struct list_head *head = &c_node->mem_list[ops]; - - mutex_lock(&c_node->mem_lock); - - list_for_each_entry_safe(m_node, tm_node, head, list) { - int ret; - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - } - - mutex_unlock(&c_node->mem_lock); -} - -static int ipp_get_event(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_send_event *e; - int ret; - - DRM_DEBUG_KMS("ops_id[%d]buf_id[%d]\n", qbuf->ops_id, qbuf->buf_id); - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; - - /* make event */ - e->event.base.type = DRM_EXYNOS_IPP_EVENT; - e->event.base.length = sizeof(e->event); - e->event.user_data = qbuf->user_data; - e->event.prop_id = qbuf->prop_id; - e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id; - - ret = drm_event_reserve_init(drm_dev, c_node->filp, &e->base, &e->event.base); - if (ret) { - kfree(e); - return ret; - } - - mutex_lock(&c_node->event_lock); - list_add_tail(&e->base.link, &c_node->event_list); - mutex_unlock(&c_node->event_lock); - - return 0; -} - -static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_send_event *e, *te; - int count = 0; - - mutex_lock(&c_node->event_lock); - list_for_each_entry_safe(e, te, &c_node->event_list, base.link) { - DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e); - - /* - * qbuf == NULL condition means all event deletion. - * stop operations want to delete all event list. - * another case delete only same buf id. 
- */ - if (!qbuf) { - /* delete list */ - list_del(&e->base.link); - kfree(e); - } - - /* compare buffer id */ - if (qbuf && (qbuf->buf_id == - e->event.buf_id[EXYNOS_DRM_OPS_DST])) { - /* delete list */ - list_del(&e->base.link); - kfree(e); - goto out_unlock; - } - } - -out_unlock: - mutex_unlock(&c_node->event_lock); - return; -} - -static void ipp_clean_cmd_node(struct ipp_context *ctx, - struct drm_exynos_ipp_cmd_node *c_node) -{ - int i; - - /* cancel works */ - cancel_work_sync(&c_node->start_work->work); - cancel_work_sync(&c_node->stop_work->work); - cancel_work_sync(&c_node->event_work->work); - - /* put event */ - ipp_put_event(c_node, NULL); - - for_each_ipp_ops(i) - ipp_clean_mem_nodes(ctx->subdrv.drm_dev, c_node, i); - - /* delete list */ - list_del(&c_node->list); - - ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, - c_node->property.prop_id); - - /* destroy mutex */ - mutex_destroy(&c_node->lock); - mutex_destroy(&c_node->mem_lock); - mutex_destroy(&c_node->event_lock); - - /* free command node */ - kfree(c_node->start_work); - kfree(c_node->stop_work); - kfree(c_node->event_work); - kfree(c_node); -} - -static bool ipp_check_mem_list(struct drm_exynos_ipp_cmd_node *c_node) -{ - switch (c_node->property.cmd) { - case IPP_CMD_WB: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]); - case IPP_CMD_OUTPUT: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]); - case IPP_CMD_M2M: - default: - return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]) && - !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]); - } -} - -static struct drm_exynos_ipp_mem_node - *ipp_find_mem_node(struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct list_head *head; - int count = 0; - - DRM_DEBUG_KMS("buf_id[%d]\n", qbuf->buf_id); - - /* source/destination memory list */ - head = &c_node->mem_list[qbuf->ops_id]; - - /* find memory node from memory list */ - list_for_each_entry(m_node, head, list) { - DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node); - - /* compare buffer id */ - if (m_node->buf_id == qbuf->buf_id) - return m_node; - } - - return NULL; -} - -static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node) -{ - struct exynos_drm_ipp_ops *ops = NULL; - int ret = 0; - - DRM_DEBUG_KMS("node[%pK]\n", m_node); - - if (!m_node) { - DRM_ERROR("invalid queue node.\n"); - return -EFAULT; - } - - DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id); - - /* get operations callback */ - ops = ippdrv->ops[m_node->ops_id]; - if (!ops) { - DRM_ERROR("not support ops.\n"); - return -EFAULT; - } - - /* set address and enable irq */ - if (ops->set_addr) { - ret = ops->set_addr(ippdrv->dev, &m_node->buf_info, - m_node->buf_id, IPP_BUF_ENQUEUE); - if (ret) { - DRM_ERROR("failed to set addr.\n"); - return ret; - } - } - - return ret; -} - -static void ipp_handle_cmd_work(struct device *dev, - struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_work *cmd_work, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct ipp_context *ctx = get_ipp_context(dev); - - cmd_work->ippdrv = ippdrv; - cmd_work->c_node = c_node; - queue_work(ctx->cmd_workq, &cmd_work->work); -} - -static int ipp_queue_buf_with_run(struct device *dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_mem_node *m_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_property *property; - 
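ipp_get_event() above follows the standard two-phase DRM event pattern: reserve space on the file's event queue when the buffer is queued, then send the event later from the completion path. A stripped-down sketch of that pattern (the payload struct and names are illustrative; the removed UAPI used DRM_EXYNOS_IPP_EVENT as the type):

    #include <drm/drm_file.h>

    struct my_payload {
            struct drm_event base;  /* type/length header read by userspace */
            __u64 user_data;
    };

    struct my_event {
            struct drm_pending_event pending;
            struct my_payload payload;
    };

    static int my_event_reserve(struct drm_device *dev, struct drm_file *file,
                                struct my_event *e, __u64 user_data)
    {
            e->payload.base.type = 0x80000000;      /* driver-private type */
            e->payload.base.length = sizeof(e->payload);
            e->payload.user_data = user_data;

            /* Fails (e.g. -ENOMEM) if the file's event space is exhausted;
             * on success the event is charged to @file until sent. */
            return drm_event_reserve_init(dev, file, &e->pending,
                                          &e->payload.base);
    }

    /* Completion path:            drm_send_event(dev, &e->pending);
     * Error path before sending:  drm_event_cancel_free(dev, &e->pending); */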
struct exynos_drm_ipp_ops *ops; - int ret; - - ippdrv = ipp_find_drv_by_handle(qbuf->prop_id); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return -EFAULT; - } - - ops = ippdrv->ops[qbuf->ops_id]; - if (!ops) { - DRM_ERROR("failed to get ops.\n"); - return -EFAULT; - } - - property = &c_node->property; - - if (c_node->state != IPP_STATE_START) { - DRM_DEBUG_KMS("bypass for invalid state.\n"); - return 0; - } - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - mutex_unlock(&c_node->mem_lock); - DRM_DEBUG_KMS("empty memory.\n"); - return 0; - } - - /* - * If set destination buffer and enabled clock, - * then m2m operations need start operations at queue_buf - */ - if (ipp_is_m2m_cmd(property->cmd)) { - struct drm_exynos_ipp_cmd_work *cmd_work = c_node->start_work; - - cmd_work->ctrl = IPP_CTRL_PLAY; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - } else { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - mutex_unlock(&c_node->mem_lock); - DRM_ERROR("failed to set m node.\n"); - return ret; - } - } - mutex_unlock(&c_node->mem_lock); - - return 0; -} - -static void ipp_clean_queue_buf(struct drm_device *drm_dev, - struct drm_exynos_ipp_cmd_node *c_node, - struct drm_exynos_ipp_queue_buf *qbuf) -{ - struct drm_exynos_ipp_mem_node *m_node, *tm_node; - - /* delete list */ - mutex_lock(&c_node->mem_lock); - list_for_each_entry_safe(m_node, tm_node, - &c_node->mem_list[qbuf->ops_id], list) { - if (m_node->buf_id == qbuf->buf_id && - m_node->ops_id == qbuf->ops_id) - ipp_put_mem_node(drm_dev, c_node, m_node); - } - mutex_unlock(&c_node->mem_lock); -} - -int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_queue_buf *qbuf = data; - struct drm_exynos_ipp_cmd_node *c_node; - struct drm_exynos_ipp_mem_node *m_node; - int ret; - - if (!qbuf) { - DRM_ERROR("invalid buf parameter.\n"); - return -EINVAL; - } - - if (qbuf->ops_id >= EXYNOS_DRM_OPS_MAX) { - DRM_ERROR("invalid ops parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("prop_id[%d]ops_id[%s]buf_id[%d]buf_type[%d]\n", - qbuf->prop_id, qbuf->ops_id ? "dst" : "src", - qbuf->buf_id, qbuf->buf_type); - - /* find command node */ - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, - qbuf->prop_id); - if (!c_node || c_node->filp != file) { - DRM_ERROR("failed to get command node.\n"); - return -ENODEV; - } - - /* buffer control */ - switch (qbuf->buf_type) { - case IPP_BUF_ENQUEUE: - /* get memory node */ - m_node = ipp_get_mem_node(drm_dev, c_node, qbuf); - if (IS_ERR(m_node)) { - DRM_ERROR("failed to get m_node.\n"); - return PTR_ERR(m_node); - } - - /* - * first step get event for destination buffer. - * and second step when M2M case run with destination buffer - * if needed. - */ - if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) { - /* get event for destination buffer */ - ret = ipp_get_event(drm_dev, c_node, qbuf); - if (ret) { - DRM_ERROR("failed to get event.\n"); - goto err_clean_node; - } - - /* - * M2M case run play control for streaming feature. - * other case set address and waiting. 
- */ - ret = ipp_queue_buf_with_run(dev, c_node, m_node, qbuf); - if (ret) { - DRM_ERROR("failed to run command.\n"); - goto err_clean_node; - } - } - break; - case IPP_BUF_DEQUEUE: - mutex_lock(&c_node->lock); - - /* put event for destination buffer */ - if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) - ipp_put_event(c_node, qbuf); - - ipp_clean_queue_buf(drm_dev, c_node, qbuf); - - mutex_unlock(&c_node->lock); - break; - default: - DRM_ERROR("invalid buffer control.\n"); - return -EINVAL; - } - - return 0; - -err_clean_node: - DRM_ERROR("clean memory nodes.\n"); - - ipp_clean_queue_buf(drm_dev, c_node, qbuf); - return ret; -} - -static bool exynos_drm_ipp_check_valid(struct device *dev, - enum drm_exynos_ipp_ctrl ctrl, enum drm_exynos_ipp_state state) -{ - if (ctrl != IPP_CTRL_PLAY) { - if (pm_runtime_suspended(dev)) { - DRM_ERROR("pm:runtime_suspended.\n"); - goto err_status; - } - } - - switch (ctrl) { - case IPP_CTRL_PLAY: - if (state != IPP_STATE_IDLE) - goto err_status; - break; - case IPP_CTRL_STOP: - if (state == IPP_STATE_STOP) - goto err_status; - break; - case IPP_CTRL_PAUSE: - if (state != IPP_STATE_START) - goto err_status; - break; - case IPP_CTRL_RESUME: - if (state != IPP_STATE_STOP) - goto err_status; - break; - default: - DRM_ERROR("invalid state.\n"); - goto err_status; - } - - return true; - -err_status: - DRM_ERROR("invalid status:ctrl[%d]state[%d]\n", ctrl, state); - return false; -} - -int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - struct exynos_drm_ippdrv *ippdrv = NULL; - struct device *dev = file_priv->ipp_dev; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_cmd_ctrl *cmd_ctrl = data; - struct drm_exynos_ipp_cmd_work *cmd_work; - struct drm_exynos_ipp_cmd_node *c_node; - - if (!ctx) { - DRM_ERROR("invalid context.\n"); - return -EINVAL; - } - - if (!cmd_ctrl) { - DRM_ERROR("invalid control parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("ctrl[%d]prop_id[%d]\n", - cmd_ctrl->ctrl, cmd_ctrl->prop_id); - - ippdrv = ipp_find_drv_by_handle(cmd_ctrl->prop_id); - if (IS_ERR(ippdrv)) { - DRM_ERROR("failed to get ipp driver.\n"); - return PTR_ERR(ippdrv); - } - - c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, - cmd_ctrl->prop_id); - if (!c_node || c_node->filp != file) { - DRM_ERROR("invalid command node list.\n"); - return -ENODEV; - } - - if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl, - c_node->state)) { - DRM_ERROR("invalid state.\n"); - return -EINVAL; - } - - switch (cmd_ctrl->ctrl) { - case IPP_CTRL_PLAY: - if (pm_runtime_suspended(ippdrv->dev)) - pm_runtime_get_sync(ippdrv->dev); - - c_node->state = IPP_STATE_START; - - cmd_work = c_node->start_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - break; - case IPP_CTRL_STOP: - cmd_work = c_node->stop_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - - if (!wait_for_completion_timeout(&c_node->stop_complete, - msecs_to_jiffies(300))) { - DRM_ERROR("timeout stop:prop_id[%d]\n", - c_node->property.prop_id); - } - - c_node->state = IPP_STATE_STOP; - ippdrv->dedicated = false; - mutex_lock(&ippdrv->cmd_lock); - ipp_clean_cmd_node(ctx, c_node); - - if (list_empty(&ippdrv->cmd_list)) - pm_runtime_put_sync(ippdrv->dev); - mutex_unlock(&ippdrv->cmd_lock); - break; - case IPP_CTRL_PAUSE: - cmd_work = c_node->stop_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - 
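The STOP branch above (and the PAUSE branch that follows) uses the same handshake: queue the stop work, then block with a timeout until the worker calls complete() on stop_complete, which ipp_sched_cmd() further down does. The shape of the pattern, reduced to its essentials (all names illustrative):

    #include <linux/completion.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *cmd_wq;
    static DECLARE_COMPLETION(stop_done);

    static void stop_fn(struct work_struct *work)
    {
            /* ... tell the hardware to stop ... */
            complete(&stop_done);           /* wake the waiter below */
    }
    static DECLARE_WORK(stop_work, stop_fn);

    static int request_stop(void)
    {
            queue_work(cmd_wq, &stop_work);

            /* Returns 0 on timeout, remaining jiffies otherwise. */
            if (!wait_for_completion_timeout(&stop_done,
                                             msecs_to_jiffies(300)))
                    return -ETIMEDOUT;
            return 0;
    }

Note that the driver above only logs the timeout and forces the state to IPP_STATE_STOP regardless, rather than failing the ioctl as this sketch does.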
ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - - if (!wait_for_completion_timeout(&c_node->stop_complete, - msecs_to_jiffies(200))) { - DRM_ERROR("timeout stop:prop_id[%d]\n", - c_node->property.prop_id); - } - - c_node->state = IPP_STATE_STOP; - break; - case IPP_CTRL_RESUME: - c_node->state = IPP_STATE_START; - cmd_work = c_node->start_work; - cmd_work->ctrl = cmd_ctrl->ctrl; - ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node); - break; - default: - DRM_ERROR("could not support this state currently.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("done ctrl[%d]prop_id[%d]\n", - cmd_ctrl->ctrl, cmd_ctrl->prop_id); - - return 0; -} - -int exynos_drm_ippnb_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register( - &exynos_drm_ippnb_list, nb); -} - -int exynos_drm_ippnb_unregister(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister( - &exynos_drm_ippnb_list, nb); -} - -int exynos_drm_ippnb_send_event(unsigned long val, void *v) -{ - return blocking_notifier_call_chain( - &exynos_drm_ippnb_list, val, v); -} - -static int ipp_set_property(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_property *property) -{ - struct exynos_drm_ipp_ops *ops = NULL; - bool swap = false; - int ret, i; - - if (!property) { - DRM_ERROR("invalid property parameter.\n"); - return -EINVAL; - } - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* reset h/w block */ - if (ippdrv->reset && - ippdrv->reset(ippdrv->dev)) { - return -EINVAL; - } - - /* set source,destination operations */ - for_each_ipp_ops(i) { - struct drm_exynos_ipp_config *config = - &property->config[i]; - - ops = ippdrv->ops[i]; - if (!ops || !config) { - DRM_ERROR("not support ops and config.\n"); - return -EINVAL; - } - - /* set format */ - if (ops->set_fmt) { - ret = ops->set_fmt(ippdrv->dev, config->fmt); - if (ret) - return ret; - } - - /* set transform for rotation, flip */ - if (ops->set_transf) { - ret = ops->set_transf(ippdrv->dev, config->degree, - config->flip, &swap); - if (ret) - return ret; - } - - /* set size */ - if (ops->set_size) { - ret = ops->set_size(ippdrv->dev, swap, &config->pos, - &config->sz); - if (ret) - return ret; - } - } - - return 0; -} - -static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_mem_node *m_node; - struct drm_exynos_ipp_property *property = &c_node->property; - struct list_head *head; - int ret, i; - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* store command info in ippdrv */ - ippdrv->c_node = c_node; - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - DRM_DEBUG_KMS("empty memory.\n"); - ret = -ENOMEM; - goto err_unlock; - } - - /* set current property in ippdrv */ - ret = ipp_set_property(ippdrv, property); - if (ret) { - DRM_ERROR("failed to set property.\n"); - ippdrv->c_node = NULL; - goto err_unlock; - } - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) { - /* source/destination memory list */ - head = &c_node->mem_list[i]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - DRM_DEBUG_KMS("m_node[%pK]\n", m_node); - - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - case IPP_CMD_WB: - /* destination memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_DST]; - - list_for_each_entry(m_node, head, list) { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - 
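The exynos_drm_ippnb_register/unregister/send_event wrappers above are thin shims over a blocking notifier chain, which lets other exynos sub-drivers subscribe to IPP events without a direct dependency on this file. The underlying kernel pattern in isolation (chain head, subscriber and broadcast; names illustrative):

    #include <linux/notifier.h>

    static BLOCKING_NOTIFIER_HEAD(my_chain);

    static int my_cb(struct notifier_block *nb, unsigned long val, void *data)
    {
            /* @val and @data are whatever the broadcaster passed below */
            return NOTIFY_OK;
    }

    static struct notifier_block my_nb = {
            .notifier_call = my_cb,
    };

    static void example(void)
    {
            blocking_notifier_chain_register(&my_chain, &my_nb);
            blocking_notifier_call_chain(&my_chain, 1UL /* event id */, NULL);
            blocking_notifier_chain_unregister(&my_chain, &my_nb);
    }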
if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - case IPP_CMD_OUTPUT: - /* source memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC]; - - list_for_each_entry(m_node, head, list) { - ret = ipp_set_mem_node(ippdrv, c_node, m_node); - if (ret) { - DRM_ERROR("failed to set m node.\n"); - goto err_unlock; - } - } - break; - default: - DRM_ERROR("invalid operations.\n"); - ret = -EINVAL; - goto err_unlock; - } - mutex_unlock(&c_node->mem_lock); - - DRM_DEBUG_KMS("cmd[%d]\n", property->cmd); - - /* start operations */ - if (ippdrv->start) { - ret = ippdrv->start(ippdrv->dev, property->cmd); - if (ret) { - DRM_ERROR("failed to start ops.\n"); - ippdrv->c_node = NULL; - return ret; - } - } - - return 0; - -err_unlock: - mutex_unlock(&c_node->mem_lock); - ippdrv->c_node = NULL; - return ret; -} - -static int ipp_stop_property(struct drm_device *drm_dev, - struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node) -{ - struct drm_exynos_ipp_property *property = &c_node->property; - int i; - - DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id); - - /* stop operations */ - if (ippdrv->stop) - ippdrv->stop(ippdrv->dev, property->cmd); - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) - ipp_clean_mem_nodes(drm_dev, c_node, i); - break; - case IPP_CMD_WB: - ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_DST); - break; - case IPP_CMD_OUTPUT: - ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_SRC); - break; - default: - DRM_ERROR("invalid operations.\n"); - return -EINVAL; - } - - return 0; -} - -void ipp_sched_cmd(struct work_struct *work) -{ - struct drm_exynos_ipp_cmd_work *cmd_work = - container_of(work, struct drm_exynos_ipp_cmd_work, work); - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - struct drm_exynos_ipp_property *property; - int ret; - - ippdrv = cmd_work->ippdrv; - if (!ippdrv) { - DRM_ERROR("invalid ippdrv list.\n"); - return; - } - - c_node = cmd_work->c_node; - if (!c_node) { - DRM_ERROR("invalid command node list.\n"); - return; - } - - mutex_lock(&c_node->lock); - - property = &c_node->property; - - switch (cmd_work->ctrl) { - case IPP_CTRL_PLAY: - case IPP_CTRL_RESUME: - ret = ipp_start_property(ippdrv, c_node); - if (ret) { - DRM_ERROR("failed to start property:prop_id[%d]\n", - c_node->property.prop_id); - goto err_unlock; - } - - /* - * M2M case supports wait_completion of transfer. - * because M2M case supports single unit operation - * with multiple queue. - * M2M need to wait completion of data transfer. 
- */ - if (ipp_is_m2m_cmd(property->cmd)) { - if (!wait_for_completion_timeout - (&c_node->start_complete, msecs_to_jiffies(200))) { - DRM_ERROR("timeout event:prop_id[%d]\n", - c_node->property.prop_id); - goto err_unlock; - } - } - break; - case IPP_CTRL_STOP: - case IPP_CTRL_PAUSE: - ret = ipp_stop_property(ippdrv->drm_dev, ippdrv, - c_node); - if (ret) { - DRM_ERROR("failed to stop property.\n"); - goto err_unlock; - } - - complete(&c_node->stop_complete); - break; - default: - DRM_ERROR("unknown control type\n"); - break; - } - - DRM_DEBUG_KMS("ctrl[%d] done.\n", cmd_work->ctrl); - -err_unlock: - mutex_unlock(&c_node->lock); -} - -static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv, - struct drm_exynos_ipp_cmd_node *c_node, int *buf_id) -{ - struct drm_device *drm_dev = ippdrv->drm_dev; - struct drm_exynos_ipp_property *property = &c_node->property; - struct drm_exynos_ipp_mem_node *m_node; - struct drm_exynos_ipp_queue_buf qbuf; - struct drm_exynos_ipp_send_event *e; - struct list_head *head; - struct timeval now; - u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, }; - int ret, i; - - for_each_ipp_ops(i) - DRM_DEBUG_KMS("%s buf_id[%d]\n", i ? "dst" : "src", buf_id[i]); - - if (!drm_dev) { - DRM_ERROR("failed to get drm_dev.\n"); - return -EINVAL; - } - - if (!property) { - DRM_ERROR("failed to get property.\n"); - return -EINVAL; - } - - mutex_lock(&c_node->event_lock); - if (list_empty(&c_node->event_list)) { - DRM_DEBUG_KMS("event list is empty.\n"); - ret = 0; - goto err_event_unlock; - } - - mutex_lock(&c_node->mem_lock); - if (!ipp_check_mem_list(c_node)) { - DRM_DEBUG_KMS("empty memory.\n"); - ret = 0; - goto err_mem_unlock; - } - - /* check command */ - switch (property->cmd) { - case IPP_CMD_M2M: - for_each_ipp_ops(i) { - /* source/destination memory list */ - head = &c_node->mem_list[i]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - tbuf_id[i] = m_node->buf_id; - DRM_DEBUG_KMS("%s buf_id[%d]\n", - i ? "dst" : "src", tbuf_id[i]); - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - } - break; - case IPP_CMD_WB: - /* clear buf for finding */ - memset(&qbuf, 0x0, sizeof(qbuf)); - qbuf.ops_id = EXYNOS_DRM_OPS_DST; - qbuf.buf_id = buf_id[EXYNOS_DRM_OPS_DST]; - - /* get memory node entry */ - m_node = ipp_find_mem_node(c_node, &qbuf); - if (!m_node) { - DRM_ERROR("empty memory node.\n"); - ret = -ENOMEM; - goto err_mem_unlock; - } - - tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id; - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - break; - case IPP_CMD_OUTPUT: - /* source memory list */ - head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC]; - - m_node = list_first_entry(head, - struct drm_exynos_ipp_mem_node, list); - - tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id; - - ret = ipp_put_mem_node(drm_dev, c_node, m_node); - if (ret) - DRM_ERROR("failed to put m_node.\n"); - break; - default: - DRM_ERROR("invalid operations.\n"); - ret = -EINVAL; - goto err_mem_unlock; - } - mutex_unlock(&c_node->mem_lock); - - if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST]) - DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n", - tbuf_id[1], buf_id[1], property->prop_id); - - /* - * command node have event list of destination buffer - * If destination buffer enqueue to mem list, - * then we make event and link to event list tail. - * so, we get first event for first enqueued buffer. 
- */ - e = list_first_entry(&c_node->event_list, - struct drm_exynos_ipp_send_event, base.link); - - do_gettimeofday(&now); - DRM_DEBUG_KMS("tv_sec[%ld]tv_usec[%ld]\n", now.tv_sec, now.tv_usec); - e->event.tv_sec = now.tv_sec; - e->event.tv_usec = now.tv_usec; - e->event.prop_id = property->prop_id; - - /* set buffer id about source destination */ - for_each_ipp_ops(i) - e->event.buf_id[i] = tbuf_id[i]; - - drm_send_event(drm_dev, &e->base); - mutex_unlock(&c_node->event_lock); - - DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n", - property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]); - - return 0; - -err_mem_unlock: - mutex_unlock(&c_node->mem_lock); -err_event_unlock: - mutex_unlock(&c_node->event_lock); - return ret; -} - -void ipp_sched_event(struct work_struct *work) -{ - struct drm_exynos_ipp_event_work *event_work = - container_of(work, struct drm_exynos_ipp_event_work, work); - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - int ret; - - if (!event_work) { - DRM_ERROR("failed to get event_work.\n"); - return; - } - - DRM_DEBUG_KMS("buf_id[%d]\n", event_work->buf_id[EXYNOS_DRM_OPS_DST]); - - ippdrv = event_work->ippdrv; - if (!ippdrv) { - DRM_ERROR("failed to get ipp driver.\n"); - return; - } - - c_node = ippdrv->c_node; - if (!c_node) { - DRM_ERROR("failed to get command node.\n"); - return; - } - - /* - * IPP supports command thread, event thread synchronization. - * If IPP close immediately from user land, then IPP make - * synchronization with command thread, so make complete event. - * or going out operations. - */ - if (c_node->state != IPP_STATE_START) { - DRM_DEBUG_KMS("bypass state[%d]prop_id[%d]\n", - c_node->state, c_node->property.prop_id); - goto err_completion; - } - - ret = ipp_send_event(ippdrv, c_node, event_work->buf_id); - if (ret) { - DRM_ERROR("failed to send event.\n"); - goto err_completion; - } - -err_completion: - if (ipp_is_m2m_cmd(c_node->property.cmd)) - complete(&c_node->start_complete); -} - -static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev) -{ - struct ipp_context *ctx = get_ipp_context(dev); - struct exynos_drm_ippdrv *ippdrv; - int ret, count = 0; - - /* get ipp driver entry */ - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - ippdrv->drm_dev = drm_dev; - - ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv); - if (ret < 0) { - DRM_ERROR("failed to create id.\n"); - goto err; - } - ippdrv->prop_list.ipp_id = ret; - - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n", - count++, ippdrv, ret); - - /* store parent device for node */ - ippdrv->parent_dev = dev; - - /* store event work queue and handler */ - ippdrv->event_workq = ctx->event_workq; - ippdrv->sched_event = ipp_sched_event; - INIT_LIST_HEAD(&ippdrv->cmd_list); - mutex_init(&ippdrv->cmd_lock); - - ret = drm_iommu_attach_device(drm_dev, ippdrv->dev); - if (ret) { - DRM_ERROR("failed to activate iommu\n"); - goto err; - } - } - - return 0; - -err: - /* get ipp driver entry */ - list_for_each_entry_continue_reverse(ippdrv, &exynos_drm_ippdrv_list, - drv_list) { - drm_iommu_detach_device(drm_dev, ippdrv->dev); - - ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock, - ippdrv->prop_list.ipp_id); - } - - return ret; -} - -static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev) -{ - struct exynos_drm_ippdrv *ippdrv, *t; - struct ipp_context *ctx = get_ipp_context(dev); - - /* get ipp driver entry */ - list_for_each_entry_safe(ippdrv, t, &exynos_drm_ippdrv_list, drv_list) { - 
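ipp_subdrv_probe()'s error path above shows the standard unwind idiom for partial initialization of a list: when the walk fails partway, list_for_each_entry_continue_reverse() revisits only the entries that were already set up, in reverse order, skipping the element that failed. Reduced to a self-contained shape (struct and helpers are stand-ins):

    #include <linux/list.h>

    struct item {
            struct list_head link;
            /* ... per-item state ... */
    };

    static int item_init(struct item *it) { return 0; }    /* stand-in */
    static void item_fini(struct item *it) { }              /* stand-in */

    static int init_all(struct list_head *items)
    {
            struct item *it;
            int ret = 0;

            list_for_each_entry(it, items, link) {
                    ret = item_init(it);
                    if (ret)
                            goto unwind;
            }
            return 0;

    unwind:
            /* walks back over only the entries that did succeed */
            list_for_each_entry_continue_reverse(it, items, link)
                    item_fini(it);
            return ret;
    }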
drm_iommu_detach_device(drm_dev, ippdrv->dev); - - ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock, - ippdrv->prop_list.ipp_id); - - ippdrv->drm_dev = NULL; - exynos_drm_ippdrv_unregister(ippdrv); - } -} - -static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev, - struct drm_file *file) -{ - struct drm_exynos_file_private *file_priv = file->driver_priv; - - file_priv->ipp_dev = dev; - - DRM_DEBUG_KMS("done priv[%pK]\n", dev); - - return 0; -} - -static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev, - struct drm_file *file) -{ - struct exynos_drm_ippdrv *ippdrv = NULL; - struct ipp_context *ctx = get_ipp_context(dev); - struct drm_exynos_ipp_cmd_node *c_node, *tc_node; - int count = 0; - - list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - mutex_lock(&ippdrv->cmd_lock); - list_for_each_entry_safe(c_node, tc_node, - &ippdrv->cmd_list, list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", - count++, ippdrv); - - if (c_node->filp == file) { - /* - * userland goto unnormal state. process killed. - * and close the file. - * so, IPP didn't called stop cmd ctrl. - * so, we are make stop operation in this state. - */ - if (c_node->state == IPP_STATE_START) { - ipp_stop_property(drm_dev, ippdrv, - c_node); - c_node->state = IPP_STATE_STOP; - } - - ippdrv->dedicated = false; - ipp_clean_cmd_node(ctx, c_node); - if (list_empty(&ippdrv->cmd_list)) - pm_runtime_put_sync(ippdrv->dev); - } - } - mutex_unlock(&ippdrv->cmd_lock); - } - - return; -} - -static int ipp_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct ipp_context *ctx; - struct exynos_drm_subdrv *subdrv; - int ret; - - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - mutex_init(&ctx->ipp_lock); - mutex_init(&ctx->prop_lock); - - idr_init(&ctx->ipp_idr); - idr_init(&ctx->prop_idr); - - /* - * create single thread for ipp event - * IPP supports event thread for IPP drivers. - * IPP driver send event_work to this thread. - * and IPP event thread send event to user process. - */ - ctx->event_workq = create_singlethread_workqueue("ipp_event"); - if (!ctx->event_workq) { - dev_err(dev, "failed to create event workqueue\n"); - return -EINVAL; - } - - /* - * create single thread for ipp command - * IPP supports command thread for user process. - * user process make command node using set property ioctl. - * and make start_work and send this work to command thread. - * and then this command thread start property. 
- */ - ctx->cmd_workq = create_singlethread_workqueue("ipp_cmd"); - if (!ctx->cmd_workq) { - dev_err(dev, "failed to create cmd workqueue\n"); - ret = -EINVAL; - goto err_event_workq; - } - - /* set sub driver informations */ - subdrv = &ctx->subdrv; - subdrv->dev = dev; - subdrv->probe = ipp_subdrv_probe; - subdrv->remove = ipp_subdrv_remove; - subdrv->open = ipp_subdrv_open; - subdrv->close = ipp_subdrv_close; - - platform_set_drvdata(pdev, ctx); - - ret = exynos_drm_subdrv_register(subdrv); - if (ret < 0) { - DRM_ERROR("failed to register drm ipp device.\n"); - goto err_cmd_workq; - } - - dev_info(dev, "drm ipp registered successfully.\n"); - - return 0; - -err_cmd_workq: - destroy_workqueue(ctx->cmd_workq); -err_event_workq: - destroy_workqueue(ctx->event_workq); - return ret; -} - -static int ipp_remove(struct platform_device *pdev) -{ - struct ipp_context *ctx = platform_get_drvdata(pdev); - - /* unregister sub driver */ - exynos_drm_subdrv_unregister(&ctx->subdrv); - - /* remove,destroy ipp idr */ - idr_destroy(&ctx->ipp_idr); - idr_destroy(&ctx->prop_idr); - - mutex_destroy(&ctx->ipp_lock); - mutex_destroy(&ctx->prop_lock); - - /* destroy command, event work queue */ - destroy_workqueue(ctx->cmd_workq); - destroy_workqueue(ctx->event_workq); - - return 0; -} - -struct platform_driver ipp_driver = { - .probe = ipp_probe, - .remove = ipp_remove, - .driver = { - .name = "exynos-drm-ipp", - .owner = THIS_MODULE, - }, -}; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h deleted file mode 100644 index 2a61547a39d0..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <[email protected]> - * Jinyoung Jeon <[email protected]> - * Sangmin Lee <[email protected]> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_IPP_H_ -#define _EXYNOS_DRM_IPP_H_ - -#define for_each_ipp_ops(pos) \ - for (pos = 0; pos < EXYNOS_DRM_OPS_MAX; pos++) -#define for_each_ipp_planar(pos) \ - for (pos = 0; pos < EXYNOS_DRM_PLANAR_MAX; pos++) - -#define IPP_GET_LCD_WIDTH _IOR('F', 302, int) -#define IPP_GET_LCD_HEIGHT _IOR('F', 303, int) -#define IPP_SET_WRITEBACK _IOW('F', 304, u32) - -/* definition of state */ -enum drm_exynos_ipp_state { - IPP_STATE_IDLE, - IPP_STATE_START, - IPP_STATE_STOP, -}; - -/* - * A structure of command work information. - * @work: work structure. - * @ippdrv: current work ippdrv. - * @c_node: command node information. - * @ctrl: command control. - */ -struct drm_exynos_ipp_cmd_work { - struct work_struct work; - struct exynos_drm_ippdrv *ippdrv; - struct drm_exynos_ipp_cmd_node *c_node; - enum drm_exynos_ipp_ctrl ctrl; -}; - -/* - * A structure of command node. - * - * @list: list head to command queue information. - * @event_list: list head of event. - * @mem_list: list head to source,destination memory queue information. - * @lock: lock for synchronization of access to ioctl. - * @mem_lock: lock for synchronization of access to memory nodes. - * @event_lock: lock for synchronization of access to scheduled event. - * @start_complete: completion of start of command. - * @stop_complete: completion of stop of command. - * @property: property information. 
- * @start_work: start command work structure. - * @stop_work: stop command work structure. - * @event_work: event work structure. - * @state: state of command node. - * @filp: associated file pointer. - */ -struct drm_exynos_ipp_cmd_node { - struct list_head list; - struct list_head event_list; - struct list_head mem_list[EXYNOS_DRM_OPS_MAX]; - struct mutex lock; - struct mutex mem_lock; - struct mutex event_lock; - struct completion start_complete; - struct completion stop_complete; - struct drm_exynos_ipp_property property; - struct drm_exynos_ipp_cmd_work *start_work; - struct drm_exynos_ipp_cmd_work *stop_work; - struct drm_exynos_ipp_event_work *event_work; - enum drm_exynos_ipp_state state; - struct drm_file *filp; -}; - -/* - * A structure of buffer information. - * - * @handles: Y, Cb, Cr each gem object handle. - * @base: Y, Cb, Cr each planar address. - */ -struct drm_exynos_ipp_buf_info { - unsigned long handles[EXYNOS_DRM_PLANAR_MAX]; - dma_addr_t base[EXYNOS_DRM_PLANAR_MAX]; -}; - -/* - * A structure of wb setting information. - * - * @enable: enable flag for wb. - * @refresh: HZ of the refresh rate. - */ -struct drm_exynos_ipp_set_wb { - __u32 enable; - __u32 refresh; -}; - -/* - * A structure of event work information. - * - * @work: work structure. - * @ippdrv: current work ippdrv. - * @buf_id: id of src, dst buffer. - */ -struct drm_exynos_ipp_event_work { - struct work_struct work; - struct exynos_drm_ippdrv *ippdrv; - u32 buf_id[EXYNOS_DRM_OPS_MAX]; -}; - -/* - * A structure of source,destination operations. - * - * @set_fmt: set format of image. - * @set_transf: set transform(rotations, flip). - * @set_size: set size of region. - * @set_addr: set address for dma. - */ -struct exynos_drm_ipp_ops { - int (*set_fmt)(struct device *dev, u32 fmt); - int (*set_transf)(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap); - int (*set_size)(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz); - int (*set_addr)(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type); -}; - -/* - * A structure of ipp driver. - * - * @drv_list: list head for registed sub driver information. - * @parent_dev: parent device information. - * @dev: platform device. - * @drm_dev: drm device. - * @dedicated: dedicated ipp device. - * @ops: source, destination operations. - * @event_workq: event work queue. - * @c_node: current command information. - * @cmd_list: list head for command information. - * @cmd_lock: lock for synchronization of access to cmd_list. - * @prop_list: property informations of current ipp driver. - * @check_property: check property about format, size, buffer. - * @reset: reset ipp block. - * @start: ipp each device start. - * @stop: ipp each device stop. - * @sched_event: work schedule handler. 
- */ -struct exynos_drm_ippdrv { - struct list_head drv_list; - struct device *parent_dev; - struct device *dev; - struct drm_device *drm_dev; - bool dedicated; - struct exynos_drm_ipp_ops *ops[EXYNOS_DRM_OPS_MAX]; - struct workqueue_struct *event_workq; - struct drm_exynos_ipp_cmd_node *c_node; - struct list_head cmd_list; - struct mutex cmd_lock; - struct drm_exynos_ipp_prop_list prop_list; - - int (*check_property)(struct device *dev, - struct drm_exynos_ipp_property *property); - int (*reset)(struct device *dev); - int (*start)(struct device *dev, enum drm_exynos_ipp_cmd cmd); - void (*stop)(struct device *dev, enum drm_exynos_ipp_cmd cmd); - void (*sched_event)(struct work_struct *work); -}; - -#ifdef CONFIG_DRM_EXYNOS_IPP -extern int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv); -extern int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv); -extern int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data, - struct drm_file *file); -extern int exynos_drm_ippnb_register(struct notifier_block *nb); -extern int exynos_drm_ippnb_unregister(struct notifier_block *nb); -extern int exynos_drm_ippnb_send_event(unsigned long val, void *v); -extern void ipp_sched_cmd(struct work_struct *work); -extern void ipp_sched_event(struct work_struct *work); - -#else -static inline int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv) -{ - return -ENODEV; -} - -static inline int exynos_drm_ipp_get_property(struct drm_device *drm_dev, - void *data, - struct drm_file *file_priv) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_set_property(struct drm_device *drm_dev, - void *data, - struct drm_file *file_priv) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, - void *data, - struct drm_file *file) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, - void *data, - struct drm_file *file) -{ - return -ENOTTY; -} - -static inline int exynos_drm_ippnb_register(struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippnb_unregister(struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int exynos_drm_ippnb_send_event(unsigned long val, void *v) -{ - return -ENOTTY; -} -#endif - -#endif /* _EXYNOS_DRM_IPP_H_ */ - diff --git a/include/video/exynos5433_decon.h b/drivers/gpu/drm/exynos/regs-decon5433.h index 78957c9626f5..19ad9e47945e 100644 --- a/include/video/exynos5433_decon.h +++ b/drivers/gpu/drm/exynos/regs-decon5433.h @@ -6,8 +6,8 @@ * published by the Free Software Foundationr */ -#ifndef EXYNOS_REGS_DECON_H -#define EXYNOS_REGS_DECON_H +#ifndef EXYNOS_REGS_DECON5433_H +#define EXYNOS_REGS_DECON5433_H /* Exynos543X DECON */ #define DECON_VIDCON0 0x0000 @@ -206,4 +206,4 @@ #define CRCCTRL_CRCEN (0x1 << 0) #define CRCCTRL_MASK (0x7) -#endif /* EXYNOS_REGS_DECON_H */ +#endif /* EXYNOS_REGS_DECON5433_H */ diff --git a/include/video/exynos7_decon.h b/drivers/gpu/drm/exynos/regs-decon7.h index a62b11b613f6..5df7765d2397 100644 --- a/include/video/exynos7_decon.h +++ b/drivers/gpu/drm/exynos/regs-decon7.h @@ 
-1,5 +1,4 @@ -/* include/video/exynos7_decon.h - * +/* * Copyright (c) 2014 Samsung Electronics Co., Ltd. * Author: Ajay Kumar <[email protected]> * @@ -9,6 +8,9 @@ * option) any later version. */ +#ifndef EXYNOS_REGS_DECON7_H +#define EXYNOS_REGS_DECON7_H + /* VIDCON0 */ #define VIDCON0 0x00 @@ -347,3 +349,5 @@ #define DECON_UPDATE_SLAVE_SYNC (1 << 4) #define DECON_UPDATE_STANDALONE_F (1 << 0) + +#endif /* EXYNOS_REGS_DECON7_H */ diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index fa36491495b1..108d21f34777 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -29,7 +29,6 @@ config DRM_I915_DEBUG select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST - select DRM_I915_TRACE_GEM default n help Choose this option to turn on extra driver debugging that may affect @@ -53,6 +52,7 @@ config DRM_I915_DEBUG_GEM config DRM_I915_TRACE_GEM bool "Insert extra ftrace output from the GEM internals" + depends on DRM_I915_DEBUG_GEM select TRACING default n help diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 18c45734c7a2..edec15d19538 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -825,6 +825,21 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, return 0; } +static inline bool is_mocs_mmio(unsigned int offset) +{ + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); +} + +static int mocs_cmd_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + if (!is_mocs_mmio(offset)) + return -EINVAL; + vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -848,6 +863,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_mocs_mmio(offset) && + mocs_cmd_reg_handler(s, offset, index)) + return -EINVAL; + if (is_force_nonpriv_mmio(offset) && force_nonpriv_reg_handler(s, offset, index)) return -EPERM; @@ -1220,13 +1239,13 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, return 0; if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & + stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; } else { - stride = (vgpu_vreg(s->vgpu, info->stride_reg) & + stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(15, 6)) >> 6; - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; } if (stride != info->stride_val) @@ -1245,21 +1264,21 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; struct intel_vgpu *vgpu = s->vgpu; - set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), + set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), + 
set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), info->tile_val << 10); } else { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6), info->stride_val << 6); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10), info->tile_val << 10); } - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; intel_vgpu_trigger_virtual_event(vgpu, info->event); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 09185036bac8..dd96ffc878ac 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -59,7 +59,7 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (!(vgpu_vreg(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) + if (!(vgpu_vreg_t(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) return 0; if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE)) @@ -74,7 +74,7 @@ int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) if (WARN_ON(pipe < PIPE_A || pipe >= I915_MAX_PIPES)) return -EINVAL; - if (vgpu_vreg(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) + if (vgpu_vreg_t(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) return 1; if (edp_pipe_is_enabled(vgpu) && @@ -169,103 +169,105 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= + vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= SKL_FUSE_DOWNLOAD_STATUS | SKL_FUSE_PG_DIST_STATUS(SKL_PG0) | SKL_FUSE_PG_DIST_STATUS(SKL_PG1) | SKL_FUSE_PG_DIST_STATUS(SKL_PG2); - vgpu_vreg(vgpu, LCPLL1_CTL) |= + vgpu_vreg_t(vgpu, LCPLL1_CTL) |= LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK; - vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; + vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; + 
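The mechanical vgpu_vreg() to vgpu_vreg_t() conversion running through these GVT hunks tracks i915's typed-register scheme: register macros such as SDEISR or PIPECONF(pipe) evaluate to an i915_reg_t, a one-member struct the compiler refuses to mix up with a plain u32 offset. The typed accessor unwraps the offset before indexing the virtual register file; roughly (a sketch of the distinction, not the exact GVT macro bodies):

    typedef struct {
            u32 reg;                        /* raw MMIO offset */
    } i915_reg_t;

    static inline u32 i915_mmio_reg_offset(i915_reg_t r)
    {
            return r.reg;
    }

    /* raw-offset accessor, for call sites that really have an integer */
    #define vgpu_vreg(vgpu, offset) \
            (*(u32 *)((vgpu)->mmio.vreg + (offset)))

    /* typed accessor: takes an i915_reg_t, so passing a bare integer
     * where a register macro is expected fails to compile */
    #define vgpu_vreg_t(vgpu, reg) \
            vgpu_vreg(vgpu, i915_mmio_reg_offset(reg))

Call sites that index with a runtime-computed offset, such as the memcpy from &vgpu_vreg(vgpu, offset) in gmbus3_mmio_read() below, deliberately keep the untyped form.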
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, GEN8_DE_PORT_ISR) |= + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_PORT_DP_A_HOTPLUG; else - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } /* Clear host CRT status, so guest couldn't detect this host CRT. 
*/ if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); - vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; + return 0; } @@ -368,12 +369,12 @@ static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) if (!pipe_is_enabled(vgpu, pipe)) continue; - vgpu_vreg(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, event); } if (pipe_is_enabled(vgpu, pipe)) { - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]); } } diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 42cd09ec63fa..f61337632969 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -95,9 +95,9 @@ static inline int get_port_from_gmbus0(u32 gmbus0) static void reset_gmbus_controller(struct intel_vgpu *vgpu) { - vgpu_vreg(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; if (!vgpu->display.i2c_edid.edid_available) - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; } @@ -123,16 +123,16 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, vgpu->display.i2c_edid.state = I2C_GMBUS; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; if (intel_vgpu_has_monitor_on_port(vgpu, port) && !intel_vgpu_port_is_dp(vgpu, port)) { vgpu->display.i2c_edid.port = port; vgpu->display.i2c_edid.edid_available = true; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; } else - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; return 0; } @@ -159,8 +159,8 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * 2) HW_RDY bit asserted */ if (wvalue & GMBUS_SW_CLR_INT) { - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; } /* For virtualization, we suppose that HW is always ready, @@ -208,7 +208,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * visible in gmbus interface) */ i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; } break; case NIDX_NS_W: @@ -220,7 +220,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, 
unsigned int offset, * START (-->INDEX) -->DATA */ i2c_edid->gmbus.phase = GMBUS_DATA_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; break; default: gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n"); @@ -256,7 +256,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, u32 reg_data = 0; /* Data can only be recevied if previous settings correct */ - if (vgpu_vreg(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { + if (vgpu_vreg_t(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { if (byte_left <= 0) { memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes); return 0; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 6cc99543693f..6b50fe78dc1b 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -147,7 +147,7 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 stride_reg = vgpu_vreg(vgpu, DSPSTRIDE(pipe)) & stride_mask; + u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; u32 stride = stride_reg; if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { @@ -209,7 +209,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (pipe >= I915_MAX_PIPES) return -ENODEV; - val = vgpu_vreg(vgpu, DSPCNTR(pipe)); + val = vgpu_vreg_t(vgpu, DSPCNTR(pipe)); plane->enabled = !!(val & DISPLAY_PLANE_ENABLE); if (!plane->enabled) return -ENODEV; @@ -244,7 +244,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; - plane->base = vgpu_vreg(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; + plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { gvt_vgpu_err("invalid gma address: %lx\n", (unsigned long)plane->base); @@ -263,14 +263,14 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, (_PRI_PLANE_STRIDE_MASK >> 6) : _PRI_PLANE_STRIDE_MASK, plane->bpp); - plane->width = (vgpu_vreg(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> + plane->width = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> _PIPE_H_SRCSZ_SHIFT; plane->width += 1; - plane->height = (vgpu_vreg(vgpu, PIPESRC(pipe)) & + plane->height = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT; plane->height += 1; /* raw height is one minus the real value */ - val = vgpu_vreg(vgpu, DSPTILEOFF(pipe)); + val = vgpu_vreg_t(vgpu, DSPTILEOFF(pipe)); plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >> _PRI_PLANE_X_OFF_SHIFT; plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >> @@ -344,7 +344,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, if (pipe >= I915_MAX_PIPES) return -ENODEV; - val = vgpu_vreg(vgpu, CURCNTR(pipe)); + val = vgpu_vreg_t(vgpu, CURCNTR(pipe)); mode = val & CURSOR_MODE; plane->enabled = (mode != CURSOR_MODE_DISABLE); if (!plane->enabled) @@ -370,7 +370,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n", alpha_plane, alpha_force); - plane->base = vgpu_vreg(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; + plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { gvt_vgpu_err("invalid gma address: %lx\n", (unsigned long)plane->base); @@ -384,7 +384,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, return -EINVAL; } - val = vgpu_vreg(vgpu, CURPOS(pipe)); + val = vgpu_vreg_t(vgpu, 
CURPOS(pipe)); plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT; plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT; plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; @@ -424,7 +424,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, if (pipe >= I915_MAX_PIPES) return -ENODEV; - val = vgpu_vreg(vgpu, SPRCTL(pipe)); + val = vgpu_vreg_t(vgpu, SPRCTL(pipe)); plane->enabled = !!(val & SPRITE_ENABLE); if (!plane->enabled) return -ENODEV; @@ -475,7 +475,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; - plane->base = vgpu_vreg(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; + plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { gvt_vgpu_err("invalid gma address: %lx\n", (unsigned long)plane->base); @@ -489,10 +489,10 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, return -EINVAL; } - plane->stride = vgpu_vreg(vgpu, SPRSTRIDE(pipe)) & + plane->stride = vgpu_vreg_t(vgpu, SPRSTRIDE(pipe)) & _SPRITE_STRIDE_MASK; - val = vgpu_vreg(vgpu, SPRSIZE(pipe)); + val = vgpu_vreg_t(vgpu, SPRSIZE(pipe)); plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >> _SPRITE_SIZE_HEIGHT_SHIFT; plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >> @@ -500,11 +500,11 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->height += 1; /* raw height is one minus the real value */ plane->width += 1; /* raw width is one minus the real value */ - val = vgpu_vreg(vgpu, SPRPOS(pipe)); + val = vgpu_vreg_t(vgpu, SPRPOS(pipe)); plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT; plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT; - val = vgpu_vreg(vgpu, SPROFFSET(pipe)); + val = vgpu_vreg_t(vgpu, SPROFFSET(pipe)); plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >> _SPRITE_OFFSET_START_X_SHIFT; plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 71a0f2b87b3a..c4f752eeadcc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1968,6 +1968,39 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes) +{ + struct intel_gvt *gvt = vgpu->gvt; + int ret = 0; + + if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; + + mutex_lock(&gvt->lock); + + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { + if (unlikely(vgpu->failsafe)) { + /* remove write protection to prevent future traps */ + intel_vgpu_clean_page_track(vgpu, t); + } else { + ret = t->handler(t, pa, p_data, bytes); + if (ret) { + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, t->gfn, pa, + *(u32 *)p_data, bytes); + } + } + } + mutex_unlock(&gvt->lock); + } + return ret; +} + + static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { @@ -2244,7 +2277,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) @@ -2279,7 +2312,7 @@ int
intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index f98c1c19b4cb..4cc13b5934f1 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -308,4 +308,7 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); + #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 643bb961d40d..fac54f32d33f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -183,6 +183,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .get_gvt_attrs = intel_get_gvt_attrs, .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, + .write_protect_handler = intel_vgpu_write_protect_handler, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 1e9f11c8b7bb..7dc7a80213a8 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -412,23 +412,20 @@ void intel_vgpu_free_resource(struct intel_vgpu *vgpu); void intel_vgpu_write_fence(struct intel_vgpu *vgpu, u32 fence, u64 value); -/* Macros for easily accessing vGPU virtual/shadow register */ -#define vgpu_vreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) +/* Macros for easily accessing vGPU virtual/shadow register. 
+ Explicitly separate use for typed MMIO reg or real offset. */ +#define vgpu_vreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_vreg64_t(vgpu, reg) \ + (*(u64 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg64(vgpu, offset) \ + (*(u64 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_sreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.sreg + i915_mmio_reg_offset(reg))) +#define vgpu_sreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.sreg + (offset))) #define for_each_active_vgpu(gvt, vgpu, id) \ idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \ @@ -549,6 +546,8 @@ struct intel_gvt_ops { struct attribute_group ***intel_vgpu_type_groups); int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); + int (*write_protect_handler)(struct intel_vgpu *, u64, void *, + unsigned int); }; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c982867e7c2b..92d6468daeee 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -343,13 +343,13 @@ static int pch_pp_control_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); if (vgpu_vreg(vgpu, offset) & PANEL_POWER_ON) { - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_ON; - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_ON; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; } else - vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~(PP_ON | PP_SEQUENCE_POWER_DOWN | PP_CYCLE_DELAY_ACTIVE); return 0; @@ -503,7 +503,7 @@ static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } else { vgpu_vreg(vgpu, offset) |= DDI_BUF_IS_IDLE; if (offset == i915_mmio_reg_offset(DDI_BUF_CTL(PORT_E))) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) &= ~DP_TP_STATUS_AUTOTRAIN_DONE; } return 0; @@ -521,9 +521,9 @@ static int fdi_rx_iir_mmio_write(struct intel_vgpu *vgpu, static int fdi_auto_training_started(struct intel_vgpu *vgpu) { - u32 ddi_buf_ctl = vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_E)); + u32 ddi_buf_ctl = vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_E)); u32 rx_ctl = vgpu_vreg(vgpu, _FDI_RXA_CTL); - u32 tx_ctl = vgpu_vreg(vgpu, DP_TP_CTL(PORT_E)); + u32 tx_ctl = vgpu_vreg_t(vgpu, DP_TP_CTL(PORT_E)); if ((ddi_buf_ctl & DDI_BUF_CTL_ENABLE) && (rx_ctl & FDI_RX_ENABLE) && @@ -564,12 +564,12 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu, fdi_tx_check_bits = FDI_TX_ENABLE | fdi_tx_train_bits; /* If imr bit has been masked */ - if (vgpu_vreg(vgpu, fdi_rx_imr) & fdi_iir_check_bits) + if (vgpu_vreg_t(vgpu, fdi_rx_imr) & fdi_iir_check_bits) return 0; - if (((vgpu_vreg(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) + if (((vgpu_vreg_t(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) == fdi_tx_check_bits) - && ((vgpu_vreg(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) + && ((vgpu_vreg_t(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) == fdi_rx_check_bits)) return 1; else @@ -626,17 +626,17 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, if (ret < 0) return ret; if (ret) -
vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK; ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN2); if (ret < 0) return ret; if (ret) - vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; + vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; if (offset == _FDI_RXA_CTL) if (fdi_auto_training_started(vgpu)) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) |= + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) |= DP_TP_STATUS_AUTOTRAIN_DONE; return 0; } @@ -657,7 +657,7 @@ static int dp_tp_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, data = (vgpu_vreg(vgpu, offset) & GENMASK(10, 8)) >> 8; if (data == 0x2) { status_reg = DP_TP_STATUS(index); - vgpu_vreg(vgpu, status_reg) |= (1 << 25); + vgpu_vreg_t(vgpu, status_reg) |= (1 << 25); } return 0; } @@ -721,7 +721,7 @@ static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -742,7 +742,7 @@ static int spr_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -1064,9 +1064,9 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu, static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRRD) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; vgpu_vreg(vgpu, offset) = read_virtual_sbi_register(vgpu, sbi_offset); @@ -1091,13 +1091,13 @@ static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, offset) = data; - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRWR) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; write_virtual_sbi_register(vgpu, sbi_offset, - vgpu_vreg(vgpu, SBI_DATA)); + vgpu_vreg_t(vgpu, SBI_DATA)); } return 0; } @@ -1343,7 +1343,7 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, { u32 value = *(u32 *)p_data; u32 cmd = value & 0xff; - u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); + u32 *data0 = &vgpu_vreg_t(vgpu, GEN6_PCODE_DATA); switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: @@ -1586,7 +1586,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, } #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \ - ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \ + ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \ f, s, am, rm, d, r, w); \ if (ret) \ return ret; \ @@ -1654,22 +1654,22 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); -#define RING_REG(base) (base + 0x28) +#define RING_REG(base) _MMIO((base) + 0x28) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define 
RING_REG(base) (base + 0x134) +#define RING_REG(base) _MMIO((base) + 0x134) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x6c) +#define RING_REG(base) _MMIO((base) + 0x6c) MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); - MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); - MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -1679,7 +1679,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ -#define RING_REG(base) (base + 0x29c) +#define RING_REG(base) _MMIO((base) + 0x29c) MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, ring_mode_mmio_write); #undef RING_REG @@ -1698,37 +1698,37 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2124), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20e4), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x9030), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20a0), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2420), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2430), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2434), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ - MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x602a0, D_ALL); + MMIO_F(_MMIO(0x60220), 0x20, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_D(_MMIO(0x602a0), D_ALL); - MMIO_D(0x65050, D_ALL); 
- MMIO_D(0x650b4, D_ALL); + MMIO_D(_MMIO(0x65050), D_ALL); + MMIO_D(_MMIO(0x650b4), D_ALL); - MMIO_D(0xc4040, D_ALL); + MMIO_D(_MMIO(0xc4040), D_ALL); MMIO_D(DERRMR, D_ALL); MMIO_D(PIPEDSL(PIPE_A), D_ALL); @@ -1768,14 +1768,14 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(CURBASE(PIPE_B), D_ALL); MMIO_D(CURBASE(PIPE_C), D_ALL); - MMIO_D(0x700ac, D_ALL); - MMIO_D(0x710ac, D_ALL); - MMIO_D(0x720ac, D_ALL); + MMIO_D(_MMIO(0x700ac), D_ALL); + MMIO_D(_MMIO(0x710ac), D_ALL); + MMIO_D(_MMIO(0x720ac), D_ALL); - MMIO_D(0x70090, D_ALL); - MMIO_D(0x70094, D_ALL); - MMIO_D(0x70098, D_ALL); - MMIO_D(0x7009c, D_ALL); + MMIO_D(_MMIO(0x70090), D_ALL); + MMIO_D(_MMIO(0x70094), D_ALL); + MMIO_D(_MMIO(0x70098), D_ALL); + MMIO_D(_MMIO(0x7009c), D_ALL); MMIO_D(DSPCNTR(PIPE_A), D_ALL); MMIO_D(DSPADDR(PIPE_A), D_ALL); @@ -1951,24 +1951,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(BLC_PWM_PCH_CTL1, D_ALL); MMIO_D(BLC_PWM_PCH_CTL2, D_ALL); - MMIO_D(0x48268, D_ALL); + MMIO_D(_MMIO(0x48268), D_ALL); MMIO_F(PCH_GMBUS0, 4 * 4, 0, 0, 0, D_ALL, gmbus_mmio_read, gmbus_mmio_write); MMIO_F(PCH_GPIOA, 6 * 4, F_UNALIGN, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0xe4f00, 0x28, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0xe4f00), 0x28, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(_PCH_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write); - MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write); - MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSACONF), D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSBCONF), D_ALL, NULL, transconf_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_A), D_ALL, NULL, fdi_rx_iir_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_B), D_ALL, NULL, fdi_rx_iir_mmio_write); @@ -1980,30 +1980,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(FDI_RX_CTL(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status); MMIO_DH(FDI_RX_CTL(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status); - MMIO_D(_PCH_TRANS_HTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_A, D_ALL); - - MMIO_D(_PCH_TRANS_HTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_B, D_ALL); - - MMIO_D(_PCH_TRANSA_DATA_M1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_M2, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N2, D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_A), D_ALL); + 
MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_A), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_B), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N2), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_A), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_B), D_ALL); @@ -2021,38 +2021,38 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(TVIDEO_DIP_DATA(PIPE_C), D_ALL); MMIO_D(TVIDEO_DIP_GCP(PIPE_C), D_ALL); - MMIO_D(_FDI_RXA_MISC, D_ALL); - MMIO_D(_FDI_RXB_MISC, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE2, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE2, D_ALL); + MMIO_D(_MMIO(_FDI_RXA_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE2), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE2), D_ALL); MMIO_DH(PCH_PP_CONTROL, D_ALL, NULL, pch_pp_control_mmio_write); MMIO_D(PCH_PP_DIVISOR, D_ALL); MMIO_D(PCH_PP_STATUS, D_ALL); MMIO_D(PCH_LVDS, D_ALL); - MMIO_D(_PCH_DPLL_A, D_ALL); - MMIO_D(_PCH_DPLL_B, D_ALL); - MMIO_D(_PCH_FPA0, D_ALL); - MMIO_D(_PCH_FPA1, D_ALL); - MMIO_D(_PCH_FPB0, D_ALL); - MMIO_D(_PCH_FPB1, D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_A), D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_B), D_ALL); + MMIO_D(_MMIO(_PCH_FPA0), D_ALL); + MMIO_D(_MMIO(_PCH_FPA1), D_ALL); + MMIO_D(_MMIO(_PCH_FPB0), D_ALL); + MMIO_D(_MMIO(_PCH_FPB1), D_ALL); MMIO_D(PCH_DREF_CONTROL, D_ALL); MMIO_D(PCH_RAWCLK_FREQ, D_ALL); MMIO_D(PCH_DPLL_SEL, D_ALL); - MMIO_D(0x61208, D_ALL); - MMIO_D(0x6120c, D_ALL); + MMIO_D(_MMIO(0x61208), D_ALL); + MMIO_D(_MMIO(0x6120c), D_ALL); MMIO_D(PCH_PP_ON_DELAYS, D_ALL); MMIO_D(PCH_PP_OFF_DELAYS, D_ALL); - MMIO_DH(0xe651c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe651c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe661c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe671c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe681c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6c04), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6e1c), D_ALL, dpy_reg_mmio_read, NULL); MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0, PORTA_HOTPLUG_STATUS_MASK @@ -2074,11 +2074,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(SOUTH_CHICKEN1, D_ALL); MMIO_DH(SOUTH_CHICKEN2, D_ALL, NULL, south_chicken2_mmio_write); - MMIO_D(_TRANSA_CHICKEN1, D_ALL); - MMIO_D(_TRANSB_CHICKEN1, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN1), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN1), D_ALL); MMIO_D(SOUTH_DSPCLK_GATE_D, D_ALL); - MMIO_D(_TRANSA_CHICKEN2, D_ALL); - MMIO_D(_TRANSB_CHICKEN2, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN2), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN2), D_ALL); MMIO_D(ILK_DPFC_CB_BASE, D_ALL); MMIO_D(ILK_DPFC_CONTROL, D_ALL); @@ -2144,24 
+2144,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(PREC_PAL_DATA(PIPE_C), D_ALL); MMIO_F(PREC_PAL_GC_MAX(PIPE_C, 0), 4 * 3, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x60110, D_ALL); - MMIO_D(0x61110, D_ALL); - MMIO_F(0x70400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x71400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x72400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x70440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x71440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x72440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7044c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7144c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7244c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_D(_MMIO(0x60110), D_ALL); + MMIO_D(_MMIO(0x61110), D_ALL); + MMIO_F(_MMIO(0x70400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x71400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x72400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x70440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x71440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x72440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7044c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7144c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7244c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); MMIO_D(PIPE_WM_LINETIME(PIPE_A), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_B), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_C), D_ALL); MMIO_D(SPLL_CTL, D_ALL); - MMIO_D(_WRPLL_CTL1, D_ALL); - MMIO_D(_WRPLL_CTL2, D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL1), D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL2), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_A), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_B), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_C), D_ALL); @@ -2172,15 +2172,15 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(TRANS_CLK_SEL(TRANSCODER_C), D_ALL); MMIO_D(HSW_NDE_RSTWRN_OPT, D_ALL); - MMIO_D(0x46508, D_ALL); + MMIO_D(_MMIO(0x46508), D_ALL); - MMIO_D(0x49080, D_ALL); - MMIO_D(0x49180, D_ALL); - MMIO_D(0x49280, D_ALL); + MMIO_D(_MMIO(0x49080), D_ALL); + MMIO_D(_MMIO(0x49180), D_ALL); + MMIO_D(_MMIO(0x49280), D_ALL); - MMIO_F(0x49090, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x49190, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x49290, 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49090), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49190), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49290), 0x14, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GAMMA_MODE(PIPE_A), D_ALL); MMIO_D(GAMMA_MODE(PIPE_B), D_ALL); @@ -2200,7 +2200,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(SBI_CTL_STAT, D_ALL, NULL, sbi_ctl_mmio_write); MMIO_D(PIXCLK_GATE, D_ALL); - MMIO_F(_DPA_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_ALL, NULL, + MMIO_F(_MMIO(_DPA_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_ALL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(DDI_BUF_CTL(PORT_A), D_ALL, NULL, ddi_buf_ctl_mmio_write); @@ -2221,24 +2221,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(DP_TP_STATUS(PORT_D), D_ALL, NULL, dp_tp_status_mmio_write); MMIO_DH(DP_TP_STATUS(PORT_E), D_ALL, NULL, NULL); - MMIO_F(_DDI_BUF_TRANS_A, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64e60, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64eC0, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f20, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f80, 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(_DDI_BUF_TRANS_A), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64e60), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64eC0), 0x50, 
0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f20), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f80), 0x50, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL); MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_A, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_B, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_C, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_EDP, D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_C), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_EDP), D_ALL, NULL, NULL); - MMIO_D(_TRANSA_MSA_MISC, D_ALL); - MMIO_D(_TRANSB_MSA_MISC, D_ALL); - MMIO_D(_TRANSC_MSA_MISC, D_ALL); - MMIO_D(_TRANS_EDP_MSA_MISC, D_ALL); + MMIO_D(_MMIO(_TRANSA_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANSB_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANSC_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANS_EDP_MSA_MISC), D_ALL); MMIO_DH(FORCEWAKE, D_ALL, NULL, NULL); MMIO_D(FORCEWAKE_ACK, D_ALL); @@ -2304,101 +2304,101 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN6_UCGCTL1, D_ALL); MMIO_D(GEN6_UCGCTL2, D_ALL); - MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x4f000), 0x90, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GEN6_PCODE_DATA, D_ALL); - MMIO_D(0x13812c, D_ALL); + MMIO_D(_MMIO(0x13812c), D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); MMIO_D(HSW_EDRAM_CAP, D_ALL); MMIO_D(HSW_IDICR, D_ALL); MMIO_DH(GFX_FLSH_CNTL_GEN6, D_ALL, NULL, NULL); - MMIO_D(0x3c, D_ALL); - MMIO_D(0x860, D_ALL); + MMIO_D(_MMIO(0x3c), D_ALL); + MMIO_D(_MMIO(0x860), D_ALL); MMIO_D(ECOSKPD, D_ALL); - MMIO_D(0x121d0, D_ALL); + MMIO_D(_MMIO(0x121d0), D_ALL); MMIO_D(GEN6_BLITTER_ECOSKPD, D_ALL); - MMIO_D(0x41d0, D_ALL); + MMIO_D(_MMIO(0x41d0), D_ALL); MMIO_D(GAC_ECO_BITS, D_ALL); - MMIO_D(0x6200, D_ALL); - MMIO_D(0x6204, D_ALL); - MMIO_D(0x6208, D_ALL); - MMIO_D(0x7118, D_ALL); - MMIO_D(0x7180, D_ALL); - MMIO_D(0x7408, D_ALL); - MMIO_D(0x7c00, D_ALL); + MMIO_D(_MMIO(0x6200), D_ALL); + MMIO_D(_MMIO(0x6204), D_ALL); + MMIO_D(_MMIO(0x6208), D_ALL); + MMIO_D(_MMIO(0x7118), D_ALL); + MMIO_D(_MMIO(0x7180), D_ALL); + MMIO_D(_MMIO(0x7408), D_ALL); + MMIO_D(_MMIO(0x7c00), D_ALL); MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write); - MMIO_D(0x911c, D_ALL); - MMIO_D(0x9120, D_ALL); + MMIO_D(_MMIO(0x911c), D_ALL); + MMIO_D(_MMIO(0x9120), D_ALL); MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_D(GAB_CTL, D_ALL); - MMIO_D(0x48800, D_ALL); - MMIO_D(0xce044, D_ALL); - MMIO_D(0xe6500, D_ALL); - MMIO_D(0xe6504, D_ALL); - MMIO_D(0xe6600, D_ALL); - MMIO_D(0xe6604, D_ALL); - MMIO_D(0xe6700, D_ALL); - MMIO_D(0xe6704, D_ALL); - MMIO_D(0xe6800, D_ALL); - MMIO_D(0xe6804, D_ALL); + MMIO_D(_MMIO(0x48800), D_ALL); + MMIO_D(_MMIO(0xce044), D_ALL); + MMIO_D(_MMIO(0xe6500), D_ALL); + MMIO_D(_MMIO(0xe6504), D_ALL); + MMIO_D(_MMIO(0xe6600), D_ALL); + MMIO_D(_MMIO(0xe6604), D_ALL); + MMIO_D(_MMIO(0xe6700), D_ALL); + MMIO_D(_MMIO(0xe6704), D_ALL); + MMIO_D(_MMIO(0xe6800), D_ALL); + MMIO_D(_MMIO(0xe6804), D_ALL); MMIO_D(PCH_GMBUS4, D_ALL); MMIO_D(PCH_GMBUS5, D_ALL); - MMIO_D(0x902c, D_ALL); - MMIO_D(0xec008, D_ALL); - MMIO_D(0xec00c, D_ALL); - MMIO_D(0xec008 + 0x18, D_ALL); - MMIO_D(0xec00c + 0x18, D_ALL); - MMIO_D(0xec008 + 0x18 * 2, D_ALL); - MMIO_D(0xec00c + 0x18 * 2, D_ALL); - MMIO_D(0xec008 + 0x18 * 3, D_ALL); - MMIO_D(0xec00c + 0x18 * 3, D_ALL); - MMIO_D(0xec408, D_ALL); - MMIO_D(0xec40c, D_ALL); - MMIO_D(0xec408 + 0x18, D_ALL); - MMIO_D(0xec40c + 
0x18, D_ALL); - MMIO_D(0xec408 + 0x18 * 2, D_ALL); - MMIO_D(0xec40c + 0x18 * 2, D_ALL); - MMIO_D(0xec408 + 0x18 * 3, D_ALL); - MMIO_D(0xec40c + 0x18 * 3, D_ALL); - MMIO_D(0xfc810, D_ALL); - MMIO_D(0xfc81c, D_ALL); - MMIO_D(0xfc828, D_ALL); - MMIO_D(0xfc834, D_ALL); - MMIO_D(0xfcc00, D_ALL); - MMIO_D(0xfcc0c, D_ALL); - MMIO_D(0xfcc18, D_ALL); - MMIO_D(0xfcc24, D_ALL); - MMIO_D(0xfd000, D_ALL); - MMIO_D(0xfd00c, D_ALL); - MMIO_D(0xfd018, D_ALL); - MMIO_D(0xfd024, D_ALL); - MMIO_D(0xfd034, D_ALL); + MMIO_D(_MMIO(0x902c), D_ALL); + MMIO_D(_MMIO(0xec008), D_ALL); + MMIO_D(_MMIO(0xec00c), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec408), D_ALL); + MMIO_D(_MMIO(0xec40c), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xfc810), D_ALL); + MMIO_D(_MMIO(0xfc81c), D_ALL); + MMIO_D(_MMIO(0xfc828), D_ALL); + MMIO_D(_MMIO(0xfc834), D_ALL); + MMIO_D(_MMIO(0xfcc00), D_ALL); + MMIO_D(_MMIO(0xfcc0c), D_ALL); + MMIO_D(_MMIO(0xfcc18), D_ALL); + MMIO_D(_MMIO(0xfcc24), D_ALL); + MMIO_D(_MMIO(0xfd000), D_ALL); + MMIO_D(_MMIO(0xfd00c), D_ALL); + MMIO_D(_MMIO(0xfd018), D_ALL); + MMIO_D(_MMIO(0xfd024), D_ALL); + MMIO_D(_MMIO(0xfd034), D_ALL); MMIO_DH(FPGA_DBG, D_ALL, NULL, fpga_dbg_mmio_write); - MMIO_D(0x2054, D_ALL); - MMIO_D(0x12054, D_ALL); - MMIO_D(0x22054, D_ALL); - MMIO_D(0x1a054, D_ALL); - - MMIO_D(0x44070, D_ALL); - MMIO_DFH(0x215c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - - MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_D(0x2b00, D_BDW_PLUS); - MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - - MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0x2054), D_ALL); + MMIO_D(_MMIO(0x12054), D_ALL); + MMIO_D(_MMIO(0x22054), D_ALL); + MMIO_D(_MMIO(0x1a054), D_ALL); + + MMIO_D(_MMIO(0x44070), D_ALL); + MMIO_DFH(_MMIO(0x215c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(_MMIO(0x2290), 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x2b00), D_BDW_PLUS); + MMIO_D(_MMIO(0x2360), D_BDW_PLUS); + MMIO_F(_MMIO(0x5200), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5240), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5280), 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + + MMIO_DFH(_MMIO(0x1c17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1c178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 
0, D_ALL, NULL, NULL); @@ -2412,24 +2412,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x426c, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DH(_MMIO(0x4260), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4264), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4268), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x426c), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4270), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); - MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2503,40 +2503,40 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, mmio_read_from_hw, NULL); -#define RING_REG(base) (base + 0xd0) +#define RING_REG(base) _MMIO((base) + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); #undef RING_REG -#define RING_REG(base) (base + 0x230) +#define RING_REG(base) _MMIO((base) + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); #undef RING_REG -#define RING_REG(base) (base + 0x234) +#define RING_REG(base) _MMIO((base) + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x244) +#define RING_REG(base) _MMIO((base) + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x370) +#define RING_REG(base) _MMIO((base) + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x3a0) +#define RING_REG(base) _MMIO((base) + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_B), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_C), 
D_BDW_PLUS); - MMIO_D(0x1c1d0, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c1d0), D_BDW_PLUS); MMIO_D(GEN6_MBCUNIT_SNPCR, D_BDW_PLUS); MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); - MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c054), D_BDW_PLUS); MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); @@ -2545,7 +2545,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GAMTARBMODE, D_BDW_PLUS); -#define RING_REG(base) (base + 0x270) +#define RING_REG(base) _MMIO((base) + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG @@ -2558,10 +2558,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(BDW_EDP_PSR_BASE, D_BDW); + MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); - MMIO_D(0x66c00, D_BDW_PLUS); - MMIO_D(0x66c04, D_BDW_PLUS); + MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); + MMIO_D(_MMIO(0x66c04), D_BDW_PLUS); MMIO_D(HSW_GTT_CACHE_EN, D_BDW_PLUS); @@ -2569,54 +2569,54 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW_PLUS); + MMIO_D(_MMIO(0xfdc), D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1f0), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1c0), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb110, D_BDW); + MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0xb110), D_BDW); - MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, force_nonpriv_write); - MMIO_D(0x44484, D_BDW_PLUS); - MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(_MMIO(0x44484), D_BDW_PLUS); + MMIO_D(_MMIO(0x4448c), D_BDW_PLUS); - MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x83a4), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x8430), D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x110000, D_BDW_PLUS); + MMIO_D(_MMIO(0x110000), D_BDW_PLUS); - MMIO_D(0x48400, D_BDW_PLUS); + MMIO_D(_MMIO(0x48400), D_BDW_PLUS); - MMIO_D(0x6e570, D_BDW_PLUS); - MMIO_D(0x65f10, D_BDW_PLUS); + MMIO_D(_MMIO(0x6e570), D_BDW_PLUS); + MMIO_D(_MMIO(0x65f10), D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe194), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe188), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, 
NULL); - MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2580), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0x2248), D_BDW, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0xe220), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe230), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe240), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe260), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe270), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe280), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2632,11 +2632,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); /* @@ -2647,26 +2647,26 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL, skl_power_well_ctl_write); - MMIO_D(0xa210, D_SKL_PLUS); + MMIO_D(_MMIO(0xa210), D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); - MMIO_D(0x45504, D_SKL_PLUS); - MMIO_D(0x45520, D_SKL_PLUS); - MMIO_D(0x46000, D_SKL_PLUS); - MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_D(0x6C040, D_SKL | D_KBL); - MMIO_D(0x6C048, D_SKL | D_KBL); - MMIO_D(0x6C050, D_SKL | D_KBL); - MMIO_D(0x6C044, D_SKL | D_KBL); - MMIO_D(0x6C04C, D_SKL | D_KBL); - MMIO_D(0x6C054, D_SKL | D_KBL); - MMIO_D(0x6c058, D_SKL | D_KBL); - MMIO_D(0x6c05c, D_SKL | D_KBL); - MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); + MMIO_DH(_MMIO(0x4ddc), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(0x42080), D_SKL_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x45504), D_SKL_PLUS); + MMIO_D(_MMIO(0x45520), D_SKL_PLUS); + MMIO_D(_MMIO(0x46000), D_SKL_PLUS); + MMIO_DH(_MMIO(0x46010), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_DH(_MMIO(0x46014), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_D(_MMIO(0x6C040), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C048), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C050), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C044), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C04C), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c058), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c05c), D_SKL | D_KBL); + MMIO_DH(_MMIO(0x6c060), D_SKL | D_KBL, dpll_status_read, NULL); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, 
pf_write); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); @@ -2755,105 +2755,105 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_D(0x70380, D_SKL_PLUS); - MMIO_D(0x71380, D_SKL_PLUS); - MMIO_D(0x72380, D_SKL_PLUS); - MMIO_D(0x7039c, D_SKL_PLUS); + MMIO_D(_MMIO(0x70380), D_SKL_PLUS); + MMIO_D(_MMIO(0x71380), D_SKL_PLUS); + MMIO_D(_MMIO(0x72380), D_SKL_PLUS); + MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); - MMIO_D(0x8f074, D_SKL | D_KBL); - 
MMIO_D(0x8f004, D_SKL | D_KBL); - MMIO_D(0x8f034, D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f004), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f034), D_SKL | D_KBL); - MMIO_D(0xb11c, D_SKL | D_KBL); + MMIO_D(_MMIO(0xb11c), D_SKL | D_KBL); - MMIO_D(0x51000, D_SKL | D_KBL); - MMIO_D(0x6c00c, D_SKL_PLUS); + MMIO_D(_MMIO(0x51000), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c00c), D_SKL_PLUS); - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_D(0xd08, D_SKL_PLUS); - MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0xd08), D_SKL_PLUS); + MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(_MMIO(0x4de0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de8), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4dec), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); + MMIO_DH(_MMIO(0x4dfc), D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); - MMIO_D(0x45008, D_SKL | D_KBL); + MMIO_D(_MMIO(0x45008), D_SKL | D_KBL); - MMIO_D(0x46430, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46430), D_SKL | D_KBL); - MMIO_D(0x46520, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46520), D_SKL | D_KBL); - MMIO_D(0xc403c, D_SKL | D_KBL); - MMIO_D(0xb004, D_SKL_PLUS); + MMIO_D(_MMIO(0xc403c), D_SKL | D_KBL); + MMIO_D(_MMIO(0xb004), D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); - MMIO_D(0x65900, D_SKL_PLUS); - MMIO_D(0x1082c0, D_SKL | D_KBL); - MMIO_D(0x4068, D_SKL | D_KBL); - MMIO_D(0x67054, D_SKL | D_KBL); - MMIO_D(0x6e560, D_SKL | D_KBL); - MMIO_D(0x6e554, D_SKL | D_KBL); - MMIO_D(0x2b20, D_SKL | D_KBL); - MMIO_D(0x65f00, D_SKL | D_KBL); - MMIO_D(0x65f08, D_SKL | D_KBL); - MMIO_D(0x320f0, D_SKL | D_KBL); - - MMIO_D(0x70034, D_SKL_PLUS); - MMIO_D(0x71034, D_SKL_PLUS); - MMIO_D(0x72034, D_SKL_PLUS); - - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); - - MMIO_D(0x44500, D_SKL_PLUS); + MMIO_D(_MMIO(0x65900), D_SKL_PLUS); + MMIO_D(_MMIO(0x1082c0), D_SKL | D_KBL); + MMIO_D(_MMIO(0x4068), D_SKL | D_KBL); + MMIO_D(_MMIO(0x67054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e560), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e554), D_SKL | D_KBL); + MMIO_D(_MMIO(0x2b20), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f00), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f08), D_SKL | D_KBL); + MMIO_D(_MMIO(0x320f0), D_SKL | 
D_KBL);
+
+	MMIO_D(_MMIO(0x70034), D_SKL_PLUS);
+	MMIO_D(_MMIO(0x71034), D_SKL_PLUS);
+	MMIO_D(_MMIO(0x72034), D_SKL_PLUS);
+
+	MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_A)), D_SKL_PLUS);
+	MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_B)), D_SKL_PLUS);
+	MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_C)), D_SKL_PLUS);
+	MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_A)), D_SKL_PLUS);
+	MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_B)), D_SKL_PLUS);
+	MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_C)), D_SKL_PLUS);
+
+	MMIO_D(_MMIO(0x44500), D_SKL_PLUS);
 	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);

-	MMIO_D(0x4ab8, D_KBL);
-	MMIO_D(0x2248, D_SKL_PLUS | D_KBL);
+	MMIO_D(_MMIO(0x4ab8), D_KBL);
+	MMIO_D(_MMIO(0x2248), D_SKL_PLUS | D_KBL);

 	return 0;
 }
@@ -2869,8 +2869,8 @@ static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt,
 	for (i = 0; i < num; i++, block++) {
 		if (!(device & block->device))
 			continue;
-		if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) &&
-		    offset < INTEL_GVT_MMIO_OFFSET(block->offset) + block->size)
+		if (offset >= i915_mmio_reg_offset(block->offset) &&
+		    offset < i915_mmio_reg_offset(block->offset) + block->size)
 			return block;
 	}
 	return NULL;
@@ -2982,8 +2982,8 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
 	for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) {
 		for (j = 0; j < block->size; j += 4) {
 			ret = handler(gvt,
-				INTEL_GVT_MMIO_OFFSET(block->offset) + j,
-				data);
+				      i915_mmio_reg_offset(block->offset) + j,
+				      data);
 			if (ret)
 				return ret;
 		}
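find_mmio_block() above is a linear containment test over per-device MMIO block descriptors, now comparing through i915_mmio_reg_offset() instead of the INTEL_GVT_MMIO_OFFSET() pointer cast (whose definition is dropped from mmio.h below). A minimal sketch of the same range lookup, with simplified stand-in types rather than the GVT structures:

/* Sketch: linear range lookup over register-block descriptors,
 * mirroring the shape of find_mmio_block(). */
#include <stddef.h>

struct mmio_block {
	unsigned int offset;	/* first register offset in the block */
	unsigned int size;	/* bytes covered by the block */
};

static const struct mmio_block *
find_block(const struct mmio_block *blocks, size_t num, unsigned int offset)
{
	size_t i;

	for (i = 0; i < num; i++)
		if (offset >= blocks[i].offset &&
		    offset < blocks[i].offset + blocks[i].size)
			return &blocks[i];
	return NULL;
}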
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index f86983d6655b..45bab5a6290b 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1360,8 +1360,8 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			struct kvmgt_guest_info, track_node);

 	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
-		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
-					(void *)val, len);
+		intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
+					(void *)val, len);
 }

 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index f7227a3ad469..562b5ad857a4 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -117,25 +117,6 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
 		else
 			memcpy(pt, p_data, bytes);

-	} else if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
-		struct intel_vgpu_page_track *t;
-
-		/* Since we enter the failsafe mode early during guest boot,
-		 * guest may not have a chance to set up its ppgtt table, so
-		 * there should not be any wp pages for guest. Keep the wp
-		 * related code here in case we need to handle it in the future.
-		 */
-		t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
-		if (t) {
-			/* remove write protection to prevent future traps */
-			intel_vgpu_clean_page_track(vgpu, t);
-			if (read)
-				intel_gvt_hypervisor_read_gpa(vgpu, pa,
-						p_data, bytes);
-			else
-				intel_gvt_hypervisor_write_gpa(vgpu, pa,
-						p_data, bytes);
-		}
 	}
 	mutex_unlock(&gvt->lock);
 }
@@ -168,23 +149,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 		goto out;
 	}

-	if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
-		struct intel_vgpu_page_track *t;
-
-		t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
-		if (t) {
-			ret = intel_gvt_hypervisor_read_gpa(vgpu, pa,
-					p_data, bytes);
-			if (ret) {
-				gvt_vgpu_err("guest page read error %d, "
-					"gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
-					ret, t->gfn, pa, *(u32 *)p_data,
-					bytes);
-			}
-			goto out;
-		}
-	}
-
 	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);

 	if (WARN_ON(bytes > 8))
@@ -263,23 +227,6 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 		goto out;
 	}

-	if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
-		struct intel_vgpu_page_track *t;
-
-		t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
-		if (t) {
-			ret = t->handler(t, pa, p_data, bytes);
-			if (ret) {
-				gvt_err("guest page write error %d, "
-					"gfn 0x%lx, pa 0x%llx, "
-					"var 0x%x, len %d\n",
-					ret, t->gfn, pa,
-					*(u32 *)p_data, bytes);
-			}
-			goto out;
-		}
-	}
-
 	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);

 	if (WARN_ON(bytes > 8))
@@ -336,10 +283,10 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
 		memcpy(vgpu->mmio.vreg, mmio, info->mmio_size);
 		memcpy(vgpu->mmio.sreg, mmio, info->mmio_size);

-		vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
+		vgpu_vreg_t(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;

 		/* set the bit 0:2(Core C-State ) to C0 */
-		vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
+		vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0;

 		vgpu->mmio.disable_warn_untrack = false;
 	} else {
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 62709ac351cd..71b620875943 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -76,13 +76,6 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
 	int (*handler)(struct intel_gvt *gvt, u32 offset, void *data),
 	void *data);

-
-#define INTEL_GVT_MMIO_OFFSET(reg) ({ \
-	typeof(reg) __reg = reg; \
-	u32 *offset = (u32 *)&__reg; \
-	*offset; \
-})
-
 int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
 void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
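The INTEL_GVT_MMIO_OFFSET() macro removed above peeked into an i915_reg_t through a pointer cast; callers now use the accessor the rest of i915 already uses. A minimal sketch of why a single-member struct makes register offsets type-safe (simplified, not the kernel's exact definitions):

/* Sketch: wrapping an MMIO offset in a struct makes passing a raw
 * integer where a register handle is expected a compile error. */
typedef struct { unsigned int reg; } reg_t;

#define REG(offset) ((reg_t){ .reg = (offset) })

static inline unsigned int reg_offset(reg_t r)
{
	return r.reg;
}

/* reg_offset(REG(0x4ab8)) yields 0x4ab8; a bare 0x4ab8 no longer
 * converts silently into a register argument. */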
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 8a52b56f0e86..74834395dd89 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -149,8 +149,41 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{ /* Terminated */ }
 };

-static u32 gen9_render_mocs[I915_NUM_ENGINES][64];
-static u32 gen9_render_mocs_L3[32];
+static struct {
+	bool initialized;
+	u32 control_table[I915_NUM_ENGINES][64];
+	u32 l3cc_table[32];
+} gen9_render_mocs;
+
+static void load_render_mocs(struct drm_i915_private *dev_priv)
+{
+	i915_reg_t offset;
+	u32 regs[] = {
+		[RCS] = 0xc800,
+		[VCS] = 0xc900,
+		[VCS2] = 0xca00,
+		[BCS] = 0xcc00,
+		[VECS] = 0xcb00,
+	};
+	int ring_id, i;
+
+	for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
+		offset.reg = regs[ring_id];
+		for (i = 0; i < 64; i++) {
+			gen9_render_mocs.control_table[ring_id][i] =
+				I915_READ_FW(offset);
+			offset.reg += 4;
+		}
+	}
+
+	offset.reg = 0xb020;
+	for (i = 0; i < 32; i++) {
+		gen9_render_mocs.l3cc_table[i] =
+			I915_READ_FW(offset);
+		offset.reg += 4;
+	}
+	gen9_render_mocs.initialized = true;
+}

 static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 {
@@ -191,17 +224,20 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
 		gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
 	else
-		vgpu_vreg(vgpu, regs[ring_id]) = 0;
+		vgpu_vreg_t(vgpu, reg) = 0;

 	intel_uncore_forcewake_put(dev_priv, fw);

 	gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
 }

-static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
+static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
+			int ring_id)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct drm_i915_private *dev_priv;
 	i915_reg_t offset, l3_offset;
+	u32 old_v, new_v;
+
 	u32 regs[] = {
 		[RCS] = 0xc800,
 		[VCS] = 0xc900,
@@ -211,54 +247,45 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	};
 	int i;

+	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;

+	if (!pre && !gen9_render_mocs.initialized)
+		load_render_mocs(dev_priv);
+
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
-		gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
-		I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset));
-		offset.reg += 4;
-	}
-
-	if (ring_id == RCS) {
-		l3_offset.reg = 0xb020;
-		for (i = 0; i < 32; i++) {
-			gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
-			I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
-			l3_offset.reg += 4;
-		}
-	}
-}
+		if (pre)
+			old_v = vgpu_vreg_t(pre, offset);
+		else
+			old_v = gen9_render_mocs.control_table[ring_id][i];
+		if (next)
+			new_v = vgpu_vreg_t(next, offset);
+		else
+			new_v = gen9_render_mocs.control_table[ring_id][i];

-static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
-{
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	i915_reg_t offset, l3_offset;
-	u32 regs[] = {
-		[RCS] = 0xc800,
-		[VCS] = 0xc900,
-		[VCS2] = 0xca00,
-		[BCS] = 0xcc00,
-		[VECS] = 0xcb00,
-	};
-	int i;
+		if (old_v != new_v)
+			I915_WRITE_FW(offset, new_v);

-	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
-		return;
-
-	offset.reg = regs[ring_id];
-	for (i = 0; i < 64; i++) {
-		vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
-		I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
 		offset.reg += 4;
 	}

 	if (ring_id == RCS) {
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < 32; i++) {
-			vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
-			I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
+			if (pre)
+				old_v = vgpu_vreg_t(pre, l3_offset);
+			else
+				old_v = gen9_render_mocs.l3cc_table[i];
+			if (next)
+				new_v = vgpu_vreg_t(next, l3_offset);
+			else
+				new_v = gen9_render_mocs.l3cc_table[i];
+
+			if (old_v != new_v)
+				I915_WRITE_FW(l3_offset, new_v);
+
 			l3_offset.reg += 4;
 		}
 	}
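switch_mocs() above folds the old load_mocs()/restore_mocs() pair into a single pass: the outgoing value comes from the previous vGPU's virtual registers (or the lazily captured host table when pre is NULL), the incoming value from the next vGPU (or the same host table), and the MMIO write is skipped when the two agree. A standalone sketch of that shape, with plain arrays standing in for vGPU state and hardware:

/* Sketch: one-pass context switch of a register table, where NULL
 * means "host" on either side and unchanged values skip the write. */
#include <stddef.h>

#define TABLE_LEN 64

struct vctx { unsigned int vreg[TABLE_LEN]; };

static unsigned int host_table[TABLE_LEN];	/* captured once, lazily */
static unsigned int hw[TABLE_LEN];		/* stand-in for MMIO */

static void switch_table(const struct vctx *pre, const struct vctx *next)
{
	size_t i;

	for (i = 0; i < TABLE_LEN; i++) {
		unsigned int old_v = pre ? pre->vreg[i] : host_table[i];
		unsigned int new_v = next ? next->vreg[i] : host_table[i];

		if (old_v != new_v)
			hw[i] = new_v;	/* would be an I915_WRITE_FW() */
	}
}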
@@ -266,84 +293,77 @@

 #define CTX_CONTEXT_CONTROL_VAL	0x03

-/* Switch ring mmio values (context) from host to a vgpu. */
-static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context). */
+static void switch_mmio(struct intel_vgpu *pre,
+			struct intel_vgpu *next,
+			int ring_id)
 {
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state;
-	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+	struct drm_i915_private *dev_priv;
+	struct intel_vgpu_submission *s;
+	u32 *reg_state, ctx_ctrl;
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 	struct engine_mmio *mmio;
-	u32 v;
-
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		load_mocs(vgpu, ring_id);
-
-	mmio = vgpu->gvt->engine_mmio_list;
-	while (i915_mmio_reg_offset((mmio++)->reg)) {
-		if (mmio->ring_id != ring_id)
-			continue;
-
-		mmio->value = I915_READ_FW(mmio->reg);
-
-		/*
-		 * if it is an inhibit context, load in_context mmio
-		 * into HW by mmio write. If it is not, skip this mmio
-		 * write.
-		 */
-		if (mmio->in_context &&
-		    (ctx_ctrl & inhibit_mask) != inhibit_mask)
-			continue;
-
-		if (mmio->mask)
-			v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16);
-		else
-			v = vgpu_vreg(vgpu, mmio->reg);
-
-		I915_WRITE_FW(mmio->reg, v);
-
-		trace_render_mmio(vgpu->id, "load",
-				  i915_mmio_reg_offset(mmio->reg),
-				  mmio->value, v);
-	}
-
-	handle_tlb_pending_event(vgpu, ring_id);
-}
-
-/* Switch ring mmio values (context) from vgpu to host. */
-static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
-{
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct engine_mmio *mmio;
-	u32 v;
+	u32 old_v, new_v;

+	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
 	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		restore_mocs(vgpu, ring_id);
+		switch_mocs(pre, next, ring_id);

-	mmio = vgpu->gvt->engine_mmio_list;
+	mmio = dev_priv->gvt->engine_mmio_list;
 	while (i915_mmio_reg_offset((mmio++)->reg)) {
 		if (mmio->ring_id != ring_id)
 			continue;
-
-		vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
-
-		if (mmio->mask) {
-			vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
-			v = mmio->value | (mmio->mask << 16);
+		// save
+		if (pre) {
+			vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
+			if (mmio->mask)
+				vgpu_vreg_t(pre, mmio->reg) &=
+					~(mmio->mask << 16);
+			old_v = vgpu_vreg_t(pre, mmio->reg);
 		} else
-			v = mmio->value;
-
-		if (mmio->in_context)
-			continue;
+			old_v = mmio->value = I915_READ_FW(mmio->reg);
+
+		// restore
+		if (next) {
+			s = &next->submission;
+			reg_state =
+				s->shadow_ctx->engine[ring_id].lrc_reg_state;
+			ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+			/*
+			 * if it is an inhibit context, load in_context mmio
+			 * into HW by mmio write. If it is not, skip this mmio
+			 * write.
+			 */
+			if (mmio->in_context &&
+			    (ctx_ctrl & inhibit_mask) != inhibit_mask)
+				continue;
+
+			if (mmio->mask)
+				new_v = vgpu_vreg_t(next, mmio->reg) |
+					(mmio->mask << 16);
+			else
+				new_v = vgpu_vreg_t(next, mmio->reg);
+		} else {
+			if (mmio->in_context)
+				continue;
+			if (mmio->mask)
+				new_v = mmio->value | (mmio->mask << 16);
+			else
+				new_v = mmio->value;
+		}

-		I915_WRITE_FW(mmio->reg, v);
+		I915_WRITE_FW(mmio->reg, new_v);

-		trace_render_mmio(vgpu->id, "restore",
+		trace_render_mmio(pre ? pre->id : 0,
+				  next ? next->id : 0,
+				  "switch",
 				  i915_mmio_reg_offset(mmio->reg),
-				  mmio->value, v);
+				  old_v, new_v);
 	}
+
+	if (next)
+		handle_tlb_pending_event(next, ring_id);
 }
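switch_mmio() above composes values for i915 "masked" registers, where the top 16 bits of a write select which of the low 16 bits actually change; OR-ing mmio->mask << 16 arms those bits. A sketch of that hardware convention, assuming a simplified 32-bit masked register:

/* Sketch: an i915-style masked register updates only the low bits
 * whose corresponding high (enable) bits are set in the written value. */
#include <stdint.h>

static uint32_t hw_reg;	/* stand-in for a masked MMIO register */

static void masked_write(uint32_t val_and_mask)
{
	uint32_t enable = val_and_mask >> 16;
	uint32_t value = val_and_mask & 0xffff;

	hw_reg = (hw_reg & ~enable) | (value & enable);
}

/* To force the bits in `mask` to the values in `v`, write
 * (mask << 16) | v, which is what vreg | (mmio->mask << 16) builds. */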

 /**
@@ -374,17 +394,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * handle forcewake manually.
 	 */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/**
-	 * TODO: Optimize for vGPU to vGPU switch by merging
-	 * switch_mmio_to_host() and switch_mmio_to_vgpu().
-	 */
-	if (pre)
-		switch_mmio_to_host(pre, ring_id);
-
-	if (next)
-		switch_mmio_to_vgpu(next, ring_id);
-
+	switch_mmio(pre, next, ring_id);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 8c150381d9a4..7a2511538f34 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -330,13 +330,14 @@ TRACE_EVENT(inject_msi,
 );

 TRACE_EVENT(render_mmio,
-	TP_PROTO(int id, char *action, unsigned int reg,
+	TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
 		 unsigned int old_val, unsigned int new_val),

-	TP_ARGS(id, action, reg, new_val, old_val),
+	TP_ARGS(old_id, new_id, action, reg, new_val, old_val),

 	TP_STRUCT__entry(
-		__field(int, id)
+		__field(int, old_id)
+		__field(int, new_id)
 		__array(char, buf, GVT_TEMP_STR_LEN)
 		__field(unsigned int, reg)
 		__field(unsigned int, old_val)
@@ -344,15 +345,17 @@ TRACE_EVENT(render_mmio,
 	),

 	TP_fast_assign(
-		__entry->id = id;
+		__entry->old_id = old_id;
+		__entry->new_id = new_id;
 		snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
 		__entry->reg = reg;
 		__entry->old_val = old_val;
 		__entry->new_val = new_val;
 	),

-	TP_printk("VM%u %s reg %x, old %08x new %08x\n",
-		__entry->id, __entry->buf, __entry->reg,
+	TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n",
+		__entry->old_id, __entry->new_id,
+		__entry->buf, __entry->reg,
 		__entry->old_val, __entry->new_val)
 );
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 39926176fbeb..4688619f6a1c 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -38,25 +38,25 @@
 void populate_pvinfo_page(struct intel_vgpu *vgpu)
 {
 	/* setup the ballooning information */
-	vgpu_vreg64(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
-	vgpu_vreg(vgpu, vgtif_reg(version_major)) = 1;
-	vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0;
-	vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0;
-	vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
+	vgpu_vreg64_t(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
+	vgpu_vreg_t(vgpu, vgtif_reg(version_major)) = 1;
+	vgpu_vreg_t(vgpu, vgtif_reg(version_minor)) = 0;
+	vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id;

-	vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
-	vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;

-	vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
 		vgpu_aperture_gmadr_base(vgpu);
-	vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
 		vgpu_aperture_sz(vgpu);
-	vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) =
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) =
 		vgpu_hidden_gmadr_base(vgpu);
-	vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) =
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) =
 		vgpu_hidden_sz(vgpu);

-	vgpu_vreg(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu);
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu);

 	gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id);
 	gvt_dbg_core("aperture 
base [GMADR] 0x%llx size 0x%llx\n", diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d8c6ec3cca71..e968aeae1d84 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -37,40 +37,21 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) return to_i915(node->minor->dev); } -static __always_inline void seq_print_param(struct seq_file *m, - const char *name, - const char *type, - const void *x) -{ - if (!__builtin_strcmp(type, "bool")) - seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); - else if (!__builtin_strcmp(type, "int")) - seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); - else if (!__builtin_strcmp(type, "unsigned int")) - seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "char *")) - seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); - else - BUILD_BUG(); -} - static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_device_info *info = INTEL_INFO(dev_priv); + struct drm_printer p = drm_seq_file_printer(m); seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); -#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) - DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); -#undef PRINT_FLAG + intel_device_info_dump_flags(info, &p); + intel_device_info_dump_runtime(info, &p); kernel_param_lock(THIS_MODULE); -#define PRINT_PARAM(T, x, ...) seq_print_param(m, #x, #T, &i915_modparams.x); - I915_PARAMS_FOR_EACH(PRINT_PARAM); -#undef PRINT_PARAM + i915_params_dump(&i915_modparams, &p); kernel_param_unlock(THIS_MODULE); return 0; @@ -1601,20 +1582,23 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_fbc_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_fbc *fbc = &dev_priv->fbc; - if (!HAS_FBC(dev_priv)) { - seq_puts(m, "FBC unsupported on this chipset\n"); - return 0; - } + if (!HAS_FBC(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->fbc.lock); + mutex_lock(&fbc->lock); if (intel_fbc_is_active(dev_priv)) seq_puts(m, "FBC enabled\n"); else - seq_printf(m, "FBC disabled: %s\n", - dev_priv->fbc.no_fbc_reason); + seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); + + if (fbc->work.scheduled) + seq_printf(m, "FBC worker scheduled on vblank %u, now %llu\n", + fbc->work.scheduled_vblank, + drm_crtc_vblank_count(&fbc->crtc->base)); if (intel_fbc_is_active(dev_priv)) { u32 mask; @@ -1634,7 +1618,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "Compressing: %s\n", yesno(mask)); } - mutex_unlock(&dev_priv->fbc.lock); + mutex_unlock(&fbc->lock); intel_runtime_pm_put(dev_priv); return 0; @@ -1681,10 +1665,8 @@ static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (!HAS_IPS(dev_priv)) { - seq_puts(m, "not supported\n"); - return 0; - } + if (!HAS_IPS(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -1770,10 +1752,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; - if (!HAS_LLC(dev_priv)) { - seq_puts(m, "unsupported on this chipset\n"); - return 0; - } + if (!HAS_LLC(dev_priv)) + 
return -ENODEV;

 	intel_runtime_pm_get(dev_priv);
@@ -2253,8 +2233,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_printer p;

-	if (!HAS_HUC_UCODE(dev_priv))
-		return 0;
+	if (!HAS_HUC(dev_priv))
+		return -ENODEV;

 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
@@ -2272,8 +2252,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	struct drm_printer p;
 	u32 tmp, i;

-	if (!HAS_GUC_UCODE(dev_priv))
-		return 0;
+	if (!HAS_GUC(dev_priv))
+		return -ENODEV;

 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
@@ -2346,29 +2326,16 @@ static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }

-static bool check_guc_submission(struct seq_file *m)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	const struct intel_guc *guc = &dev_priv->guc;
-
-	if (!guc->execbuf_client) {
-		seq_printf(m, "GuC submission %s\n",
-			   HAS_GUC_SCHED(dev_priv) ?
-			   "disabled" :
-			   "not supported");
-		return false;
-	}
-
-	return true;
-}
-
 static int i915_guc_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const struct intel_guc *guc = &dev_priv->guc;

-	if (!check_guc_submission(m))
-		return 0;
+	if (!USES_GUC_SUBMISSION(dev_priv))
+		return -ENODEV;
+
+	GEM_BUG_ON(!guc->execbuf_client);
+	GEM_BUG_ON(!guc->preempt_client);

 	seq_printf(m, "Doorbell map:\n");
 	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
@@ -2395,8 +2362,8 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 	unsigned int tmp;
 	int index;

-	if (!check_guc_submission(m))
-		return 0;
+	if (!USES_GUC_SUBMISSION(dev_priv))
+		return -ENODEV;

 	for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
 		struct intel_engine_cs *engine;
@@ -2449,6 +2416,9 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
 	u32 *log;
 	int i = 0;

+	if (!HAS_GUC(dev_priv))
+		return -ENODEV;
+
 	if (dump_load_err)
 		obj = dev_priv->guc.load_err_log;
 	else if (dev_priv->guc.log.vma)
@@ -2480,6 +2450,9 @@ static int i915_guc_log_control_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;

+	if (!HAS_GUC(dev_priv))
+		return -ENODEV;
+
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;

@@ -2493,6 +2466,9 @@ static int i915_guc_log_control_set(void *data, u64 val)
 	struct drm_i915_private *dev_priv = data;
 	int ret;

+	if (!HAS_GUC(dev_priv))
+		return -ENODEV;
+
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;

@@ -2543,10 +2519,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 	enum pipe pipe;
 	bool enabled = false;

-	if (!HAS_PSR(dev_priv)) {
-		seq_puts(m, "PSR not supported\n");
-		return 0;
-	}
+	if (!HAS_PSR(dev_priv))
+		return -ENODEV;

 	intel_runtime_pm_get(dev_priv);

@@ -2785,10 +2759,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_csr *csr;

-	if (!HAS_CSR(dev_priv)) {
-		seq_puts(m, "not supported\n");
-		return 0;
-	}
+	if (!HAS_CSR(dev_priv))
+		return -ENODEV;

 	csr = &dev_priv->csr;

@@ -3324,7 +3296,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused)
 	int plane;

 	if (INTEL_GEN(dev_priv) < 9)
-		return 0;
+		return -ENODEV;

 	drm_modeset_lock_all(dev);
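The debugfs conversions above stop printing a "not supported" placeholder and instead fail the show callback with -ENODEV, so a missing feature surfaces as an error from read(2) rather than as fake file contents. A sketch of the pattern in a plain seq_file show function (the feature flag is a stand-in for the HAS_*() checks):

/* Sketch: report an unsupported feature as -ENODEV from a
 * debugfs/seq_file show callback instead of placeholder text. */
#include <linux/seq_file.h>
#include <linux/errno.h>

static bool has_feature;	/* stand-in for HAS_FBC()/HAS_PSR()/... */

static int feature_status_show(struct seq_file *m, void *unused)
{
	if (!has_feature)
		return -ENODEV;	/* surfaces as a failed read(2) */

	seq_puts(m, "feature enabled\n");
	return 0;
}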
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ca9f4b2862eb..6c8da9d20c33 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -931,8 +931,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_display_crc_init(dev_priv);

-	intel_device_info_dump(dev_priv);
-
 	intel_detect_preproduction_hw(dev_priv);

 	return 0;
@@ -1084,7 +1082,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	if (i915_inject_load_failure())
 		return -ENODEV;

-	intel_device_info_runtime_init(dev_priv);
+	intel_device_info_runtime_init(mkwrite_device_info(dev_priv));

 	intel_sanitize_options(dev_priv);

@@ -1294,6 +1292,21 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	i915_gem_shrinker_unregister(dev_priv);
 }

+static void i915_welcome_messages(struct drm_i915_private *dev_priv)
+{
+	if (drm_debug & DRM_UT_DRIVER) {
+		struct drm_printer p = drm_debug_printer("i915 device info:");
+
+		intel_device_info_dump(&dev_priv->info, &p);
+		intel_device_info_dump_runtime(&dev_priv->info, &p);
+	}
+
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
+		DRM_INFO("DRM_I915_DEBUG enabled\n");
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		DRM_INFO("DRM_I915_DEBUG_GEM enabled\n");
+}
+
 /**
  * i915_driver_load - setup chip and create an initial config
  * @pdev: PCI device
@@ -1379,13 +1392,10 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)

 	intel_init_ipc(dev_priv);

-	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
-		DRM_INFO("DRM_I915_DEBUG enabled\n");
-	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		DRM_INFO("DRM_I915_DEBUG_GEM enabled\n");
-
 	intel_runtime_pm_put(dev_priv);

+	i915_welcome_messages(dev_priv);
+
 	return 0;

 out_cleanup_hw:
@@ -1924,9 +1934,6 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 		goto taint;
 	}

-	i915_gem_reset(i915);
-	intel_overlay_reset(i915);
-
 	/* Ok, now get things going again... */

 	/*
@@ -1939,6 +1946,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 		goto error;
 	}

+	i915_gem_reset(i915);
+	intel_overlay_reset(i915);
+
 	/*
 	 * Next we need to restore the context, but we don't use those
 	 * yet either...
@@ -2011,19 +2021,19 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)

 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));

+	active_request = i915_gem_reset_prepare_engine(engine);
+	if (IS_ERR_OR_NULL(active_request)) {
+		/* Either the previous reset failed, or we pardon the reset. 
*/ + ret = PTR_ERR(active_request); + goto out; + } + if (!(flags & I915_RESET_QUIET)) { dev_notice(engine->i915->drm.dev, "Resetting %s after gpu hang\n", engine->name); } error->reset_engine_count[engine->id]++; - active_request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR(active_request)) { - DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n"); - ret = PTR_ERR(active_request); - goto out; - } - if (!engine->i915->guc.execbuf_client) ret = intel_gt_reset_engine(engine->i915, engine); else diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1aba5657f5f0..caebd5825279 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -56,12 +56,15 @@ #include "i915_reg.h" #include "i915_utils.h" -#include "intel_uncore.h" #include "intel_bios.h" +#include "intel_device_info.h" +#include "intel_display.h" #include "intel_dpll_mgr.h" -#include "intel_uc.h" #include "intel_lrc.h" +#include "intel_opregion.h" #include "intel_ringbuffer.h" +#include "intel_uncore.h" +#include "intel_uc.h" #include "i915_gem.h" #include "i915_gem_context.h" @@ -80,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171214" -#define DRIVER_TIMESTAMP 1513282202 +#define DRIVER_DATE "20171222" +#define DRIVER_TIMESTAMP 1513971710 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -243,174 +246,6 @@ static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1, return clamp_u64_to_fixed16(interm_sum); } -static inline const char *yesno(bool v) -{ - return v ? "yes" : "no"; -} - -static inline const char *onoff(bool v) -{ - return v ? "on" : "off"; -} - -static inline const char *enableddisabled(bool v) -{ - return v ? "enabled" : "disabled"; -} - -enum pipe { - INVALID_PIPE = -1, - PIPE_A = 0, - PIPE_B, - PIPE_C, - _PIPE_EDP, - I915_MAX_PIPES = _PIPE_EDP -}; -#define pipe_name(p) ((p) + 'A') - -enum transcoder { - TRANSCODER_A = 0, - TRANSCODER_B, - TRANSCODER_C, - TRANSCODER_EDP, - TRANSCODER_DSI_A, - TRANSCODER_DSI_C, - I915_MAX_TRANSCODERS -}; - -static inline const char *transcoder_name(enum transcoder transcoder) -{ - switch (transcoder) { - case TRANSCODER_A: - return "A"; - case TRANSCODER_B: - return "B"; - case TRANSCODER_C: - return "C"; - case TRANSCODER_EDP: - return "EDP"; - case TRANSCODER_DSI_A: - return "DSI A"; - case TRANSCODER_DSI_C: - return "DSI C"; - default: - return "<invalid>"; - } -} - -static inline bool transcoder_is_dsi(enum transcoder transcoder) -{ - return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; -} - -/* - * Global legacy plane identifier. Valid only for primary/sprite - * planes on pre-g4x, and only for primary planes on g4x-bdw. - */ -enum i9xx_plane_id { - PLANE_A, - PLANE_B, - PLANE_C, -}; -#define plane_name(p) ((p) + 'A') - -#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') - -/* - * Per-pipe plane identifier. - * I915_MAX_PLANES in the enum below is the maximum (across all platforms) - * number of planes per CRTC. Not all platforms really have this many planes, - * which means some arrays of size I915_MAX_PLANES may have unused entries - * between the topmost sprite plane and the cursor plane. - * - * This is expected to be passed to various register macros - * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. 
- */ -enum plane_id { - PLANE_PRIMARY, - PLANE_SPRITE0, - PLANE_SPRITE1, - PLANE_SPRITE2, - PLANE_CURSOR, - I915_MAX_PLANES, -}; - -#define for_each_plane_id_on_crtc(__crtc, __p) \ - for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ - for_each_if ((__crtc)->plane_ids_mask & BIT(__p)) - -enum port { - PORT_NONE = -1, - PORT_A = 0, - PORT_B, - PORT_C, - PORT_D, - PORT_E, - I915_MAX_PORTS -}; -#define port_name(p) ((p) + 'A') - -#define I915_NUM_PHYS_VLV 2 - -enum dpio_channel { - DPIO_CH0, - DPIO_CH1 -}; - -enum dpio_phy { - DPIO_PHY0, - DPIO_PHY1, - DPIO_PHY2, -}; - -enum intel_display_power_domain { - POWER_DOMAIN_PIPE_A, - POWER_DOMAIN_PIPE_B, - POWER_DOMAIN_PIPE_C, - POWER_DOMAIN_PIPE_A_PANEL_FITTER, - POWER_DOMAIN_PIPE_B_PANEL_FITTER, - POWER_DOMAIN_PIPE_C_PANEL_FITTER, - POWER_DOMAIN_TRANSCODER_A, - POWER_DOMAIN_TRANSCODER_B, - POWER_DOMAIN_TRANSCODER_C, - POWER_DOMAIN_TRANSCODER_EDP, - POWER_DOMAIN_TRANSCODER_DSI_A, - POWER_DOMAIN_TRANSCODER_DSI_C, - POWER_DOMAIN_PORT_DDI_A_LANES, - POWER_DOMAIN_PORT_DDI_B_LANES, - POWER_DOMAIN_PORT_DDI_C_LANES, - POWER_DOMAIN_PORT_DDI_D_LANES, - POWER_DOMAIN_PORT_DDI_E_LANES, - POWER_DOMAIN_PORT_DDI_A_IO, - POWER_DOMAIN_PORT_DDI_B_IO, - POWER_DOMAIN_PORT_DDI_C_IO, - POWER_DOMAIN_PORT_DDI_D_IO, - POWER_DOMAIN_PORT_DDI_E_IO, - POWER_DOMAIN_PORT_DSI, - POWER_DOMAIN_PORT_CRT, - POWER_DOMAIN_PORT_OTHER, - POWER_DOMAIN_VGA, - POWER_DOMAIN_AUDIO, - POWER_DOMAIN_PLLS, - POWER_DOMAIN_AUX_A, - POWER_DOMAIN_AUX_B, - POWER_DOMAIN_AUX_C, - POWER_DOMAIN_AUX_D, - POWER_DOMAIN_GMBUS, - POWER_DOMAIN_MODESET, - POWER_DOMAIN_GT_IRQ, - POWER_DOMAIN_INIT, - - POWER_DOMAIN_NUM, -}; - -#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) -#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ - ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) -#define POWER_DOMAIN_TRANSCODER(tran) \ - ((tran) == TRANSCODER_EDP ? 
POWER_DOMAIN_TRANSCODER_EDP : \ - (tran) + POWER_DOMAIN_TRANSCODER_A) - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -472,121 +307,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_INSTRUCTION | \ I915_GEM_DOMAIN_VERTEX) -#define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) -#define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ - for_each_if ((__mask) & (1 << (__p))) -#define for_each_universal_plane(__dev_priv, __pipe, __p) \ - for ((__p) = 0; \ - (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ - (__p)++) -#define for_each_sprite(__dev_priv, __p, __s) \ - for ((__s) = 0; \ - (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ - (__s)++) - -#define for_each_port_masked(__port, __ports_mask) \ - for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ - for_each_if ((__ports_mask) & (1 << (__port))) - -#define for_each_crtc(dev, crtc) \ - list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) - -#define for_each_intel_plane(dev, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) - -#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((plane_mask) & \ - (1 << drm_plane_index(&intel_plane->base))) - -#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((intel_plane)->pipe == (intel_crtc)->pipe) - -#define for_each_intel_crtc(dev, intel_crtc) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) - -#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if ((crtc_mask) & (1 << drm_crtc_index(&intel_crtc->base))) - -#define for_each_intel_encoder(dev, intel_encoder) \ - list_for_each_entry(intel_encoder, \ - &(dev)->mode_config.encoder_list, \ - base.head) - -#define for_each_intel_connector_iter(intel_connector, iter) \ - while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) - -#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ - list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ - for_each_if ((intel_encoder)->base.crtc == (__crtc)) - -#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ - list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ - for_each_if ((intel_connector)->base.encoder == (__encoder)) - -#define for_each_power_domain(domain, mask) \ - for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if (BIT_ULL(domain) & (mask)) - -#define for_each_power_well(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ - (__power_well) - (__dev_priv)->power_domains.power_wells < \ - (__dev_priv)->power_domains.power_well_count; \ - (__power_well)++) - -#define for_each_power_well_rev(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ - (__dev_priv)->power_domains.power_well_count - 1; \ - (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ - (__power_well)--) - -#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ - 
for_each_power_well(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ - for_each_power_well_rev(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_total_plane && \ - ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ - (__i)++) \ - for_each_if (plane) - -#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_crtc && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)++) \ - for_each_if (crtc) - -#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_total_plane && \ - ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \ - (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ - (__i)++) \ - for_each_if (plane) - struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; @@ -623,20 +343,6 @@ struct drm_i915_file_private { atomic_t context_bans; }; -/* Used by dp and fdi links */ -struct intel_link_m_n { - uint32_t tu; - uint32_t gmch_m; - uint32_t gmch_n; - uint32_t link_m; - uint32_t link_n; -}; - -void intel_link_compute_m_n(int bpp, int nlanes, - int pixel_clock, int link_clock, - struct intel_link_m_n *m_n, - bool reduce_m_n); - /* Interface history: * * 1.1: Original. 
@@ -651,27 +357,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -struct opregion_header; -struct opregion_acpi; -struct opregion_swsci; -struct opregion_asle; - -struct intel_opregion { - struct opregion_header *header; - struct opregion_acpi *acpi; - struct opregion_swsci *swsci; - u32 swsci_gbda_sub_functions; - u32 swsci_sbcb_sub_functions; - struct opregion_asle *asle; - void *rvda; - void *vbt_firmware; - const void *vbt; - u32 vbt_size; - u32 *lid_state; - struct work_struct asle_work; -}; -#define OPREGION_SIZE (8*1024) - struct intel_overlay; struct intel_overlay_error_state; @@ -764,137 +449,6 @@ struct intel_csr { uint32_t allowed_dc_mask; }; -#define DEV_INFO_FOR_EACH_FLAG(func) \ - func(is_mobile); \ - func(is_lp); \ - func(is_alpha_support); \ - /* Keep has_* in alphabetical order */ \ - func(has_64bit_reloc); \ - func(has_aliasing_ppgtt); \ - func(has_csr); \ - func(has_ddi); \ - func(has_dp_mst); \ - func(has_reset_engine); \ - func(has_fbc); \ - func(has_fpga_dbg); \ - func(has_full_ppgtt); \ - func(has_full_48bit_ppgtt); \ - func(has_gmch_display); \ - func(has_guc); \ - func(has_guc_ct); \ - func(has_hotplug); \ - func(has_l3_dpf); \ - func(has_llc); \ - func(has_logical_ring_contexts); \ - func(has_logical_ring_preemption); \ - func(has_overlay); \ - func(has_pooled_eu); \ - func(has_psr); \ - func(has_rc6); \ - func(has_rc6p); \ - func(has_resource_streamer); \ - func(has_runtime_pm); \ - func(has_snoop); \ - func(unfenced_needs_alignment); \ - func(cursor_needs_physical); \ - func(hws_needs_physical); \ - func(overlay_needs_physical); \ - func(supports_tv); \ - func(has_ipc); - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask; - u8 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
*/ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; -}; - -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) -{ - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); -} - -/* Keep in gen based order, and chronological order within a gen */ -enum intel_platform { - INTEL_PLATFORM_UNINITIALIZED = 0, - INTEL_I830, - INTEL_I845G, - INTEL_I85X, - INTEL_I865G, - INTEL_I915G, - INTEL_I915GM, - INTEL_I945G, - INTEL_I945GM, - INTEL_G33, - INTEL_PINEVIEW, - INTEL_I965G, - INTEL_I965GM, - INTEL_G45, - INTEL_GM45, - INTEL_IRONLAKE, - INTEL_SANDYBRIDGE, - INTEL_IVYBRIDGE, - INTEL_VALLEYVIEW, - INTEL_HASWELL, - INTEL_BROADWELL, - INTEL_CHERRYVIEW, - INTEL_SKYLAKE, - INTEL_BROXTON, - INTEL_KABYLAKE, - INTEL_GEMINILAKE, - INTEL_COFFEELAKE, - INTEL_CANNONLAKE, - INTEL_MAX_PLATFORMS -}; - -struct intel_device_info { - u16 device_id; - u16 gen_mask; - - u8 gen; - u8 gt; /* GT number, 0 if undefined */ - u8 num_rings; - u8 ring_mask; /* Rings supported by the HW */ - - enum intel_platform platform; - u32 platform_mask; - - u32 display_mmio_offset; - - u8 num_pipes; - u8 num_sprites[I915_MAX_PIPES]; - u8 num_scalers[I915_MAX_PIPES]; - - unsigned int page_sizes; /* page sizes supported by the HW */ - -#define DEFINE_FLAG(name) u8 name:1 - DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); -#undef DEFINE_FLAG - u16 ddb_size; /* in blocks */ - - /* Register offsets for the various display pipes and transcoders */ - int pipe_offsets[I915_MAX_TRANSCODERS]; - int trans_offsets[I915_MAX_TRANSCODERS]; - int palette_offsets[I915_MAX_PIPES]; - int cursor_offsets[I915_MAX_PIPES]; - - /* Slice/subslice/EU info */ - struct sseu_dev_info sseu; - - u32 cs_timestamp_frequency_khz; - - struct color_luts { - u16 degamma_lut_size; - u16 gamma_lut_size; - } color; -}; - struct intel_display_error_state; struct i915_gpu_state { @@ -948,6 +502,7 @@ struct i915_gpu_state { struct drm_i915_error_engine { int engine_id; /* Software tracked state */ + bool idle; bool waiting; int num_waiters; unsigned long hangcheck_timestamp; @@ -2405,6 +1960,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; @@ -4111,41 +3669,6 @@ bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, enum port port); - -/* intel_opregion.c */ -#ifdef CONFIG_ACPI -extern int intel_opregion_setup(struct drm_i915_private *dev_priv); -extern void intel_opregion_register(struct drm_i915_private *dev_priv); -extern void intel_opregion_unregister(struct drm_i915_private *dev_priv); -extern void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); -extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, - bool enable); -extern int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, - pci_power_t state); -extern int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); -#else -static inline int intel_opregion_setup(struct drm_i915_private *dev) { return 0; } -static inline void intel_opregion_register(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) -{ -} -static inline int -intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) -{ - return 0; -} 
-static inline int
-intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state)
-{
-	return 0;
-}
-static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev)
-{
-	return -ENODEV;
-}
-#endif
-
 /* intel_acpi.c */
 #ifdef CONFIG_ACPI
 extern void intel_register_dsm_handler(void);
@@ -4162,10 +3685,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)&dev_priv->info;
 }

-const char *intel_platform_name(enum intel_platform platform);
-void intel_device_info_runtime_init(struct drm_i915_private *dev_priv);
-void intel_device_info_dump(struct drm_i915_private *dev_priv);
-
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
 extern int intel_modeset_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a7f5579a7a5..ba9f67c256f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2596,7 +2596,7 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	}

 	err = obj->ops->get_pages(obj);
-	GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages));
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));

 	return err;
 }
@@ -3089,7 +3089,12 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 void i915_gem_reset_engine(struct intel_engine_cs *engine,
 			   struct drm_i915_gem_request *request)
 {
-	engine->irq_posted = 0;
+	/*
+	 * Make sure this write is visible before we re-enable the interrupt
+	 * handlers on another CPU, as tasklet_enable() resolves to just
+	 * a compiler barrier which is insufficient for our purpose here.
+	 */
+	smp_store_mb(engine->irq_posted, 0);

 	if (request)
 		request = i915_gem_reset_request(engine, request);
@@ -3119,6 +3124,25 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 		ctx = fetch_and_zero(&engine->last_retired_context);
 		if (ctx)
 			engine->context_unpin(engine, ctx);
+
+		/*
+		 * Ostensibly, we always want a context loaded for powersaving,
+		 * so if the engine is idle after the reset, send a request
+		 * to load our scratch kernel_context.
+		 *
+		 * More mysteriously, if we leave the engine idle after a reset,
+		 * the next userspace batch may hang, with what appears to be
+		 * an incoherent read by the CS (presumably stale TLB). An
+		 * empty request appears sufficient to paper over the glitch.
+		 */
+		if (list_empty(&engine->timeline->requests)) {
+			struct drm_i915_gem_request *rq;
+
+			rq = i915_gem_request_alloc(engine,
+						    dev_priv->kernel_context);
+			if (!IS_ERR(rq))
+				__i915_add_request(rq, false);
+		}
 	}

 	i915_gem_restore_fences(dev_priv);
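The smp_store_mb() change above matters because tasklet_enable() is only a compiler barrier: the cleared irq_posted must be globally visible before interrupt handling resumes on another CPU. As a loose analogy in portable C11 (not the kernel primitive itself), the difference is between a relaxed store and one ordered before everything that follows:

/* Sketch: order a flag clear before later "re-enable" work; C11
 * seq_cst stands in for the kernel's smp_store_mb() store+barrier. */
#include <stdatomic.h>

static atomic_int irq_posted;

static void clear_then_reenable(void)
{
	/* A relaxed store may become visible only after later work:
	 * atomic_store_explicit(&irq_posted, 0, memory_order_relaxed); */

	/* A fully ordered store is visible before anything that follows,
	 * which is the guarantee the reset path needs. */
	atomic_store_explicit(&irq_posted, 0, memory_order_seq_cst);
}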
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index ee83ec838ee7..a1d6956734f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -27,6 +27,7 @@
 #include "i915_drv.h"

 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)

 /* convert swiotlb segment size into sensible units (pages)! */
 #define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT)
@@ -95,7 +96,8 @@ create_st:
 			struct page *page;

 			do {
-				page = alloc_pages(gfp | (order ? QUIET : 0), order);
+				page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
+						   order);
 				if (page)
 					break;
 				if (!order--)
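The allocation loop above lets high-order attempts fail fast (QUIET) and gives only the final order-0 attempt a harder, but still failable, retry policy (MAYFAIL), so an optional allocation never invokes the OOM killer. A sketch of the descending-order backoff, with malloc standing in for alloc_pages:

/* Sketch: try large chunks with a "don't try hard" policy, step down
 * on failure, and let only the last attempt retry (but still fail
 * rather than trigger the OOM killer). */
#include <stdlib.h>

static void *alloc_order(unsigned int order, int try_hard)
{
	(void)try_hard;	/* stands in for gfp flags like __GFP_RETRY_MAYFAIL */
	return malloc(4096u << order);
}

static void *alloc_backoff(unsigned int max_order)
{
	unsigned int order = max_order;

	for (;;) {
		void *p = alloc_order(order, order == 0);
		if (p)
			return p;
		if (order == 0)
			return NULL;	/* give up instead of OOM */
		order--;
	}
}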
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 59f023bb7015..d575109f7a7f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -479,6 +479,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	/* Transfer from per-context onto the global per-engine timeline */
 	timeline = engine->timeline;
 	GEM_BUG_ON(timeline == request->timeline);
+	GEM_BUG_ON(request->global_seqno);

 	seqno = timeline_get_seqno(timeline);
 	GEM_BUG_ON(!seqno);
@@ -525,6 +526,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
 	/* Only unwind in reverse order, required so that the per-context list
 	 * is kept in seqno/ring order.
 	 */
+	GEM_BUG_ON(!request->global_seqno);
 	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);

 	engine->timeline->seqno--;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index aba50aa613f1..944059322daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -416,6 +416,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	int n;

 	err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
 	err_printf(m, "  START: 0x%08x\n", ee->start);
 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
 	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
@@ -564,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 static void err_print_capabilities(struct drm_i915_error_state_buf *m,
 				   const struct intel_device_info *info)
 {
-#define PRINT_FLAG(x)  err_printf(m, #x ": %s\n", yesno(info->x))
-	DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
-}
+	struct drm_printer p = i915_error_printer(m);

-static __always_inline void err_print_param(struct drm_i915_error_state_buf *m,
-					    const char *name,
-					    const char *type,
-					    const void *x)
-{
-	if (!__builtin_strcmp(type, "bool"))
-		err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
-	else if (!__builtin_strcmp(type, "int"))
-		err_printf(m, "i915.%s=%d\n", name, *(const int *)x);
-	else if (!__builtin_strcmp(type, "unsigned int"))
-		err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
-	else if (!__builtin_strcmp(type, "char *"))
-		err_printf(m, "i915.%s=%s\n", name, *(const char **)x);
-	else
-		BUILD_BUG();
+	intel_device_info_dump_flags(info, &p);
 }

 static void err_print_params(struct drm_i915_error_state_buf *m,
-			     const struct i915_params *p)
+			     const struct i915_params *params)
 {
-#define PRINT(T, x, ...) err_print_param(m, #x, #T, &p->x);
-	I915_PARAMS_FOR_EACH(PRINT);
-#undef PRINT
+	struct drm_printer p = i915_error_printer(m);
+
+	i915_params_dump(params, &p);
 }

 static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@ -1256,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 		ee->hws = I915_READ(mmio);
 	}

+	ee->idle = intel_engine_is_idle(engine);
 	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->hangcheck_action = engine->hangcheck.action;
 	ee->hangcheck_stalled = engine->hangcheck.stalled;
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 49a079494b68..79f8ec756362 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -96,6 +96,11 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)

 void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
 {
-	if (static_cpu_has(X86_FEATURE_XMM4_1))
+	/*
+	 * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
+	 * emulation. So don't enable movntdqa in hypervisor guest.
+	 */
+	if (static_cpu_has(X86_FEATURE_XMM4_1) &&
+	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		static_branch_enable(&has_movntdqa);
 }
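The removed err_print_param() above and the _print_param() helper that this series adds to i915_params.c share one trick: dispatching on a stringified type name with __builtin_strcmp, which the compiler folds to a constant so dead branches disappear and an unknown type breaks the build. A standalone sketch of that compile-time dispatch (the link-error fallback stands in for the kernel's BUILD_BUG() and relies on the compiler folding the comparisons):

/* Sketch: compile-time dispatch on a stringified type; GCC/Clang fold
 * __builtin_strcmp of literals, so only one branch survives and an
 * unhandled type leaves an unresolved reference at link time. */
#include <stdio.h>

extern void unknown_param_type(void);	/* link error if referenced */

#define PRINT_PARAM(T, v) \
	do { \
		if (!__builtin_strcmp(#T, "int")) \
			printf("%s=%d\n", #v, *(const int *)&(v)); \
		else if (!__builtin_strcmp(#T, "unsigned int")) \
			printf("%s=%u\n", #v, *(const unsigned int *)&(v)); \
		else \
			unknown_param_type(); \
	} while (0)

int main(void)
{
	int enable_guc = 1;
	unsigned int mmio_debug = 0;

	PRINT_PARAM(int, enable_guc);		/* prints enable_guc=1 */
	PRINT_PARAM(unsigned int, mmio_debug);	/* prints mmio_debug=0 */
	return 0;
}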
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 8dfea0320c2f..b5f3eb4fa8a3 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -22,6 +22,8 @@
  * IN THE SOFTWARE.
  */

+#include <drm/drm_print.h>
+
 #include "i915_params.h"
 #include "i915_drv.h"

@@ -172,3 +174,34 @@ i915_param_named(enable_dpcd_backlight, bool, 0600,

 i915_param_named(enable_gvt, bool, 0400,
 	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
+
+static __always_inline void _print_param(struct drm_printer *p,
+					 const char *name,
+					 const char *type,
+					 const void *x)
+{
+	if (!__builtin_strcmp(type, "bool"))
+		drm_printf(p, "i915.%s=%s\n", name, yesno(*(const bool *)x));
+	else if (!__builtin_strcmp(type, "int"))
+		drm_printf(p, "i915.%s=%d\n", name, *(const int *)x);
+	else if (!__builtin_strcmp(type, "unsigned int"))
+		drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x);
+	else if (!__builtin_strcmp(type, "char *"))
+		drm_printf(p, "i915.%s=%s\n", name, *(const char **)x);
+	else
+		BUILD_BUG();
+}
+
+/**
+ * i915_params_dump - dump i915 modparams
+ * @params: i915 modparams
+ * @p: the &drm_printer
+ *
+ * Pretty printer for i915 modparams.
+ */
+void i915_params_dump(const struct i915_params *params, struct drm_printer *p)
+{
+#define PRINT(T, x, ...) _print_param(p, #x, #T, &params->x);
+	I915_PARAMS_FOR_EACH(PRINT);
+#undef PRINT
+}
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 792ce26d7449..c96360398072 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -28,6 +28,8 @@
 #include <linux/bitops.h>
 #include <linux/cache.h> /* for __read_mostly */

+struct drm_printer;
+
 #define ENABLE_GUC_SUBMISSION		BIT(0)
 #define ENABLE_GUC_LOAD_HUC		BIT(1)

@@ -77,5 +79,7 @@ struct i915_params {

 extern struct i915_params i915_modparams __read_mostly;

+void i915_params_dump(const struct i915_params *params, struct drm_printer *p);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index fa67d3dde20e..36d48422b475 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -633,6 +633,8 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
 	INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
+	INTEL_CFL_U_GT1_IDS(&intel_coffeelake_gt1_info),
+	INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
 	INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info),
 	INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info),
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 09bf043c1c2e..41285bec8fc0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3278,6 +3278,7 @@ enum i915_power_well_id {
 # define AUDUNIT_CLOCK_GATE_DISABLE		(1 << 26) /* 965 */
 # define DPUNIT_A_CLOCK_GATE_DISABLE		(1 << 25) /* 965 */
 # define DPCUNIT_CLOCK_GATE_DISABLE		(1 << 24) /* 965 */
+# define PNV_GMBUSUNIT_CLOCK_GATE_DISABLE	(1 << 24) /* pnv */
 # define TVRUNIT_CLOCK_GATE_DISABLE		(1 << 23) /* 915-945 */
 # define TVCUNIT_CLOCK_GATE_DISABLE		(1 << 22) /* 915-945 */
 # define TVFUNIT_CLOCK_GATE_DISABLE		(1 << 21) /* 915-945 */
@@ -3858,6 +3859,9 @@ enum {
 #define   PWM2_GATING_DIS		(1 << 14)
 #define   PWM1_GATING_DIS		(1 << 13)

+#define GEN9_CLKGATE_DIS_4		_MMIO(0x4653C)
+#define   BXT_GMBUS_GATING_DIS		(1 << 14)
+
 #define _CLKGATE_DIS_PSL_A		0x46520
 #define _CLKGATE_DIS_PSL_B		0x46524
 #define _CLKGATE_DIS_PSL_C		0x46528
@@ -3875,6 +3879,9 @@ enum {
 #define  SARBUNIT_CLKGATE_DIS		(1 << 5)
 #define  RCCUNIT_CLKGATE_DIS		(1 << 7)

+#define UNSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9434)
+#define  VFUNIT_CLKGATE_DIS		(1 << 20)
+
 /*
  * Display engine regs
  */
@@ -6329,6 +6336,7 @@ enum {
 #define   PLANE_CTL_TILED_X			(  1 << 10)
 #define   PLANE_CTL_TILED_Y			(  4 << 10)
 #define   PLANE_CTL_TILED_YF			(  5 << 10)
+#define   PLANE_CTL_FLIP_HORIZONTAL		(  1 << 8)
 #define   PLANE_CTL_ALPHA_MASK			(0x3 << 4) /* Pre-GLK */
 #define   PLANE_CTL_ALPHA_DISABLE		(  0 << 4)
 #define   PLANE_CTL_ALPHA_SW_PREMULTIPLY	(  2 << 4)
@@ -7552,6 +7560,7 @@ enum {
 #define FDI_RX_CHICKEN(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)

 #define SOUTH_DSPCLK_GATE_D	_MMIO(0xc2020)
+#define  PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1<<31)
 #define  PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30)
 #define  PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29)
 #define  PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14)
@@ -8142,6 +8151,7 @@ enum {
 #define   PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE	(1<<8)
 #define   STALL_DOP_GATING_DISABLE		(1<<5)
 #define   THROTTLE_12_5				(7<<2)
+#define   DISABLE_EARLY_EOT			(1<<1)

 #define GEN7_ROW_CHICKEN2		_MMIO(0xe4f4)
 #define GEN7_ROW_CHICKEN2_GT2		_MMIO(0xf4f4)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 4e76768ffa95..e1169c02eb2b 100644
--- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -616,6 +616,7 @@ TRACE_EVENT(i915_gem_request_queue, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -624,15 +625,16 @@ TRACE_EVENT(i915_gem_request_queue, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->flags) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_gem_request, @@ -641,23 +643,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, ctx) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -683,15 +687,17 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global_seqno) - __field(u32, ctx) __field(u32, port) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -699,10 +705,10 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->port = port; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->ring, __entry->ctx, - __entry->seqno, __entry->global_seqno, - __entry->port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->port) ); DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, @@ -772,6 +778,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -787,6 +794,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, */ TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -794,10 +802,10 @@ TRACE_EVENT(i915_gem_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), - __entry->flags) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->hw_id, 
__entry->ring, __entry->ctx, + __entry->seqno, __entry->global, + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 8d07764887ec..51dbfe5bb418 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -140,4 +140,19 @@ static inline void drain_delayed_work(struct delayed_work *dw) } while (delayed_work_pending(dw)); } +static inline const char *yesno(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *onoff(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *enableddisabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 369f780588fb..f51645a08dca 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2095,6 +2095,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2115,7 +2117,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, val = I915_READ(DPLL_CTRL2); val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | - DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); + DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); @@ -2124,6 +2126,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index f478be3ae0ba..d28592e43512 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -22,6 +22,9 @@ * */ +#include <drm/drm_print.h> + +#include "intel_device_info.h" #include "i915_drv.h" #define PLATFORM_NAME(x) [INTEL_##x] = #x @@ -67,21 +70,55 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -void intel_device_info_dump(struct drm_i915_private *dev_priv) +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p) { - const struct intel_device_info *info = &dev_priv->info; - - DRM_DEBUG_DRIVER("i915 device info: platform=%s gen=%i pciid=0x%04x rev=0x%02x", - intel_platform_name(info->platform), - info->gen, - dev_priv->drm.pdev->device, - dev_priv->drm.pdev->revision); -#define PRINT_FLAG(name) \ - DRM_DEBUG_DRIVER("i915 device info: " #name ": %s", yesno(info->name)) +#define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name)); DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG } +static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) +{ + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); + drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); + drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); + drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); + drm_printf(p, "subslice per slice: %u\n", + hweight8(sseu->subslice_mask)); + drm_printf(p, "EU total: %u\n", sseu->eu_total); + drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); 
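/*
 * Illustration (not part of the patch): taking a struct drm_printer
 * instead of hard-coding DRM_DEBUG_DRIVER() lets one dump routine feed
 * dmesg, debugfs or a captured error state. Assuming the drm_print.h
 * constructors, a dmesg caller would look like:
 *
 *	struct drm_printer p = drm_debug_printer(__func__);
 *	intel_device_info_dump_runtime(info, &p);
 *
 * while a debugfs entry would build its printer with
 * drm_seq_file_printer(m) and pass the same info pointer.
 */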
+ drm_printf(p, "has slice power gating: %s\n", + yesno(sseu->has_slice_pg)); + drm_printf(p, "has subslice power gating: %s\n", + yesno(sseu->has_subslice_pg)); + drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg)); +} + +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p) +{ + sseu_dump(&info->sseu, p); + + drm_printf(p, "CS timestamp frequency: %u kHz\n", + info->cs_timestamp_frequency_khz); +} + +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p) +{ + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); + + drm_printf(p, "pciid=0x%04x rev=0x%02x platform=%s gen=%i\n", + INTEL_DEVID(dev_priv), + INTEL_REVID(dev_priv), + intel_platform_name(info->platform), + info->gen); + + intel_device_info_dump_flags(info, p); +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; @@ -420,7 +457,10 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) return 0; } -/* +/** + * intel_device_info_runtime_init - initialize runtime info + * @info: intel device info struct + * * Determine various intel_device_info fields at runtime. * * Use it when either: @@ -433,9 +473,10 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) * - after the PCH has been detected, * - before the first usage of the fields it can tweak. */ -void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) +void intel_device_info_runtime_init(struct intel_device_info *info) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); enum pipe pipe; if (INTEL_GEN(dev_priv) >= 10) { @@ -543,22 +584,4 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize command stream timestamp frequency */ info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); - - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); - DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); - DRM_DEBUG_DRIVER("subslice total: %u\n", - sseu_subslice_total(&info->sseu)); - DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask); - DRM_DEBUG_DRIVER("subslice per slice: %u\n", - hweight8(info->sseu.subslice_mask)); - DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total); - DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice); - DRM_DEBUG_DRIVER("has slice power gating: %s\n", - info->sseu.has_slice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has subslice power gating: %s\n", - info->sseu.has_subslice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has EU power gating: %s\n", - info->sseu.has_eu_pg ? 
"y" : "n"); - DRM_DEBUG_DRIVER("CS timestamp frequency: %u kHz\n", - info->cs_timestamp_frequency_khz); } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h new file mode 100644 index 000000000000..49cb27bd04c1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -0,0 +1,183 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_DEVICE_INFO_H_ +#define _INTEL_DEVICE_INFO_H_ + +#include "intel_display.h" + +struct drm_printer; +struct drm_i915_private; + +/* Keep in gen based order, and chronological order within a gen */ +enum intel_platform { + INTEL_PLATFORM_UNINITIALIZED = 0, + /* gen2 */ + INTEL_I830, + INTEL_I845G, + INTEL_I85X, + INTEL_I865G, + /* gen3 */ + INTEL_I915G, + INTEL_I915GM, + INTEL_I945G, + INTEL_I945GM, + INTEL_G33, + INTEL_PINEVIEW, + /* gen4 */ + INTEL_I965G, + INTEL_I965GM, + INTEL_G45, + INTEL_GM45, + /* gen5 */ + INTEL_IRONLAKE, + /* gen6 */ + INTEL_SANDYBRIDGE, + /* gen7 */ + INTEL_IVYBRIDGE, + INTEL_VALLEYVIEW, + INTEL_HASWELL, + /* gen8 */ + INTEL_BROADWELL, + INTEL_CHERRYVIEW, + /* gen9 */ + INTEL_SKYLAKE, + INTEL_BROXTON, + INTEL_KABYLAKE, + INTEL_GEMINILAKE, + INTEL_COFFEELAKE, + /* gen10 */ + INTEL_CANNONLAKE, + INTEL_MAX_PLATFORMS +}; + +#define DEV_INFO_FOR_EACH_FLAG(func) \ + func(is_mobile); \ + func(is_lp); \ + func(is_alpha_support); \ + /* Keep has_* in alphabetical order */ \ + func(has_64bit_reloc); \ + func(has_aliasing_ppgtt); \ + func(has_csr); \ + func(has_ddi); \ + func(has_dp_mst); \ + func(has_reset_engine); \ + func(has_fbc); \ + func(has_fpga_dbg); \ + func(has_full_ppgtt); \ + func(has_full_48bit_ppgtt); \ + func(has_gmch_display); \ + func(has_guc); \ + func(has_guc_ct); \ + func(has_hotplug); \ + func(has_l3_dpf); \ + func(has_llc); \ + func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ + func(has_overlay); \ + func(has_pooled_eu); \ + func(has_psr); \ + func(has_rc6); \ + func(has_rc6p); \ + func(has_resource_streamer); \ + func(has_runtime_pm); \ + func(has_snoop); \ + func(unfenced_needs_alignment); \ + func(cursor_needs_physical); \ + func(hws_needs_physical); \ + func(overlay_needs_physical); \ + func(supports_tv); \ + func(has_ipc); + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask; + u8 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
*/ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; +}; + +struct intel_device_info { + u16 device_id; + u16 gen_mask; + + u8 gen; + u8 gt; /* GT number, 0 if undefined */ + u8 num_rings; + u8 ring_mask; /* Rings supported by the HW */ + + enum intel_platform platform; + u32 platform_mask; + + u32 display_mmio_offset; + + u8 num_pipes; + u8 num_sprites[I915_MAX_PIPES]; + u8 num_scalers[I915_MAX_PIPES]; + + unsigned int page_sizes; /* page sizes supported by the HW */ + +#define DEFINE_FLAG(name) u8 name:1 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); +#undef DEFINE_FLAG + u16 ddb_size; /* in blocks */ + + /* Register offsets for the various display pipes and transcoders */ + int pipe_offsets[I915_MAX_TRANSCODERS]; + int trans_offsets[I915_MAX_TRANSCODERS]; + int palette_offsets[I915_MAX_PIPES]; + int cursor_offsets[I915_MAX_PIPES]; + + /* Slice/subslice/EU info */ + struct sseu_dev_info sseu; + + u32 cs_timestamp_frequency_khz; + + struct color_luts { + u16 degamma_lut_size; + u16 gamma_lut_size; + } color; +}; + +static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) +{ + return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); +} + +const char *intel_platform_name(enum intel_platform platform); + +void intel_device_info_runtime_init(struct intel_device_info *info); +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index efa6c6d19664..0cd355978ab4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3073,6 +3073,12 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (rotation & DRM_MODE_REFLECT_X && + fb->modifier == DRM_FORMAT_MOD_LINEAR) { + DRM_DEBUG_KMS("horizontal flip is not supported with linear surface formats\n"); + return -EINVAL; + } + if (!plane_state->base.visible) return 0; @@ -3453,9 +3459,9 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return 0; } -static u32 skl_plane_ctl_rotation(unsigned int rotation) +static u32 skl_plane_ctl_rotate(unsigned int rotate) { - switch (rotation) { + switch (rotate) { case DRM_MODE_ROTATE_0: break; /* @@ -3469,7 +3475,22 @@ static u32 skl_plane_ctl_rotation(unsigned int rotation) case DRM_MODE_ROTATE_270: return PLANE_CTL_ROTATE_90; default: - MISSING_CASE(rotation); + MISSING_CASE(rotate); + } + + return 0; +} + +static u32 cnl_plane_ctl_flip(unsigned int reflect) +{ + switch (reflect) { + case 0: + break; + case DRM_MODE_REFLECT_X: + return PLANE_CTL_FLIP_HORIZONTAL; + case DRM_MODE_REFLECT_Y: + default: + MISSING_CASE(reflect); } return 0; @@ -3497,7 +3518,11 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); + plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK); + + if (INTEL_GEN(dev_priv) >= 10) + plane_ctl |= cnl_plane_ctl_flip(rotation & + DRM_MODE_REFLECT_MASK); if (key->flags & I915_SET_COLORKEY_DESTINATION) plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; @@ -9694,111 +9719,27 @@ err: return ERR_PTR(ret); } -static u32 
-intel_framebuffer_pitch_for_width(int width, int bpp) -{ - u32 pitch = DIV_ROUND_UP(width * bpp, 8); - return ALIGN(pitch, 64); -} - -static u32 -intel_framebuffer_size_for_mode(const struct drm_display_mode *mode, int bpp) -{ - u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp); - return PAGE_ALIGN(pitch * mode->vdisplay); -} - -static struct drm_framebuffer * -intel_framebuffer_create_for_mode(struct drm_device *dev, - const struct drm_display_mode *mode, - int depth, int bpp) -{ - struct drm_framebuffer *fb; - struct drm_i915_gem_object *obj; - struct drm_mode_fb_cmd2 mode_cmd = { 0 }; - - obj = i915_gem_object_create(to_i915(dev), - intel_framebuffer_size_for_mode(mode, bpp)); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - mode_cmd.width = mode->hdisplay; - mode_cmd.height = mode->vdisplay; - mode_cmd.pitches[0] = intel_framebuffer_pitch_for_width(mode_cmd.width, - bpp); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - - fb = intel_framebuffer_create(obj, &mode_cmd); - if (IS_ERR(fb)) - i915_gem_object_put(obj); - - return fb; -} - -static struct drm_framebuffer * -mode_fits_in_fbdev(struct drm_device *dev, - const struct drm_display_mode *mode) -{ -#ifdef CONFIG_DRM_FBDEV_EMULATION - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct drm_framebuffer *fb; - - if (!dev_priv->fbdev) - return NULL; - - if (!dev_priv->fbdev->fb) - return NULL; - - obj = dev_priv->fbdev->fb->obj; - BUG_ON(!obj); - - fb = &dev_priv->fbdev->fb->base; - if (fb->pitches[0] < intel_framebuffer_pitch_for_width(mode->hdisplay, - fb->format->cpp[0] * 8)) - return NULL; - - if (obj->base.size < mode->vdisplay * fb->pitches[0]) - return NULL; - - drm_framebuffer_get(fb); - return fb; -#else - return NULL; -#endif -} - -static int intel_modeset_setup_plane_state(struct drm_atomic_state *state, - struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_framebuffer *fb, - int x, int y) +static int intel_modeset_disable_planes(struct drm_atomic_state *state, + struct drm_crtc *crtc) { + struct drm_plane *plane; struct drm_plane_state *plane_state; - int hdisplay, vdisplay; - int ret; - - plane_state = drm_atomic_get_plane_state(state, crtc->primary); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - - if (mode) - drm_mode_get_hv_timing(mode, &hdisplay, &vdisplay); - else - hdisplay = vdisplay = 0; + int ret, i; - ret = drm_atomic_set_crtc_for_plane(plane_state, fb ? 
crtc : NULL); + ret = drm_atomic_add_affected_planes(state, crtc); if (ret) return ret; - drm_atomic_set_fb_for_plane(plane_state, fb); - plane_state->crtc_x = 0; - plane_state->crtc_y = 0; - plane_state->crtc_w = hdisplay; - plane_state->crtc_h = vdisplay; - plane_state->src_x = x << 16; - plane_state->src_y = y << 16; - plane_state->src_w = hdisplay << 16; - plane_state->src_h = vdisplay << 16; + + for_each_new_plane_in_state(state, plane, plane_state, i) { + if (plane_state->crtc != crtc) + continue; + + ret = drm_atomic_set_crtc_for_plane(plane_state, NULL); + if (ret) + return ret; + + drm_atomic_set_fb_for_plane(plane_state, NULL); + } return 0; } @@ -9816,7 +9757,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, struct drm_crtc *crtc = NULL; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *fb; struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; @@ -9884,10 +9824,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, found: intel_crtc = to_intel_crtc(crtc); - ret = drm_modeset_lock(&crtc->primary->mutex, ctx); - if (ret) - goto fail; - state = drm_atomic_state_alloc(dev); restore_state = drm_atomic_state_alloc(dev); if (!state || !restore_state) { @@ -9919,39 +9855,17 @@ found: if (!mode) mode = &load_detect_mode; - /* We need a framebuffer large enough to accommodate all accesses - * that the plane may generate whilst we perform load detection. - * We can not rely on the fbcon either being present (we get called - * during its initialisation to detect all boot displays, or it may - * not even exist) or that it is large enough to satisfy the - * requested mode. 
- */ - fb = mode_fits_in_fbdev(dev, mode); - if (fb == NULL) { - DRM_DEBUG_KMS("creating tmp fb for load-detection\n"); - fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32); - } else - DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); - if (IS_ERR(fb)) { - DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); - ret = PTR_ERR(fb); - goto fail; - } - - ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); - drm_framebuffer_put(fb); + ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); + ret = intel_modeset_disable_planes(state, crtc); if (ret) goto fail; ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); - if (!ret) - ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(restore_state, crtc->primary)); if (ret) { DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -12569,11 +12483,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq, &state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -13238,7 +13156,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; @@ -13300,7 +13218,12 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) if (ret) goto fail; - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 10) { + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X; + } else if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; @@ -14531,6 +14454,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -15335,6 +15260,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h new file mode 100644 index 000000000000..a0d2b6169361 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_display.h @@ -0,0 +1,321 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_DISPLAY_H_ +#define _INTEL_DISPLAY_H_ + +enum pipe { + INVALID_PIPE = -1, + + PIPE_A = 0, + PIPE_B, + PIPE_C, + _PIPE_EDP, + + I915_MAX_PIPES = _PIPE_EDP +}; + +#define pipe_name(p) ((p) + 'A') + +enum transcoder { + TRANSCODER_A = 0, + TRANSCODER_B, + TRANSCODER_C, + TRANSCODER_EDP, + TRANSCODER_DSI_A, + TRANSCODER_DSI_C, + + I915_MAX_TRANSCODERS +}; + +static inline const char *transcoder_name(enum transcoder transcoder) +{ + switch (transcoder) { + case TRANSCODER_A: + return "A"; + case TRANSCODER_B: + return "B"; + case TRANSCODER_C: + return "C"; + case TRANSCODER_EDP: + return "EDP"; + case TRANSCODER_DSI_A: + return "DSI A"; + case TRANSCODER_DSI_C: + return "DSI C"; + default: + return "<invalid>"; + } +} + +static inline bool transcoder_is_dsi(enum transcoder transcoder) +{ + return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; +} + +/* + * Global legacy plane identifier. Valid only for primary/sprite + * planes on pre-g4x, and only for primary planes on g4x-bdw. + */ +enum i9xx_plane_id { + PLANE_A, + PLANE_B, + PLANE_C, +}; + +#define plane_name(p) ((p) + 'A') +#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') + +/* + * Per-pipe plane identifier. + * I915_MAX_PLANES in the enum below is the maximum (across all platforms) + * number of planes per CRTC. Not all platforms really have this many planes, + * which means some arrays of size I915_MAX_PLANES may have unused entries + * between the topmost sprite plane and the cursor plane. + * + * This is expected to be passed to various register macros + * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. 
+ */ +enum plane_id { + PLANE_PRIMARY, + PLANE_SPRITE0, + PLANE_SPRITE1, + PLANE_SPRITE2, + PLANE_CURSOR, + + I915_MAX_PLANES, +}; + +#define for_each_plane_id_on_crtc(__crtc, __p) \ + for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ + for_each_if((__crtc)->plane_ids_mask & BIT(__p)) + +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + +enum dpio_channel { + DPIO_CH0, + DPIO_CH1 +}; + +enum dpio_phy { + DPIO_PHY0, + DPIO_PHY1, + DPIO_PHY2, +}; + +#define I915_NUM_PHYS_VLV 2 + +enum intel_display_power_domain { + POWER_DOMAIN_PIPE_A, + POWER_DOMAIN_PIPE_B, + POWER_DOMAIN_PIPE_C, + POWER_DOMAIN_PIPE_A_PANEL_FITTER, + POWER_DOMAIN_PIPE_B_PANEL_FITTER, + POWER_DOMAIN_PIPE_C_PANEL_FITTER, + POWER_DOMAIN_TRANSCODER_A, + POWER_DOMAIN_TRANSCODER_B, + POWER_DOMAIN_TRANSCODER_C, + POWER_DOMAIN_TRANSCODER_EDP, + POWER_DOMAIN_TRANSCODER_DSI_A, + POWER_DOMAIN_TRANSCODER_DSI_C, + POWER_DOMAIN_PORT_DDI_A_LANES, + POWER_DOMAIN_PORT_DDI_B_LANES, + POWER_DOMAIN_PORT_DDI_C_LANES, + POWER_DOMAIN_PORT_DDI_D_LANES, + POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, + POWER_DOMAIN_PORT_DSI, + POWER_DOMAIN_PORT_CRT, + POWER_DOMAIN_PORT_OTHER, + POWER_DOMAIN_VGA, + POWER_DOMAIN_AUDIO, + POWER_DOMAIN_PLLS, + POWER_DOMAIN_AUX_A, + POWER_DOMAIN_AUX_B, + POWER_DOMAIN_AUX_C, + POWER_DOMAIN_AUX_D, + POWER_DOMAIN_GMBUS, + POWER_DOMAIN_MODESET, + POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_INIT, + + POWER_DOMAIN_NUM, +}; + +#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) +#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ + ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) +#define POWER_DOMAIN_TRANSCODER(tran) \ + ((tran) == TRANSCODER_EDP ? 
POWER_DOMAIN_TRANSCODER_EDP : \ + (tran) + POWER_DOMAIN_TRANSCODER_A) + +/* Used by dp and fdi links */ +struct intel_link_m_n { + u32 tu; + u32 gmch_m; + u32 gmch_n; + u32 link_m; + u32 link_n; +}; + +#define for_each_pipe(__dev_priv, __p) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + +#define for_each_pipe_masked(__dev_priv, __p, __mask) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for_each_if((__mask) & BIT(__p)) + +#define for_each_universal_plane(__dev_priv, __pipe, __p) \ + for ((__p) = 0; \ + (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ + (__p)++) + +#define for_each_sprite(__dev_priv, __p, __s) \ + for ((__s) = 0; \ + (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ + (__s)++) + +#define for_each_port_masked(__port, __ports_mask) \ + for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ + for_each_if((__ports_mask) & BIT(__port)) + +#define for_each_crtc(dev, crtc) \ + list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) + +#define for_each_intel_plane(dev, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) + +#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((plane_mask) & \ + BIT(drm_plane_index(&intel_plane->base))) + +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((intel_plane)->pipe == (intel_crtc)->pipe) + +#define for_each_intel_crtc(dev, intel_crtc) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) + +#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) \ + for_each_if((crtc_mask) & BIT(drm_crtc_index(&intel_crtc->base))) + +#define for_each_intel_encoder(dev, intel_encoder) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) + +#define for_each_intel_connector_iter(intel_connector, iter) \ + while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) + +#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ + list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ + for_each_if((intel_encoder)->base.crtc == (__crtc)) + +#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ + list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ + for_each_if((intel_connector)->base.encoder == (__encoder)) + +#define for_each_power_domain(domain, mask) \ + for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ + for_each_if(BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + 
for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_crtc && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)++) \ + for_each_if(crtc) + +#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +void intel_link_compute_m_n(int bpp, int nlanes, + int pixel_clock, int link_clock, + struct intel_link_m_n *m_n, + bool reduce_m_n); + +#endif diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 510e0bc3a377..ebdcbcbacb3c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1272,6 +1272,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; + /* WaDisableEarlyEOT:cnl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); + return 0; } @@ -1664,6 +1667,35 @@ static void print_request(struct drm_printer *m, rq->timeline->common->name); } +static void hexdump(struct drm_printer *m, const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + drm_printf(m, "*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + drm_printf(m, "%08zx %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) 
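The hexdump() helper added above keeps the new HWSP dump at the end of
intel_engine_dump() readable by collapsing runs of identical 32-byte rows
into a single "*" line, the same convention print_hex_dump() and userspace
hexdump -C use. A minimal standalone sketch of that suppression logic
(userspace C, illustrative only; the kernel version formats each row with
hex_dump_to_buffer() instead):

	#include <stdio.h>
	#include <string.h>

	static void dump(const unsigned char *buf, size_t len)
	{
		const size_t rowsize = 32;	/* 8 * sizeof(u32), as above */
		const unsigned char *prev = NULL;
		int skip = 0;

		for (size_t pos = 0; pos < len; pos += rowsize) {
			/* Suppress rows identical to the previous one. */
			if (prev && pos + rowsize <= len &&
			    !memcmp(prev, buf + pos, rowsize)) {
				if (!skip)
					puts("*");
				skip = 1;
				continue;
			}
			printf("%08zx", pos);
			for (size_t i = 0; i < rowsize && pos + i < len; i++)
				printf(" %02x", buf[pos + i]);
			putchar('\n');
			prev = buf + pos;
			skip = 0;
		}
	}

	int main(void)
	{
		unsigned char page[4096] = { 0xde, 0xad };

		dump(page, sizeof(page));	/* two rows, then one "*" */
		return 0;
	}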
@@ -1757,6 +1789,24 @@ void intel_engine_dump(struct intel_engine_cs *engine, addr = intel_engine_get_last_batch_head(engine); drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 8) + addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base), + RING_DMA_FADD_UDW(engine->mmio_base)); + else if (INTEL_GEN(dev_priv) >= 4) + addr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + else + addr = I915_READ(DMA_FADD_I8XX); + drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 4) { + drm_printf(m, "\tIPEIR: 0x%08x\n", + I915_READ(RING_IPEIR(engine->mmio_base))); + drm_printf(m, "\tIPEHR: 0x%08x\n", + I915_READ(RING_IPEHR(engine->mmio_base))); + } else { + drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR)); + drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR)); + } if (HAS_EXECLISTS(dev_priv)) { const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; @@ -1848,8 +1898,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, &engine->irq_posted)), yesno(test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))); + + drm_printf(m, "HWSP:\n"); + hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); - drm_printf(m, "\n"); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 0acd9dd3ed5c..31f01d64c021 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -429,18 +429,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck cur_state, *hc = &cur_state; const bool busy = intel_engine_has_waiter(engine); + struct intel_engine_hangcheck hc; semaphore_clear_deadlocks(dev_priv); - hangcheck_load_sample(engine, hc); - hangcheck_accumulate_sample(engine, hc); - hangcheck_store_sample(engine, hc); + hangcheck_load_sample(engine, &hc); + hangcheck_accumulate_sample(engine, &hc); + hangcheck_store_sample(engine, &hc); if (engine->hangcheck.stalled) { hung |= intel_engine_flag(engine); - if (hc->action != ENGINE_DEAD) + if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 49fdf09f9919..ef9f91a0b0c9 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -128,22 +128,46 @@ intel_i2c_reset(struct drm_i915_private *dev_priv) I915_WRITE(GMBUS4, 0); } -static void intel_i2c_quirk_set(struct drm_i915_private *dev_priv, bool enable) +static void pnv_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) { u32 val; /* When using bit bashing for I2C, this bit needs to be set to 1 */ - if (!IS_PINEVIEW(dev_priv)) - return; - val = I915_READ(DSPCLK_GATE_D); - if (enable) - val |= DPCUNIT_CLOCK_GATE_DISABLE; + if (!enable) + val |= PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; else - val &= ~DPCUNIT_CLOCK_GATE_DISABLE; + val &= ~PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, val); } +static void pch_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(SOUTH_DSPCLK_GATE_D); + if (!enable) + val |= PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + else + val &= ~PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); +} + +static void bxt_gmbus_clock_gating(struct 
drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(GEN9_CLKGATE_DIS_4); + if (!enable) + val |= BXT_GMBUS_GATING_DIS; + else + val &= ~BXT_GMBUS_GATING_DIS; + I915_WRITE(GEN9_CLKGATE_DIS_4, val); +} + static u32 get_reserved(struct intel_gmbus *bus) { struct drm_i915_private *dev_priv = bus->dev_priv; @@ -221,7 +245,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) struct drm_i915_private *dev_priv = bus->dev_priv; intel_i2c_reset(dev_priv); - intel_i2c_quirk_set(dev_priv, true); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, false); + set_data(bus, 1); set_clock(bus, 1); udelay(I2C_RISEFALL_TIME); @@ -238,7 +265,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) set_data(bus, 1); set_clock(bus, 1); - intel_i2c_quirk_set(dev_priv, false); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, true); } static void @@ -481,6 +510,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) int i = 0, inc, try = 0; int ret = 0; + /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, false); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, false); + retry: I915_WRITE_FW(GMBUS0, bus->reg0); @@ -582,6 +618,13 @@ timeout: ret = -EAGAIN; out: + /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, true); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, true); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2e38fbfdf08f..739c33b07c59 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -449,7 +449,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", engine->name, n, - rq->ctx->hw_id, count, + port[n].context_id, count, rq->global_seqno); } else { GEM_BUG_ON(!n); @@ -504,7 +504,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) ce->ring->tail &= (ce->ring->size - 1); ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; - GEM_TRACE("\n"); + GEM_TRACE("%s\n", engine->name); for (n = execlists_num_ports(&engine->execlists); --n; ) elsp_write(0, engine->execlists.elsp); @@ -861,9 +861,10 @@ static void execlists_submission_tasklet(unsigned long data) */ status = READ_ONCE(buf[2 * head]); /* maybe mmio! 
*/ - GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n", + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", engine->name, head, - status, buf[2*head + 1]); + status, buf[2*head + 1], + execlists->active); if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED)) @@ -881,6 +882,8 @@ static void execlists_submission_tasklet(unsigned long data) if (status & GEN8_CTX_STATUS_COMPLETE && buf[2*head + 1] == PREEMPT_ID) { + GEM_TRACE("%s preempt-idle\n", engine->name); + execlists_cancel_port_requests(execlists); execlists_unwind_incomplete_requests(execlists); @@ -905,8 +908,8 @@ static void execlists_submission_tasklet(unsigned long data) rq = port_unpack(port, &count); GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", engine->name, - rq->ctx->hw_id, count, - rq->global_seqno); + port->context_id, count, + rq ? rq->global_seqno : 0); GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); @@ -1555,6 +1558,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct intel_context *ce; unsigned long flags; + GEM_TRACE("%s seqno=%x\n", + engine->name, request ? request->global_seqno : 0); spin_lock_irqsave(&engine->timeline->lock, flags); /* diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index fc65f5e451dd..c58e5f53bab0 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -32,6 +32,8 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> + +#include "intel_opregion.h" #include "i915_drv.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/intel_opregion.h b/drivers/gpu/drm/i915/intel_opregion.h new file mode 100644 index 000000000000..e0e437ba9e51 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_opregion.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2008-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_OPREGION_H_ +#define _INTEL_OPREGION_H_ + +#include <linux/workqueue.h> +#include <linux/pci.h> + +struct drm_i915_private; +struct intel_encoder; + +struct opregion_header; +struct opregion_acpi; +struct opregion_swsci; +struct opregion_asle; + +struct intel_opregion { + struct opregion_header *header; + struct opregion_acpi *acpi; + struct opregion_swsci *swsci; + u32 swsci_gbda_sub_functions; + u32 swsci_sbcb_sub_functions; + struct opregion_asle *asle; + void *rvda; + void *vbt_firmware; + const void *vbt; + u32 vbt_size; + u32 *lid_state; + struct work_struct asle_work; +}; + +#define OPREGION_SIZE (8 * 1024) + +#ifdef CONFIG_ACPI + +int intel_opregion_setup(struct drm_i915_private *dev_priv); +void intel_opregion_register(struct drm_i915_private *dev_priv); +void intel_opregion_unregister(struct drm_i915_private *dev_priv); +void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); +int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, + bool enable); +int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, + pci_power_t state); +int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); + +#else /* CONFIG_ACPI*/ + +static inline int intel_opregion_setup(struct drm_i915_private *dev_priv) +{ + return 0; +} + +static inline void intel_opregion_register(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) +{ +} + +static inline int +intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) +{ + return 0; +} + +static inline int +intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) +{ + return 0; +} + +static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) +{ + return -ENODEV; +} + +#endif /* CONFIG_ACPI */ + +#endif diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 18779c6eb4bf..1db79a860b96 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8447,6 +8447,11 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + + /* WaDisableVFclkgate:cnl */ + val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); + val |= VFUNIT_CLKGATE_DIS; + I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); } static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index a1ad85fa5c1a..2e32615eeada 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - 
I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 96ab74f3d101..db9d57f39534 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1726,13 +1726,13 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ @@ -1792,6 +1792,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index e6b31041cc88..2ea69394f428 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1637,7 +1637,7 @@ static int igt_shrink_thp(void *arg) * shmem to truncate our pages. */ i915_gem_shrink_all(i915); - if (!IS_ERR_OR_NULL(obj->mm.pages)) { + if (i915_gem_object_has_pages(obj)) { pr_err("shrink-all didn't truncate the pages\n"); err = -EINVAL; goto out_close; diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index f98546b8a7fa..d1f91a533afa 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -132,6 +132,12 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); @@ -140,6 +146,12 @@ static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(i915) >= 4) { @@ -147,12 +159,24 @@ static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { *batch++ = MI_STORE_DWORD_IMM; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; 
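/*
 * Illustration (not part of the patch): each branch of this function
 * now brackets a zeroed 1 KiB gap with MI_ARB_CHECK. The zero dwords
 * decode as MI_NOOP, and the arbitration checks give the command
 * streamer a point inside the otherwise tight recursive batch where an
 * engine reset or preemption can take effect. A hypothetical helper
 * for the repeated pattern:
 *
 *	static u32 *emit_arb_gap(u32 *batch)
 *	{
 *		*batch++ = MI_ARB_CHECK;
 *		memset(batch, 0, 1024);		// decoded as MI_NOOP
 *		batch += 1024 / sizeof(*batch);
 *		*batch++ = MI_ARB_CHECK;
 *		return batch;
 *	}
 */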
+ + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; *batch++ = lower_32_bits(vma->node.start); } @@ -234,6 +258,16 @@ static void hang_fini(struct hang *h) i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); } +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; @@ -296,6 +330,9 @@ static void global_reset_lock(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + pr_debug("%s: current gpu_error=%08lx\n", + __func__, i915->gpu_error.flags); + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) wait_event(i915->gpu_error.reset_queue, !test_bit(I915_RESET_BACKOFF, @@ -353,54 +390,128 @@ static int igt_global_reset(void *arg) return err; } -static int igt_reset_engine(void *arg) +static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { - struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int reset_count, reset_engine_count; + struct hang h; int err = 0; - /* Check that we can issue a global GPU and engine reset */ + /* Check that we can issue an engine reset on an idle engine (no-op) */ if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + unsigned int reset_count, reset_engine_count; + IGT_TIMEOUT(end_time); + + if (active && !intel_engine_can_store_dword(engine)) + continue; + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); - err = i915_reset_engine(engine, I915_RESET_QUIET); - if (err) { - pr_err("i915_reset_engine failed\n"); - break; - } + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); + err = -EIO; + break; + } - if (i915_reset_count(&i915->gpu_error) != reset_count) { - pr_err("Full GPU reset recorded! (engine reset expected)\n"); - err = -EINVAL; - break; - } + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + + err = i915_reset_engine(engine, I915_RESET_QUIET); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } + + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); + err = -EINVAL; + break; + } + + reset_engine_count += active; + if (i915_reset_engine_count(&i915->gpu_error, engine) != + reset_engine_count) { + pr_err("%s engine reset %srecorded!\n", + engine->name, active ? "not " : ""); + err = -EINVAL; + break; + } + + engine->hangcheck.stalled = false; + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - if (i915_reset_engine_count(&i915->gpu_error, engine) == - reset_engine_count) { - pr_err("No %s engine reset recorded!\n", engine->name); - err = -EINVAL; + if (err) break; - } - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + cond_resched(); } if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine(void *arg) +{ + return __igt_reset_engine(arg, false); +} + +static int igt_reset_active_engine(void *arg) +{ + return __igt_reset_engine(arg, true); +} + static int active_engine(void *data) { struct intel_engine_cs *engine = data; @@ -462,11 +573,12 @@ err_file: return err; } -static int igt_reset_active_engines(void *arg) +static int __igt_reset_engine_others(struct drm_i915_private *i915, + bool active) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine, *active; + struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; + struct hang h; int err = 0; /* Check that issuing a reset on one engine does not interfere @@ -476,24 +588,36 @@ static int igt_reset_active_engines(void *arg) if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES]; + struct task_struct *threads[I915_NUM_ENGINES] = {}; unsigned long resets[I915_NUM_ENGINES]; unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long count = 0; IGT_TIMEOUT(end_time); + if (active && !intel_engine_can_store_dword(engine)) + continue; + memset(threads, 0, sizeof(threads)); - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { struct task_struct *tsk; - if (active == engine) - continue; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - active); + other); - tsk = kthread_run(active_engine, active, - "igt/%s", active->name); + if (other == engine) + continue; + + tsk = kthread_run(active_engine, other, + "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; @@ -503,20 +627,70 @@ static int igt_reset_active_engines(void *arg) get_task_struct(tsk); } - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); 
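/*
 * Illustration (not part of the patch): wait_for_hang(), hoisted
 * earlier in this file, is a two-phase poll: a 10us busy-wait for the
 * fast path, then a sleeping wait of up to 1000ms. Both helpers return
 * 0 on success, so the shape is:
 *
 *	ok = wait_for_us(spinner_started(), 10) == 0 ||
 *	     wait_for(spinner_started(), 1000) == 0;
 *
 * where spinner_started() stands in for the seqno comparison. Because
 * the selftests now run with hangcheck disabled (see the
 * saved_hangcheck handling below), the tests also mark
 * engine->hangcheck.stalled by hand before each i915_reset_engine().
 */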
+ err = -EIO; + break; + } + + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + err = i915_reset_engine(engine, I915_RESET_QUIET); if (err) { - pr_err("i915_reset_engine(%s) failed, err=%d\n", - engine->name, err); + pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", + engine->name, active ? "active" : "idle", err); break; } + + engine->hangcheck.stalled = false; + count++; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", + engine->name, active ? "active" : "idle", count); + + if (i915_reset_engine_count(&i915->gpu_error, engine) - + resets[engine->id] != (active ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, active ? "active" : "idle", count, + i915_reset_engine_count(&i915->gpu_error, + engine) - resets[engine->id]); + if (!err) + err = -EINVAL; + } unwind: - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { int ret; if (!threads[tmp]) @@ -524,27 +698,29 @@ unwind: ret = kthread_stop(threads[tmp]); if (ret) { - pr_err("kthread for active engine %s failed, err=%d\n", - active->name, ret); + pr_err("kthread for other engine %s failed, err=%d\n", + other->name, ret); if (!err) err = ret; } put_task_struct(threads[tmp]); if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - active)) { + other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", - active->name, + other->name, i915_reset_engine_count(&i915->gpu_error, - active) - resets[tmp]); - err = -EIO; + other) - resets[tmp]); + if (!err) + err = -EINVAL; } } if (global != i915_reset_count(&i915->gpu_error)) { pr_err("Global reset (count=%ld)!\n", i915_reset_count(&i915->gpu_error) - global); - err = -EIO; + if (!err) + err = -EINVAL; } if (err) @@ -556,9 +732,25 @@ unwind: if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, false); +} + +static int igt_reset_active_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, true); +} + static u32 fake_hangcheck(struct drm_i915_gem_request *rq) { u32 reset_count; @@ -574,16 +766,6 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) return reset_count; } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) -{ - return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 10) && - wait_for(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 1000)); -} - static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; @@ -617,8 +799,8 @@ static int igt_wait_reset(void *arg) if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - rq->fence.seqno, hws_seqno(&h, rq)); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); @@ -712,8 +894,8 @@ static int igt_reset_queue(void *arg) if (!wait_for_hang(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - prev->fence.seqno, 
hws_seqno(&h, prev)); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, prev->fence.seqno, hws_seqno(&h, prev)); intel_engine_dump(prev->engine, &p, "%s\n", prev->engine->name); @@ -819,8 +1001,8 @@ static int igt_handle_error(void *arg) if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - rq->fence.seqno, hws_seqno(&h, rq)); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); @@ -864,21 +1046,26 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_hang_sanitycheck), - SUBTEST(igt_reset_engine), - SUBTEST(igt_reset_active_engines), + SUBTEST(igt_reset_idle_engine), + SUBTEST(igt_reset_active_engine), + SUBTEST(igt_reset_idle_engine_others), + SUBTEST(igt_reset_active_engine_others), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + bool saved_hangcheck; int err; if (!intel_has_gpu_reset(i915)) return 0; intel_runtime_pm_get(i915); + saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); err = i915_subtests(tests, i915); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(i915); return err; diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 3f2b4afcb8a7..1d053bbefc02 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -257,6 +257,7 @@ static int imx_drm_bind(struct device *dev) drm->mode_config.max_height = 4096; drm->mode_config.funcs = &imx_drm_mode_config_funcs; drm->mode_config.helper_private = &imx_drm_mode_config_helpers; + drm->mode_config.allow_fb_modifiers = true; drm_mode_config_init(drm); diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 5a67daedcf4d..57ed56d8623f 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -77,6 +77,18 @@ static const uint32_t ipu_plane_formats[] = { DRM_FORMAT_BGRX8888_A8, }; +static const uint64_t ipu_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static const uint64_t pre_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_VIVANTE_TILED, + DRM_FORMAT_MOD_VIVANTE_SUPER_TILED, + DRM_FORMAT_MOD_INVALID +}; + int ipu_plane_irq(struct ipu_plane *ipu_plane) { return ipu_idmac_channel_irq(ipu_plane->ipu, ipu_plane->ipu_ch, @@ -303,6 +315,22 @@ void ipu_plane_destroy_state(struct drm_plane *plane, kfree(ipu_state); } +static bool ipu_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, uint64_t modifier) +{ + struct ipu_soc *ipu = to_ipu_plane(plane)->ipu; + + /* linear is supported for all planes and formats */ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + + /* without a PRG there are no supported modifiers */ + if (!ipu_prg_present(ipu)) + return false; + + return ipu_prg_format_supported(ipu, format, modifier); +} + static const struct drm_plane_funcs ipu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -310,6 +338,7 @@ static const struct drm_plane_funcs ipu_plane_funcs = { .reset = ipu_plane_state_reset, .atomic_duplicate_state = ipu_plane_duplicate_state, .atomic_destroy_state = ipu_plane_destroy_state, + .format_mod_supported = 
ipu_plane_format_mod_supported, }; static int ipu_plane_atomic_check(struct drm_plane *plane, @@ -550,8 +579,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, drm_rect_width(&state->src) >> 16, drm_rect_height(&state->src) >> 16, - fb->pitches[0], - fb->format->format, &eba); + fb->pitches[0], fb->format->format, + fb->modifier, &eba); } if (old_state->fb && !drm_atomic_crtc_needs_modeset(crtc_state)) { @@ -700,18 +729,71 @@ static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = { int ipu_planes_assign_pre(struct drm_device *dev, struct drm_atomic_state *state) { + struct drm_crtc_state *old_crtc_state, *crtc_state; struct drm_plane_state *plane_state; + struct ipu_plane_state *ipu_state; + struct ipu_plane *ipu_plane; struct drm_plane *plane; + struct drm_crtc *crtc; int available_pres = ipu_prg_max_active_channels(); - int i; + int ret, i; + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, crtc_state, i) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + return ret; + } + + /* + * We are going over the planes in 2 passes: first we assign PREs to + * planes with a tiling modifier, which need the PREs to resolve into + * linear. Any failure to assign a PRE there is fatal. In the second + * pass we try to assign PREs to linear FBs, to improve memory access + * patterns for them. Failure at this point is non-fatal, as we can + * scan out linear FBs without a PRE. + */ for_each_new_plane_in_state(state, plane, plane_state, i) { - struct ipu_plane_state *ipu_state = - to_ipu_plane_state(plane_state); - struct ipu_plane *ipu_plane = to_ipu_plane(plane); + ipu_state = to_ipu_plane_state(plane_state); + ipu_plane = to_ipu_plane(plane); + + if (!plane_state->fb) { + ipu_state->use_pre = false; + continue; + } + + if (!(plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) || + plane_state->fb->modifier == DRM_FORMAT_MOD_LINEAR) + continue; + + if (!ipu_prg_present(ipu_plane->ipu) || !available_pres) + return -EINVAL; + + if (!ipu_prg_format_supported(ipu_plane->ipu, + plane_state->fb->format->format, + plane_state->fb->modifier)) + return -EINVAL; + + ipu_state->use_pre = true; + available_pres--; + } + + for_each_new_plane_in_state(state, plane, plane_state, i) { + ipu_state = to_ipu_plane_state(plane_state); + ipu_plane = to_ipu_plane(plane); + + if (!plane_state->fb) { + ipu_state->use_pre = false; + continue; + } + + if ((plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) && + plane_state->fb->modifier != DRM_FORMAT_MOD_LINEAR) + continue; + + /* make sure that modifier is initialized */ + plane_state->fb->modifier = DRM_FORMAT_MOD_LINEAR; if (ipu_prg_present(ipu_plane->ipu) && available_pres && - plane_state->fb && ipu_prg_format_supported(ipu_plane->ipu, plane_state->fb->format->format, plane_state->fb->modifier)) { @@ -731,6 +813,7 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, enum drm_plane_type type) { struct ipu_plane *ipu_plane; + const uint64_t *modifiers = ipu_format_modifiers; int ret; DRM_DEBUG_KMS("channel %d, dp flow %d, possible_crtcs=0x%x\n", @@ -746,10 +829,13 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, ipu_plane->dma = dma; ipu_plane->dp_flow = dp; + if (ipu_prg_present(ipu)) + modifiers = pre_format_modifiers; + ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs, &ipu_plane_funcs, ipu_plane_formats, ARRAY_SIZE(ipu_plane_formats), - NULL, type, NULL); + modifiers, type, NULL); if (ret) { 
DRM_ERROR("failed to initialize plane\n"); kfree(ipu_plane); diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c index 82dcb20cdaa3..fd02506274da 100644 --- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c +++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c @@ -290,11 +290,6 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - DRM_ERROR("Unable to get resource\n"); - return -ENODEV; - } - dsi->base = devm_ioremap_resource(dev, res); if (IS_ERR(dsi->base)) { DRM_ERROR("Unable to get dsi registers\n"); diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 394613b0fd46..6dc5d4ec4e17 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -901,12 +901,6 @@ int ltdc_load(struct drm_device *ddev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - DRM_ERROR("Unable to get resource\n"); - ret = -ENODEV; - goto err; - } - ldev->regs = devm_ioremap_resource(dev, res); if (IS_ERR(ldev->regs)) { DRM_ERROR("Unable to get ltdc registers\n"); diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig index 87a20b3dcf7a..fe6f8c5b4445 100644 --- a/drivers/gpu/ipu-v3/Kconfig +++ b/drivers/gpu/ipu-v3/Kconfig @@ -1,7 +1,9 @@ config IMX_IPUV3_CORE tristate "IPUv3 core support" - depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM + depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM || COMPILE_TEST depends on DRM || !DRM # if DRM=m, this can't be 'y' + select BITREVERSE + select GENERIC_ALLOCATOR if DRM select GENERIC_IRQ_CHIP help Choose this if you have a i.MX5/6 system and want to use the Image diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index 1cb82f445f91..bb9c087e6c0d 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/bitrev.h> #include <linux/io.h> +#include <linux/sizes.h> #include <drm/drm_fourcc.h> #include "ipu-prv.h" diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c index 321eb983c2f5..67cc820253a9 100644 --- a/drivers/gpu/ipu-v3/ipu-ic.c +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -17,6 +17,7 @@ #include <linux/bitrev.h> #include <linux/io.h> #include <linux/err.h> +#include <linux/sizes.h> #include "ipu-prv.h" /* IC Register Offsets */ diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index c860a7997cb5..f1cec3d70498 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -49,6 +49,10 @@ #define IPU_PRE_TPR_CTRL 0x070 #define IPU_PRE_TPR_CTRL_TILE_FORMAT(v) ((v & 0xff) << 0) #define IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK 0xff +#define IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT (1 << 0) +#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SPLIT_BUF (1 << 4) +#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF (1 << 5) +#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED (1 << 6) #define IPU_PRE_PREFETCH_ENG_CTRL 0x080 #define IPU_PRE_PREF_ENG_CTRL_PREFETCH_EN (1 << 0) @@ -147,7 +151,7 @@ int ipu_pre_get(struct ipu_pre *pre) val = IPU_PRE_CTRL_HANDSHAKE_ABORT_SKIP_EN | IPU_PRE_CTRL_HANDSHAKE_EN | IPU_PRE_CTRL_TPR_REST_SEL | - IPU_PRE_CTRL_BLOCK_16 | IPU_PRE_CTRL_SDW_UPDATE; + IPU_PRE_CTRL_SDW_UPDATE; writel(val, pre->regs + IPU_PRE_CTRL); pre->in_use = true; @@ -163,14 +167,17 @@ void ipu_pre_put(struct ipu_pre *pre) void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, unsigned int height, unsigned int stride, u32 format, - unsigned int bufaddr) + 
uint64_t modifier, unsigned int bufaddr) { const struct drm_format_info *info = drm_format_info(format); u32 active_bpp = info->cpp[0] >> 1; u32 val; /* calculate safe window for ctrl register updates */ - pre->safe_window_end = height - 2; + if (modifier == DRM_FORMAT_MOD_LINEAR) + pre->safe_window_end = height - 2; + else + pre->safe_window_end = DIV_ROUND_UP(height, 4) - 1; writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); @@ -203,9 +210,25 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, writel(pre->buffer_paddr, pre->regs + IPU_PRE_STORE_ENG_ADDR); + val = readl(pre->regs + IPU_PRE_TPR_CTRL); + val &= ~IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK; + if (modifier != DRM_FORMAT_MOD_LINEAR) { + /* only support single buffer formats for now */ + val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF; + if (modifier == DRM_FORMAT_MOD_VIVANTE_SUPER_TILED) + val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED; + if (info->cpp[0] == 2) + val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT; + } + writel(val, pre->regs + IPU_PRE_TPR_CTRL); + val = readl(pre->regs + IPU_PRE_CTRL); val |= IPU_PRE_CTRL_EN_REPEAT | IPU_PRE_CTRL_ENABLE | IPU_PRE_CTRL_SDW_UPDATE; + if (modifier == DRM_FORMAT_MOD_LINEAR) + val &= ~IPU_PRE_CTRL_BLOCK_EN; + else + val |= IPU_PRE_CTRL_BLOCK_EN; writel(val, pre->regs + IPU_PRE_CTRL); } diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index 0013ca9f72c8..067365c733c6 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <video/imx-ipu-v3.h> @@ -132,28 +133,25 @@ bool ipu_prg_format_supported(struct ipu_soc *ipu, uint32_t format, if (info->num_planes != 1) return false; - return true; + switch (modifier) { + case DRM_FORMAT_MOD_LINEAR: + case DRM_FORMAT_MOD_VIVANTE_TILED: + case DRM_FORMAT_MOD_VIVANTE_SUPER_TILED: + return true; + default: + return false; + } } EXPORT_SYMBOL_GPL(ipu_prg_format_supported); int ipu_prg_enable(struct ipu_soc *ipu) { struct ipu_prg *prg = ipu->prg_priv; - int ret; if (!prg) return 0; - ret = clk_prepare_enable(prg->clk_axi); - if (ret) - goto fail_disable_ipg; - - return 0; - -fail_disable_ipg: - clk_disable_unprepare(prg->clk_ipg); - - return ret; + return pm_runtime_get_sync(prg->dev); } EXPORT_SYMBOL_GPL(ipu_prg_enable); @@ -164,7 +162,7 @@ void ipu_prg_disable(struct ipu_soc *ipu) if (!prg) return; - clk_disable_unprepare(prg->clk_axi); + pm_runtime_put(prg->dev); } EXPORT_SYMBOL_GPL(ipu_prg_disable); @@ -255,7 +253,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan) if (!chan->enabled || prg_chan < 0) return; - clk_prepare_enable(prg->clk_ipg); + pm_runtime_get_sync(prg->dev); val = readl(prg->regs + IPU_PRG_CTL); val |= IPU_PRG_CTL_BYPASS(prg_chan); @@ -264,7 +262,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan) val = IPU_PRG_REG_UPDATE_REG_UPDATE; writel(val, prg->regs + IPU_PRG_REG_UPDATE); - clk_disable_unprepare(prg->clk_ipg); + pm_runtime_put(prg->dev); ipu_prg_put_pre(prg, prg_chan); @@ -275,7 +273,7 @@ EXPORT_SYMBOL_GPL(ipu_prg_channel_disable); int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, unsigned int axi_id, unsigned int width, unsigned int height, unsigned int stride, - u32 format, unsigned long *eba) + u32 format, uint64_t modifier, unsigned long *eba) { int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num); struct ipu_prg *prg = 
ipu_chan->ipu->prg_priv; @@ -296,14 +294,10 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, return ret; ipu_pre_configure(prg->pres[chan->used_pre], - width, height, stride, format, *eba); + width, height, stride, format, modifier, *eba); - ret = clk_prepare_enable(prg->clk_ipg); - if (ret) { - ipu_prg_put_pre(prg, prg_chan); - return ret; - } + pm_runtime_get_sync(prg->dev); val = (stride - 1) & IPU_PRG_STRIDE_STRIDE_MASK; writel(val, prg->regs + IPU_PRG_STRIDE(prg_chan)); @@ -336,7 +330,7 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, (val & IPU_PRG_STATUS_BUFFER1_READY(prg_chan)), 5, 1000); - clk_disable_unprepare(prg->clk_ipg); + pm_runtime_put(prg->dev); chan->enabled = true; return 0; @@ -384,6 +378,12 @@ static int ipu_prg_probe(struct platform_device *pdev) if (ret) return ret; + ret = clk_prepare_enable(prg->clk_axi); + if (ret) { + clk_disable_unprepare(prg->clk_ipg); + return ret; + } + /* init to free running mode */ val = readl(prg->regs + IPU_PRG_CTL); val |= IPU_PRG_CTL_SHADOW_EN; @@ -392,7 +392,8 @@ static int ipu_prg_probe(struct platform_device *pdev) /* disable address threshold */ writel(0xffffffff, prg->regs + IPU_PRG_THD); - clk_disable_unprepare(prg->clk_ipg); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); prg->dev = dev; platform_set_drvdata(pdev, prg); @@ -414,6 +415,40 @@ static int ipu_prg_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int prg_suspend(struct device *dev) +{ + struct ipu_prg *prg = dev_get_drvdata(dev); + + clk_disable_unprepare(prg->clk_axi); + clk_disable_unprepare(prg->clk_ipg); + + return 0; +} + +static int prg_resume(struct device *dev) +{ + struct ipu_prg *prg = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(prg->clk_ipg); + if (ret) + return ret; + + ret = clk_prepare_enable(prg->clk_axi); + if (ret) { + clk_disable_unprepare(prg->clk_ipg); + return ret; + } + + return 0; +} +#endif + +static const struct dev_pm_ops prg_pm_ops = { + SET_RUNTIME_PM_OPS(prg_suspend, prg_resume, NULL) +}; + static const struct of_device_id ipu_prg_dt_ids[] = { { .compatible = "fsl,imx6qp-prg", }, { /* sentinel */ }, @@ -424,6 +459,7 @@ struct platform_driver ipu_prg_drv = { .remove = ipu_prg_remove, .driver = { .name = "imx-ipu-prg", + .pm = &prg_pm_ops, .of_match_table = ipu_prg_dt_ids, }, }; diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index ac4b8d658500..d6beee99b6b8 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -269,8 +269,8 @@ int ipu_pre_get(struct ipu_pre *pre); void ipu_pre_put(struct ipu_pre *pre); u32 ipu_pre_get_baddr(struct ipu_pre *pre); void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, - unsigned int height, - unsigned int stride, u32 format, unsigned int bufaddr); + unsigned int height, unsigned int stride, u32 format, + uint64_t modifier, unsigned int bufaddr); void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr); struct ipu_prg *ipu_prg_lookup_by_phandle(struct device *dev, const char *name, diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 5afd6e364fb6..1c27526c499e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -189,12 +189,40 @@ struct drm_private_state_funcs { struct drm_private_state *state); }; +/** + * struct drm_private_obj - base struct for driver private atomic object + * + * A driver private object is initialized by calling + * drm_atomic_private_obj_init() and cleaned up by calling + * drm_atomic_private_obj_fini(). 
+ * + * Currently only tracks the state update functions and the opaque driver + * private state itself, but in the future might also track which + * &drm_modeset_lock is required to duplicate and update this object's state. + */ struct drm_private_obj { + /** + * @state: Current atomic state for this driver private object. + */ struct drm_private_state *state; + /** + * @funcs: + * + * Functions to manipulate the state of this driver private object, see + * &drm_private_state_funcs. + */ const struct drm_private_state_funcs *funcs; }; +/** + * struct drm_private_state - base struct for driver private object state + * @state: backpointer to global drm_atomic_state + * + * Currently only contains a backpointer to the overall atomic update, but in + * the future also might hold synchronization information similar to e.g. + * &drm_crtc.commit. + */ struct drm_private_state { struct drm_atomic_state *state; }; @@ -218,6 +246,10 @@ struct __drm_private_objs_state { * @num_private_objs: size of the @private_objs array * @private_objs: pointer to array of private object pointers * @acquire_ctx: acquire context for this atomic modeset state update + * + * States are added to an atomic update by calling drm_atomic_get_crtc_state(), + * drm_atomic_get_plane_state(), drm_atomic_get_connector_state(), or for + * private state structures, drm_atomic_get_private_obj_state(). */ struct drm_atomic_state { struct kref ref; diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 1494b12a984a..b069433e7fc1 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -33,6 +33,7 @@ struct drm_fb_helper; #include <drm/drm_crtc.h> +#include <drm/drm_device.h> #include <linux/kgdb.h> enum mode_set_atomic { @@ -275,6 +276,7 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper); void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagelist); +int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper); ssize_t drm_fb_helper_sys_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); @@ -319,6 +321,13 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper, struct drm_connector *connector); +int drm_fb_helper_fbdev_setup(struct drm_device *dev, + struct drm_fb_helper *fb_helper, + const struct drm_fb_helper_funcs *funcs, + unsigned int preferred_bpp, + unsigned int max_conn_count); +void drm_fb_helper_fbdev_teardown(struct drm_device *dev); + void drm_fb_helper_lastclose(struct drm_device *dev); void drm_fb_helper_output_poll_changed(struct drm_device *dev); #else @@ -332,11 +341,17 @@ static inline int drm_fb_helper_init(struct drm_device *dev, struct drm_fb_helper *helper, int max_conn) { + /* So drivers can use it to free the struct */ + helper->dev = dev; + dev->fb_helper = helper; + return 0; } static inline void drm_fb_helper_fini(struct drm_fb_helper *helper) { + if (helper && helper->dev) + helper->dev->fb_helper = NULL; } static inline int drm_fb_helper_blank(int blank, struct fb_info *info) @@ -409,6 +424,11 @@ static inline void drm_fb_helper_deferred_io(struct fb_info *info, { } +static inline int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper) +{ + return -ENODEV; +} + static inline ssize_t drm_fb_helper_sys_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos) @@ -518,6 +538,24 @@ drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper, return 0; } +static inline int 
+drm_fb_helper_fbdev_setup(struct drm_device *dev, + struct drm_fb_helper *fb_helper, + const struct drm_fb_helper_funcs *funcs, + unsigned int preferred_bpp, + unsigned int max_conn_count) +{ + /* So drivers can use it to free the struct */ + dev->fb_helper = fb_helper; + + return 0; +} + +static inline void drm_fb_helper_fbdev_teardown(struct drm_device *dev) +{ + dev->fb_helper = NULL; +} + static inline void drm_fb_helper_lastclose(struct drm_device *dev) { } diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index dccb897951ba..c50502c656e5 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -121,6 +121,12 @@ struct drm_framebuffer { * @base: base modeset object structure, contains the reference count. */ struct drm_mode_object base; + + /** + * @comm: Name of the process allocating the fb, used for fb dumping. + */ + char comm[TASK_COMM_LEN]; + /** * @format: framebuffer format information */ diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index c6639e8e5007..2cb6f02df64a 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -269,6 +269,9 @@ struct drm_mode_config_funcs { * state easily. If this hook is implemented, drivers must also * implement @atomic_state_clear and @atomic_state_free. * + * Subclassing of &drm_atomic_state is deprecated in favour of using + * &drm_private_state and &drm_private_obj. + * * RETURNS: * * A new &drm_atomic_state on success or NULL on failure. @@ -290,6 +293,9 @@ struct drm_mode_config_funcs { * * Drivers that implement this must call drm_atomic_state_default_clear() * to clear common state. + * + * Subclassing of &drm_atomic_state is deprecated in favour of using + * &drm_private_state and &drm_private_obj. */ void (*atomic_state_clear)(struct drm_atomic_state *state); @@ -302,6 +308,9 @@ struct drm_mode_config_funcs { * * Drivers that implement this must call * drm_atomic_state_default_release() to release common resources. + * + * Subclassing of &drm_atomic_state is deprecated in favour of using + * &drm_private_state and &drm_private_obj. */ void (*atomic_state_free)(struct drm_atomic_state *state); }; diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5f9932e2246e..2a4a42e59a47 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -80,13 +80,13 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf); __printf(2, 3) void drm_printf(struct drm_printer *p, const char *f, ...); +__printf(2, 0) /** * drm_vprintf - print to a &drm_printer stream * @p: the &drm_printer * @fmt: format string * @va: the va_list */ -__printf(2, 0) static inline void drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) { diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 9e8ba90c6784..3980602472c0 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -33,36 +33,31 @@ struct drm_syncobj_cb; /** * struct drm_syncobj - sync object. * - * This structure defines a generic sync object which wraps a dma fence. + * This structure defines a generic sync object which wraps a &dma_fence. */ struct drm_syncobj { /** - * @refcount: - * - * Reference count of this object. + * @refcount: Reference count of this object. */ struct kref refcount; /** * @fence: * NULL or a pointer to the fence bound to this object. * - * This field should not be used directly. Use drm_syncobj_fence_get - * and drm_syncobj_replace_fence instead. + * This field should not be used directly. 
Use drm_syncobj_fence_get() + * and drm_syncobj_replace_fence() instead. */ struct dma_fence __rcu *fence; /** - * @cb_list: - * List of callbacks to call when the fence gets replaced + * @cb_list: List of callbacks to call when the &fence gets replaced. */ struct list_head cb_list; /** - * @lock: - * locks cb_list and write-locks fence. + * @lock: Protects &cb_list and write-locks &fence. */ spinlock_t lock; /** - * @file: - * a file backing for this syncobj. + * @file: A file backing for this syncobj. */ struct file *file; }; @@ -73,7 +68,7 @@ typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, /** * struct drm_syncobj_cb - callback for drm_syncobj_add_callback * @node: used by drm_syncob_add_callback to append this struct to - * syncobj::cb_list + * &drm_syncobj.cb_list * @func: drm_syncobj_func_t to call * * This struct will be initialized by drm_syncobj_add_callback, additional @@ -92,7 +87,7 @@ void drm_syncobj_free(struct kref *kref); * drm_syncobj_get - acquire a syncobj reference * @obj: sync object * - * This acquires additional reference to @obj. It is illegal to call this + * This acquires an additional reference to @obj. It is illegal to call this * without already holding a reference. No locks required. */ static inline void @@ -111,6 +106,17 @@ drm_syncobj_put(struct drm_syncobj *obj) kref_put(&obj->refcount, drm_syncobj_free); } +/** + * drm_syncobj_fence_get - get a reference to a fence in a sync object + * @syncobj: sync object. + * + * This acquires additional reference to &drm_syncobj.fence contained in @obj, + * if not NULL. It is illegal to call this without already holding a reference. + * No locks required. + * + * Returns: + * Either the fence of @obj or NULL if there's none. + */ static inline struct dma_fence * drm_syncobj_fence_get(struct drm_syncobj *syncobj) { diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 972a25633525..5db0458dd832 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -373,24 +373,46 @@ /* CFL S */ #define INTEL_CFL_S_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \ - INTEL_VGA_DEVICE(0x3E93, info) /* SRV GT1 */ + INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x3E99, info) /* SRV GT1 */ #define INTEL_CFL_S_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \ - INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */ + INTEL_VGA_DEVICE(0x3E96, info), /* SRV GT2 */ \ + INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */ /* CFL H */ #define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ -/* CFL U */ +/* CFL U GT1 */ +#define INTEL_CFL_U_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x3EA1, info), \ + INTEL_VGA_DEVICE(0x3EA4, info) + +/* CFL U GT2 */ +#define INTEL_CFL_U_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x3EA0, info), \ + INTEL_VGA_DEVICE(0x3EA3, info), \ + INTEL_VGA_DEVICE(0x3EA9, info) + +/* CFL U GT3 */ #define INTEL_CFL_U_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x3EA2, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x3EA5, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x3EA5, info) /* ULT GT3 */ + INTEL_VGA_DEVICE(0x3EA8, info) /* ULT GT3 */ + +#define INTEL_CFL_IDS(info) \ + INTEL_CFL_S_GT1_IDS(info), \ + INTEL_CFL_S_GT2_IDS(info), \ + INTEL_CFL_H_GT2_IDS(info), \ + INTEL_CFL_U_GT1_IDS(info), \ + 
INTEL_CFL_U_GT2_IDS(info), \ + INTEL_CFL_U_GT3_IDS(info) /* CNL U 2+2 */ #define INTEL_CNL_U_GT2_IDS(info) \ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index d01087b2a651..4a54305120e0 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -135,172 +135,6 @@ struct drm_exynos_g2d_exec { __u64 async; }; -enum drm_exynos_ops_id { - EXYNOS_DRM_OPS_SRC, - EXYNOS_DRM_OPS_DST, - EXYNOS_DRM_OPS_MAX, -}; - -struct drm_exynos_sz { - __u32 hsize; - __u32 vsize; -}; - -struct drm_exynos_pos { - __u32 x; - __u32 y; - __u32 w; - __u32 h; -}; - -enum drm_exynos_flip { - EXYNOS_DRM_FLIP_NONE = (0 << 0), - EXYNOS_DRM_FLIP_VERTICAL = (1 << 0), - EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1), - EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL | - EXYNOS_DRM_FLIP_HORIZONTAL, -}; - -enum drm_exynos_degree { - EXYNOS_DRM_DEGREE_0, - EXYNOS_DRM_DEGREE_90, - EXYNOS_DRM_DEGREE_180, - EXYNOS_DRM_DEGREE_270, -}; - -enum drm_exynos_planer { - EXYNOS_DRM_PLANAR_Y, - EXYNOS_DRM_PLANAR_CB, - EXYNOS_DRM_PLANAR_CR, - EXYNOS_DRM_PLANAR_MAX, -}; - -/** - * A structure for ipp supported property list. - * - * @version: version of this structure. - * @ipp_id: id of ipp driver. - * @count: count of ipp driver. - * @writeback: flag of writeback supporting. - * @flip: flag of flip supporting. - * @degree: flag of degree information. - * @csc: flag of csc supporting. - * @crop: flag of crop supporting. - * @scale: flag of scale supporting. - * @refresh_min: min hz of refresh. - * @refresh_max: max hz of refresh. - * @crop_min: crop min resolution. - * @crop_max: crop max resolution. - * @scale_min: scale min resolution. - * @scale_max: scale max resolution. - */ -struct drm_exynos_ipp_prop_list { - __u32 version; - __u32 ipp_id; - __u32 count; - __u32 writeback; - __u32 flip; - __u32 degree; - __u32 csc; - __u32 crop; - __u32 scale; - __u32 refresh_min; - __u32 refresh_max; - __u32 reserved; - struct drm_exynos_sz crop_min; - struct drm_exynos_sz crop_max; - struct drm_exynos_sz scale_min; - struct drm_exynos_sz scale_max; -}; - -/** - * A structure for ipp config. - * - * @ops_id: property of operation directions. - * @flip: property of mirror, flip. - * @degree: property of rotation degree. - * @fmt: property of image format. - * @sz: property of image size. - * @pos: property of image position(src-cropped,dst-scaler). - */ -struct drm_exynos_ipp_config { - __u32 ops_id; - __u32 flip; - __u32 degree; - __u32 fmt; - struct drm_exynos_sz sz; - struct drm_exynos_pos pos; -}; - -enum drm_exynos_ipp_cmd { - IPP_CMD_NONE, - IPP_CMD_M2M, - IPP_CMD_WB, - IPP_CMD_OUTPUT, - IPP_CMD_MAX, -}; - -/** - * A structure for ipp property. - * - * @config: source, destination config. - * @cmd: definition of command. - * @ipp_id: id of ipp driver. - * @prop_id: id of property. - * @refresh_rate: refresh rate. - */ -struct drm_exynos_ipp_property { - struct drm_exynos_ipp_config config[EXYNOS_DRM_OPS_MAX]; - __u32 cmd; - __u32 ipp_id; - __u32 prop_id; - __u32 refresh_rate; -}; - -enum drm_exynos_ipp_buf_type { - IPP_BUF_ENQUEUE, - IPP_BUF_DEQUEUE, -}; - -/** - * A structure for ipp buffer operations. - * - * @ops_id: operation directions. - * @buf_type: definition of buffer. - * @prop_id: id of property. - * @buf_id: id of buffer. - * @handle: Y, Cb, Cr each planar handle. - * @user_data: user data. 
- */ -struct drm_exynos_ipp_queue_buf { - __u32 ops_id; - __u32 buf_type; - __u32 prop_id; - __u32 buf_id; - __u32 handle[EXYNOS_DRM_PLANAR_MAX]; - __u32 reserved; - __u64 user_data; -}; - -enum drm_exynos_ipp_ctrl { - IPP_CTRL_PLAY, - IPP_CTRL_STOP, - IPP_CTRL_PAUSE, - IPP_CTRL_RESUME, - IPP_CTRL_MAX, -}; - -/** - * A structure for ipp start/stop operations. - * - * @prop_id: id of property. - * @ctrl: definition of control. - */ -struct drm_exynos_ipp_cmd_ctrl { - __u32 prop_id; - __u32 ctrl; -}; - #define DRM_EXYNOS_GEM_CREATE 0x00 #define DRM_EXYNOS_GEM_MAP 0x01 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */ @@ -312,11 +146,7 @@ struct drm_exynos_ipp_cmd_ctrl { #define DRM_EXYNOS_G2D_SET_CMDLIST 0x21 #define DRM_EXYNOS_G2D_EXEC 0x22 -/* IPP - Image Post Processing */ -#define DRM_EXYNOS_IPP_GET_PROPERTY 0x30 -#define DRM_EXYNOS_IPP_SET_PROPERTY 0x31 -#define DRM_EXYNOS_IPP_QUEUE_BUF 0x32 -#define DRM_EXYNOS_IPP_CMD_CTRL 0x33 +/* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */ #define DRM_IOCTL_EXYNOS_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create) @@ -335,18 +165,8 @@ struct drm_exynos_ipp_cmd_ctrl { #define DRM_IOCTL_EXYNOS_G2D_EXEC DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec) -#define DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_GET_PROPERTY, struct drm_exynos_ipp_prop_list) -#define DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_SET_PROPERTY, struct drm_exynos_ipp_property) -#define DRM_IOCTL_EXYNOS_IPP_QUEUE_BUF DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_QUEUE_BUF, struct drm_exynos_ipp_queue_buf) -#define DRM_IOCTL_EXYNOS_IPP_CMD_CTRL DRM_IOWR(DRM_COMMAND_BASE + \ - DRM_EXYNOS_IPP_CMD_CTRL, struct drm_exynos_ipp_cmd_ctrl) - /* EXYNOS specific events */ #define DRM_EXYNOS_G2D_EVENT 0x80000000 -#define DRM_EXYNOS_IPP_EVENT 0x80000001 struct drm_exynos_g2d_event { struct drm_event base; @@ -357,16 +177,6 @@ struct drm_exynos_g2d_event { __u32 reserved; }; -struct drm_exynos_ipp_event { - struct drm_event base; - __u64 user_data; - __u32 tv_sec; - __u32 tv_usec; - __u32 prop_id; - __u32 reserved; - __u32 buf_id[EXYNOS_DRM_OPS_MAX]; -}; - #if defined(__cplusplus) } #endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e80501368ae..f4cab5b3ba9a 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args { __u64 eop_buffer_address; /* to KFD */ __u64 eop_buffer_size; /* to KFD */ __u64 ctx_save_restore_address; /* to KFD */ - __u64 ctx_save_restore_size; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { @@ -261,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args { */ }; +struct kfd_ioctl_set_trap_handler_args { + uint64_t tba_addr; /* to KFD */ + uint64_t tma_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -321,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args { #define AMDKFD_IOC_GET_TILE_CONFIG \ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) +#define AMDKFD_IOC_SET_TRAP_HANDLER \ + AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x13 +#define AMDKFD_COMMAND_END 0x14 #endif 
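
The new AMDKFD_IOC_SET_TRAP_HANDLER ioctl (command 0x13) takes a trap handler base address (tba_addr), a trap handler memory address (tma_addr) and a gpu_id. A minimal userspace sketch of invoking it follows, assuming the standard /dev/kfd device node; the gpu_id and the two addresses are dummy illustration values, not anything defined by this patch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Install a trap handler on one GPU; returns the raw ioctl result. */
static int kfd_set_trap_handler(int kfd_fd, uint32_t gpu_id,
				uint64_t tba, uint64_t tma)
{
	struct kfd_ioctl_set_trap_handler_args args = {
		.tba_addr = tba,	/* trap handler base address, to KFD */
		.tma_addr = tma,	/* trap handler memory address, to KFD */
		.gpu_id = gpu_id,	/* assumed to come from KFD topology */
	};

	return ioctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
}

int main(void)
{
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

	if (fd < 0) {
		perror("open(/dev/kfd)");
		return 1;
	}
	/* gpu_id and addresses below are placeholders for illustration */
	if (kfd_set_trap_handler(fd, 0x1234, 0x100000, 0x110000))
		perror("AMDKFD_IOC_SET_TRAP_HANDLER");
	close(fd);
	return 0;
}

Note that AMDKFD_COMMAND_END is bumped from 0x13 to 0x14 in the same hunk, so the new command number falls inside the validated range.
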
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index ce4c07688b13..abbad94e14a1 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -344,7 +344,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan); int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, unsigned int axi_id, unsigned int width, unsigned int height, unsigned int stride, - u32 format, unsigned long *eba); + u32 format, uint64_t modifier, unsigned long *eba); /* * IPU CMOS Sensor Interface (csi) functions
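
The imx-ipu-v3.h hunk above completes the modifier plumbing: ipu_prg_channel_configure() now receives the framebuffer modifier so the PRE tiling resolver can be programmed per scanout buffer. On the DRM side, the ipuv3-plane.c changes in this series show the generic pattern any atomic driver uses to expose modifiers: pass a DRM_FORMAT_MOD_INVALID-terminated list to drm_universal_plane_init() and validate format/modifier pairs in .format_mod_supported(). A reduced sketch of that pattern follows, with hypothetical foo_* names standing in for a driver's own; the single-plane restriction mirrors the check in ipu_prg_format_supported():

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_plane.h>

/* The modifier list must be terminated by DRM_FORMAT_MOD_INVALID. */
static const uint64_t foo_plane_modifiers[] = {
	DRM_FORMAT_MOD_LINEAR,
	DRM_FORMAT_MOD_VIVANTE_SUPER_TILED,
	DRM_FORMAT_MOD_INVALID
};

static bool foo_format_mod_supported(struct drm_plane *plane,
				     uint32_t format, uint64_t modifier)
{
	/* linear scanout always works */
	if (modifier == DRM_FORMAT_MOD_LINEAR)
		return true;

	/*
	 * Tiled layouts only for single-plane formats here; @format is
	 * always one of the formats the plane advertised at init time,
	 * so drm_format_info() cannot return NULL.
	 */
	return drm_format_info(format)->num_planes == 1;
}

static const struct drm_plane_funcs foo_plane_funcs = {
	.update_plane		= drm_atomic_helper_update_plane,
	.disable_plane		= drm_atomic_helper_disable_plane,
	.destroy		= drm_plane_cleanup,
	.reset			= drm_atomic_helper_plane_reset,
	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
	.format_mod_supported	= foo_format_mod_supported,
};

At init time the list is handed to the plane core, as the ipuv3 patch does:

	ret = drm_universal_plane_init(dev, plane, possible_crtcs,
				       &foo_plane_funcs, foo_formats,
				       ARRAY_SIZE(foo_formats),
				       foo_plane_modifiers, type, NULL);

With that in place, userspace only sees modifiers the hardware path can actually resolve, and anything else is rejected at the atomic check stage.
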