diff options
Diffstat (limited to 'drivers/gpu')
174 files changed, 2434 insertions, 1372 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e88c497fa010..f65656df3619 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -256,7 +256,6 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE - select DRM_BUDDY help Choose this option if you have a recent AMD Radeon graphics card. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1f8161cd507f..3b1c675aba34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -714,7 +714,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, { bool all_hub = false; - if (adev->family == AMDGPU_FAMILY_AI) + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 67abf8dcd30a..4608599ba6bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1364,16 +1364,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; @@ -1918,9 +1912,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, return -EINVAL; } - /* delete kgd_mem from kfd_bo_list to avoid re-validating - * this BO in BO's restoring after eviction. - */ mutex_lock(&mem->process_info->lock); ret = amdgpu_bo_reserve(bo, true); @@ -1943,7 +1934,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, amdgpu_amdkfd_remove_eviction_fence( bo, mem->process_info->eviction_fence); - list_del_init(&mem->validate_list.head); if (size) *size = amdgpu_bo_size(bo); @@ -2512,12 +2502,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Attach new eviction fence to all BOs */ + /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) + validate_list.head) { + if (mem->bo->tbo.pin_count) + continue; + amdgpu_bo_fence(mem->bo, &process_info->eviction_fence->base, true); - + } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 625424f3082b..58df107e3beb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5164,7 +5164,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 17c9bbe0cbc5..4dfd6724b3ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1528,6 +1528,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, stime, etime, mode); } +static bool +amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) +{ + struct drm_device *dev = adev_to_drm(adev); + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper || !fb_helper->buffer) + return false; + + if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj) + return false; + + return true; +} + int amdgpu_display_suspend_helper(struct amdgpu_device *adev) { struct drm_device *dev = adev_to_drm(adev); @@ -1563,10 +1578,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); + if (!amdgpu_display_robj_is_fb(adev, robj)) { + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ede2fa56f6c9..16699158e00d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -594,17 +594,20 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; + } else { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 798c56214a23..aebc384531ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -518,6 +518,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(9, 1, 0): /* RENOIR looks like RAVEN */ case IP_VERSION(9, 3, 0): + /* GC 10.3.7 */ + case IP_VERSION(10, 3, 7): if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -540,8 +542,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): - /* GC 10.3.7 */ - case IP_VERSION(10, 3, 7): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b4cf8717f554..89011bae7588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -320,6 +320,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 801f6fa692e9..6de63ea6687e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) atomic64_read(&adev->visible_pin_size), vram_gtt.vram_size); vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size; - vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0; @@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.cpu_accessible_vram.usable_heap_size * 3 / 4; mem.gtt.total_heap_size = gtt_man->size; - mem.gtt.total_heap_size *= PAGE_SIZE; mem.gtt.usable_heap_size = mem.gtt.total_heap_size - atomic64_read(&adev->gart_pin_size); mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2de9309a4193..dac202ae864d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; + /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */ + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); + } + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); @@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) - DRM_WARN("Failed to reset error counter and error status"); + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, @@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, info->head.block); - return 0; } @@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (res) return res; + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } + ce += info.ce_count; ue += info.ue_count; } @@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) continue; amdgpu_ras_query_error_status(adev, &info); + + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } } } @@ -2278,8 +2295,9 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) !amdgpu_ras_asic_supported(adev)) return; - if (!(amdgpu_sriov_vf(adev) && - (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))) + /* If driver run on sriov guest side, only enable ras for aldebaran */ + if (amdgpu_sriov_vf(adev) && + adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2)) return; if (!adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,15 +30,12 @@ #include <drm/ttm/ttm_resource.h> #include <drm/ttm/ttm_range_manager.h> -#include "amdgpu_vram_mgr.h" - /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_t start; uint64_t size; uint64_t remaining; - void *node; - uint32_t mem_type; + struct drm_mm_node *node; }; /** @@ -55,63 +52,27 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { - struct drm_buddy_block *block; - struct list_head *head, *next; struct drm_mm_node *node; - if (!res) - goto fallback; - - BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - - cur->mem_type = res->mem_type; - - switch (cur->mem_type) { - case TTM_PL_VRAM: - head = &to_amdgpu_vram_mgr_resource(res)->blocks; - - block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!block) - goto fallback; - - while (start >= amdgpu_vram_mgr_block_size(block)) { - start -= amdgpu_vram_mgr_block_size(block); - - next = block->link.next; - if (next != head) - block = list_entry(next, struct drm_buddy_block, link); - } - - cur->start = amdgpu_vram_mgr_block_start(block) + start; - cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size); - cur->remaining = size; - cur->node = block; - break; - case TTM_PL_TT: - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; - - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); + if (!res || res->mem_type == TTM_PL_SYSTEM) { + cur->start = start; + cur->size = size; cur->remaining = size; - cur->node = node; - break; - default: - goto fallback; + cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + return; } - return; + BUG_ON(start + size > res->num_pages << PAGE_SHIFT); -fallback: - cur->start = start; - cur->size = size; + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; + + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; + cur->node = node; } /** @@ -124,9 +85,7 @@ fallback: */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_buddy_block *block; - struct drm_mm_node *node; - struct list_head *next; + struct drm_mm_node *node = cur->node; BUG_ON(size > cur->remaining); @@ -140,27 +99,9 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) return; } - switch (cur->mem_type) { - case TTM_PL_VRAM: - block = cur->node; - - next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); - - cur->node = block; - cur->start = amdgpu_vram_mgr_block_start(block); - cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); - break; - case TTM_PL_TT: - node = cur->node; - - cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); - break; - default: - return; - } + cur->node = ++node; + cur->start = node->start << PAGE_SHIFT; + cur->size = min(node->size << PAGE_SHIFT, cur->remaining); } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index be6f76a30ac6..3b4c19412625 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - /* Compute GTT size, either bsaed on 3/4th the size of RAM size + /* Compute GTT size, either based on 1/2 the size of RAM size * or whatever the user passed on module init */ if (amdgpu_gtt_size == -1) { struct sysinfo si; si_meminfo(&si); - gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->gmc.mc_vram_size), - ((uint64_t)si.totalram * si.mem_unit * 3/4)); - } - else + /* Certain GL unit tests for large textures can cause problems + * with the OOM killer since there is no way to link this memory + * to a process. This was originally mitigated (but not necessarily + * eliminated) by limiting the GTT size. The problem is this limit + * is often too low for many modern games so just make the limit 1/2 + * of system memory which aligns with TTM. The OOM accounting needs + * to be addressed, but we shouldn't prevent common 3D applications + * from being usable just to potentially mitigate that corner case. + */ + gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + (u64)si.totalram * si.mem_unit / 2); + } else { gtt_size = (uint64_t)amdgpu_gtt_size << 20; + } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..9120ae80ef52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,7 +26,6 @@ #include <linux/dma-direction.h> #include <drm/gpu_scheduler.h> -#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -39,6 +38,15 @@ #define AMDGPU_POISON 0xd0bed0be +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_mm mm; + spinlock_t lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; +}; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 576849e95296..108e8e8a1a36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -496,7 +496,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2ceeaa4c793a..dc76d2b3ce52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -679,6 +679,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, { struct amdgpu_vm_update_params params; struct amdgpu_vm_bo_base *entry; + bool flush_tlb_needed = false; int r, idx; if (list_empty(&vm->relocated)) @@ -697,6 +698,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, goto error; list_for_each_entry(entry, &vm->relocated, vm_status) { + /* vm_flush_needed after updating moved PDEs */ + flush_tlb_needed |= entry->moved; + r = amdgpu_vm_pde_update(¶ms, entry); if (r) goto error; @@ -706,8 +710,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - /* vm_flush_needed after updating PDEs */ - atomic64_inc(&vm->tlb_seq); + if (flush_tlb_needed) + atomic64_inc(&vm->tlb_seq); while (!list_empty(&vm->relocated)) { entry = list_first_entry(&vm->relocated, @@ -789,6 +793,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, flush_tlb |= adev->gmc.xgmi.num_physical_nodes && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + /* + * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB + */ + flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); + memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 49e4092f447f..0a7611648573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,10 +32,8 @@ #include "atom.h" struct amdgpu_vram_reservation { - u64 start; - u64 size; - struct list_head allocated; - struct list_head blocks; + struct list_head node; + struct drm_mm_node mm_node; }; static inline struct amdgpu_vram_mgr * @@ -188,18 +186,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible block size + * amdgpu_vram_mgr_vis_size - Calculate visible node size * * @adev: amdgpu_device pointer - * @block: DRM BUDDY block structure + * @node: MM node structure * - * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM + * Calculate how many bytes of the MM node are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_buddy_block *block) + struct drm_mm_node *node) { - u64 start = amdgpu_vram_mgr_block_start(block); - u64 end = start + amdgpu_vram_mgr_block_size(block); + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; if (start >= adev->gmc.visible_vram_size) return 0; @@ -220,9 +218,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; - u64 usage = 0; + unsigned pages = res->num_pages; + struct drm_mm_node *mm; + u64 usage; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -230,8 +228,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - list_for_each_entry(block, &vres->blocks, link) - usage += amdgpu_vram_mgr_vis_size(adev, block); + mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; + for (usage = 0; pages; pages -= mm->size, mm++) + usage += amdgpu_vram_mgr_vis_size(adev, mm); return usage; } @@ -241,30 +240,23 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; + struct drm_mm *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; - struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { - if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, - rsv->size, mm->chunk_size, &rsv->allocated, - DRM_BUDDY_RANGE_ALLOCATION)) - continue; - - block = amdgpu_vram_mgr_first_block(&rsv->allocated); - if (!block) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { + if (drm_mm_reserve_node(mm, &rsv->mm_node)) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->start, rsv->size); + rsv->mm_node.start, rsv->mm_node.size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, block); + vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); atomic64_add(vis_usage, &mgr->vis_usage); spin_lock(&man->bdev->lru_lock); - man->usage += rsv->size; + man->usage += rsv->mm_node.size << PAGE_SHIFT; spin_unlock(&man->bdev->lru_lock); - list_move(&rsv->blocks, &mgr->reserved_pages); + list_move(&rsv->node, &mgr->reserved_pages); } } @@ -286,16 +278,14 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->allocated); - INIT_LIST_HEAD(&rsv->blocks); + INIT_LIST_HEAD(&rsv->node); + rsv->mm_node.start = start >> PAGE_SHIFT; + rsv->mm_node.size = size >> PAGE_SHIFT; - rsv->start = start; - rsv->size = size; - - mutex_lock(&mgr->lock); - list_add_tail(&rsv->blocks, &mgr->reservations_pending); + spin_lock(&mgr->lock); + list_add_tail(&rsv->node, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return 0; } @@ -317,19 +307,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - mutex_lock(&mgr->lock); + spin_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = 0; goto out; } @@ -337,11 +327,33 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return ret; } /** + * amdgpu_vram_mgr_virt_start - update virtual start address + * + * @mem: ttm_resource to update + * @node: just allocated node + * + * Calculate a virtual BO start address to easily check if everything is CPU + * accessible. + */ +static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, + struct drm_mm_node *node) +{ + unsigned long start; + + start = node->start + node->size; + if (start > mem->num_pages) + start -= mem->num_pages; + else + start = 0; + mem->start = max(mem->start, start); +} + +/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -356,44 +368,46 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - u64 vis_usage = 0, max_bytes, cur_size, min_block_size; + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct amdgpu_vram_mgr_resource *vres; - u64 size, remaining_size, lpfn, fpfn; - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; - unsigned long pages_per_block; + uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct ttm_range_mgr_node *node; + struct drm_mm *mm = &mgr->mm; + enum drm_mm_insert_mode mode; + unsigned i; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = place->lpfn; if (!lpfn) - lpfn = man->size; - - fpfn = place->fpfn << PAGE_SHIFT; + lpfn = man->size >> PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; + mem_bytes = tbo->base.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_block = ~0ul; + pages_per_node = ~0ul; + num_nodes = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_block = HPAGE_PMD_NR; + pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_block = 2UL << (20UL - PAGE_SHIFT); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, - tbo->page_alignment); + pages_per_node = max_t(uint32_t, pages_per_node, + tbo->page_alignment); + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); } - vres = kzalloc(sizeof(*vres), GFP_KERNEL); - if (!vres) + node = kvmalloc(struct_size(node, mm_nodes, num_nodes), + GFP_KERNEL | __GFP_ZERO); + if (!node) return -ENOMEM; - ttm_resource_init(tbo, place, &vres->base); + ttm_resource_init(tbo, place, &node->base); /* bail out quickly if there's likely not enough VRAM for this BO */ if (ttm_resource_manager_usage(man) > max_bytes) { @@ -401,130 +415,66 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - INIT_LIST_HEAD(&vres->blocks); - + mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) - vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - - if (fpfn || lpfn != man->size) - /* Allocate blocks in desired range */ - vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - - remaining_size = vres->base.num_pages << PAGE_SHIFT; - - mutex_lock(&mgr->lock); - while (remaining_size) { - if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; - else - min_block_size = mgr->default_page_size; - - BUG_ON(min_block_size < mm->chunk_size); - - /* Limit maximum size to 2GiB due to SG table limitations */ - size = min(remaining_size, 2ULL << 30); - - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; - - cur_size = size; - - if (fpfn + size != place->lpfn << PAGE_SHIFT) { - /* - * Except for actual range allocation, modify the size and - * min_block_size conforming to continuous flag enablement - */ - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* - * Modify the size value if size is not - * aligned with min_block_size - */ - } else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); + mode = DRM_MM_INSERT_HIGH; + + pages_left = node->base.num_pages; + + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); + + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { + uint32_t alignment = tbo->page_alignment; + + if (pages >= pages_per_node) + alignment = pages_per_node; + + r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, + alignment, 0, place->fpfn, + lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; } + goto error_free; } - r = drm_buddy_alloc_blocks(mm, fpfn, - lpfn, - size, - min_block_size, - &vres->blocks, - vres->flags); - if (unlikely(r)) - goto error_free_blocks; - - if (size > remaining_size) - remaining_size = 0; - else - remaining_size -= size; - } - mutex_unlock(&mgr->lock); - - if (cur_size != size) { - struct drm_buddy_block *block; - struct list_head *trim_list; - u64 original_size; - LIST_HEAD(temp); - - trim_list = &vres->blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; - - /* - * If size value is rounded up to min_block_size, trim the last - * block to the required size - */ - if (!list_is_singular(&vres->blocks)) { - block = list_last_entry(&vres->blocks, typeof(*block), link); - list_move_tail(&block->link, &temp); - trim_list = &temp; - /* - * Compute the original_size value by subtracting the - * last block size with (aligned size - original size) - */ - original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); - } + vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); + amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); + pages_left -= pages; + ++i; - mutex_lock(&mgr->lock); - drm_buddy_block_trim(mm, - original_size, - trim_list); - mutex_unlock(&mgr->lock); - - if (!list_empty(&temp)) - list_splice_tail(trim_list, &vres->blocks); - } - - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - - block = amdgpu_vram_mgr_first_block(&vres->blocks); - if (!block) { - r = -EINVAL; - goto error_fini; + if (pages > pages_left) + pages = pages_left; } + spin_unlock(&mgr->lock); - vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; - - if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) - vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (i == 1) + node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; if (adev->gmc.xgmi.connected_to_cpu) - vres->base.bus.caching = ttm_cached; + node->base.bus.caching = ttm_cached; else - vres->base.bus.caching = ttm_write_combined; + node->base.bus.caching = ttm_write_combined; atomic64_add(vis_usage, &mgr->vis_usage); - *res = &vres->base; + *res = &node->base; return 0; -error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); +error_free: + while (i--) + drm_mm_remove_node(&node->mm_nodes[i]); + spin_unlock(&mgr->lock); error_fini: - ttm_resource_fini(man, &vres->base); - kfree(vres); + ttm_resource_fini(man, &node->base); + kvfree(node); return r; } @@ -540,26 +490,27 @@ error_fini: static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; uint64_t vis_usage = 0; + unsigned i, pages; - mutex_lock(&mgr->lock); - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + spin_lock(&mgr->lock); + for (i = 0, pages = res->num_pages; pages; + pages -= node->mm_nodes[i].size, ++i) { + struct drm_mm_node *mm = &node->mm_nodes[i]; + drm_mm_remove_node(mm); + vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); + } amdgpu_vram_mgr_do_reserve(man); - - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); ttm_resource_fini(man, res); - kfree(vres); + kvfree(node); } /** @@ -591,7 +542,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_BUDDY blocks to export */ + /* Determine the number of DRM_MM nodes to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -607,10 +558,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_BUDDY blocks to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_BUDDY block + * Walk down DRM_MM nodes to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_MM node * and the number of bytes from it. Access the following - * DRM_BUDDY block(s) if more buffer needs to exported + * DRM_MM node(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -697,22 +648,13 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); - mutex_lock(&mgr->lock); - drm_printf(printer, "default_page_size: %lluKiB\n", - mgr->default_page_size >> 10); - - drm_buddy_print(mm, printer); - - drm_printf(printer, "reserved:\n"); - list_for_each_entry(block, &mgr->reserved_pages, link) - drm_buddy_block_print(mm, block, printer); - mutex_unlock(&mgr->lock); + spin_lock(&mgr->lock); + drm_mm_print(&mgr->mm, printer); + spin_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -732,21 +674,16 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; - int err; ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - - mutex_init(&mgr->lock); + drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT); + spin_lock_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); - mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -774,16 +711,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - mutex_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); kfree(rsv); } - drm_buddy_fini(&mgr->mm); - mutex_unlock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h deleted file mode 100644 index 9a2db87186c7..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: MIT - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __AMDGPU_VRAM_MGR_H__ -#define __AMDGPU_VRAM_MGR_H__ - -#include <drm/drm_buddy.h> - -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_buddy mm; - /* protects access to buffer objects */ - struct mutex lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t vis_usage; - u64 default_page_size; -}; - -struct amdgpu_vram_mgr_resource { - struct ttm_resource base; - struct list_head blocks; - unsigned long flags; -}; - -static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) -{ - return drm_buddy_block_offset(block); -} - -static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) -{ - return PAGE_SIZE << drm_buddy_block_order(block); -} - -static inline struct drm_buddy_block * -amdgpu_vram_mgr_first_block(struct list_head *list) -{ - return list_first_entry_or_null(list, struct drm_buddy_block, link); -} - -static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) -{ - struct drm_buddy_block *block; - u64 start, size; - - block = amdgpu_vram_mgr_first_block(head); - if (!block) - return false; - - while (head != block->link.next) { - start = amdgpu_vram_mgr_block_start(block); - size = amdgpu_vram_mgr_block_size(block); - - block = list_entry(block->link.next, struct drm_buddy_block, link); - if (start + size != amdgpu_vram_mgr_block_start(block)) - return false; - } - - return true; -} - -static inline struct amdgpu_vram_mgr_resource * -to_amdgpu_vram_mgr_resource(struct ttm_resource *res) -{ - return container_of(res, struct amdgpu_vram_mgr_resource, base); -} - -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..9c964cd3b5d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..e0ad9f27dc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 982855e6cf52..3caf6f386042 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 84440741c60b..7c75df5bffed 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8c0a3fc7aaa6..a4a6751b1e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1096,6 +1096,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -1316,7 +1317,7 @@ static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *ade memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) - *(uint64_t *)fw_autoload_mask |= 1 << id; + *(uint64_t *)fw_autoload_mask |= 1ULL << id; } static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, @@ -1983,7 +1984,7 @@ static int gfx_v11_0_init_csb(struct amdgpu_device *adev) return 0; } -void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); @@ -6028,6 +6029,7 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, break; default: BUG(); + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index a0c0b7d9f444..7f4b480ae66e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -638,6 +638,12 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { + adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif /* In case the PCI BAR is larger than the actual amount of vram */ adev->gmc.visible_vram_size = adev->gmc.aper_size; if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 5d2dfeff8fe5..d63d3f2b8a16 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -299,7 +299,7 @@ static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] = IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0) }; -void program_imu_rlc_ram(struct amdgpu_device *adev, +static void program_imu_rlc_ram(struct amdgpu_device *adev, const struct imu_rlc_ram_golden *regs, const u32 array_size) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index d2722adabd1b..f3c1af5130ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, { unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT)); + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, (vmid | (vmid << 4))); @@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ 8 + 16, - .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */ .emit_ib = jpeg_v2_0_dec_ring_emit_ib, .emit_fence = jpeg_v2_0_dec_ring_emit_fence, .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 1a03baa59755..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -41,6 +41,7 @@ #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index fcf51947bb18..7eee004cf3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -541,7 +541,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) /* This function is for backdoor MES firmware */ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum admgpu_mes_pipe pipe, bool prime_icache) { int r; uint32_t data; @@ -593,16 +593,18 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF); - /* invalidate ICACHE */ - data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); - WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); - - /* prime the ICACHE. */ - data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); - WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + if (prime_icache) { + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1044,17 +1046,19 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) int r = 0; if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE); + + r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false); if (r) { - DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); + DRM_ERROR("failed to load MES fw, r=%d\n", r); return r; } - r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE); + r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true); if (r) { - DRM_ERROR("failed to load MES fw, r=%d\n", r); + DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); return r; } + } mes_v11_0_enable(adev, true); @@ -1086,7 +1090,7 @@ static int mes_v11_0_hw_init(void *handle) if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v11_0_load_microcode(adev, - AMDGPU_MES_SCHED_PIPE); + AMDGPU_MES_SCHED_PIPE, true); if (r) { DRM_ERROR("failed to MES fw, r=%d\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d016e3c3e221..b3fba8dea63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -170,6 +170,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 06b2635b142a..83c6ccaaa9e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -469,6 +469,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } } + /** * sdma_v5_2_gfx_stop - stop the gfx async dma engines * @@ -514,21 +515,17 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines - * context switch for an instance + * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer - * @instance_idx: the index of the SDMA instance + * @enable: enable/disable the DMA MEs context switch. * - * Unhalt the async dma engines context switch. + * Halt or unhalt the async dma engines context switch. */ -static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx) +static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl, phase_quantum = 0; - - if (WARN_ON(instance_idx >= adev->sdma.num_instances)) { - return; - } + int i; if (amdgpu_sdma_phase_quantum) { unsigned value = amdgpu_sdma_phase_quantum; @@ -552,68 +549,50 @@ static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, phase_quantum = value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; - - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM), - phase_quantum); - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM), - phase_quantum); - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM), - phase_quantum); } - if (!amdgpu_sriov_vf(adev)) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, - AUTO_CTXSW_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl); + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && amdgpu_sdma_phase_quantum) { + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } + + if (!amdgpu_sriov_vf(adev)) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } } + } /** - * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch + * sdma_v5_2_enable - stop the async dma engines * * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. * - * Halt the async dma engines context switch. + * Halt or unhalt the async dma engines. */ -static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev) +static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; - if (amdgpu_sriov_vf(adev)) - return; - - for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, - AUTO_CTXSW_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + if (!enable) { + sdma_v5_2_gfx_stop(adev); + sdma_v5_2_rlc_stop(adev); } -} - -/** - * sdma_v5_2_halt - stop the async dma engines - * - * @adev: amdgpu_device pointer - * - * Halt the async dma engines. - */ -static void sdma_v5_2_halt(struct amdgpu_device *adev) -{ - int i; - u32 f32_cntl; - - sdma_v5_2_gfx_stop(adev); - sdma_v5_2_rlc_stop(adev); if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); } } @@ -625,9 +604,6 @@ static void sdma_v5_2_halt(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Set up the gfx DMA ring buffers and enable them. - * It assumes that the dma engine is stopped for each instance. - * The function enables the engine and preemptions sequentially for each instance. - * * Returns 0 for success, error for failure. */ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) @@ -769,7 +745,10 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) ring->sched.ready = true; - sdma_v5_2_ctx_switch_enable_for_instance(adev, i); + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v5_2_ctx_switch_enable(adev, true); + sdma_v5_2_enable(adev, true); + } r = amdgpu_ring_test_ring(ring); if (r) { @@ -813,7 +792,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) int i, j; /* halt the MEs */ - sdma_v5_2_halt(adev); + sdma_v5_2_enable(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { if (!adev->sdma.instance[i].fw) @@ -885,8 +864,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev) int r = 0; if (amdgpu_sriov_vf(adev)) { - sdma_v5_2_ctx_switch_disable_all(adev); - sdma_v5_2_halt(adev); + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); /* set RB registers */ r = sdma_v5_2_gfx_resume(adev); @@ -910,10 +889,12 @@ static int sdma_v5_2_start(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); sdma_v5_2_soft_reset(adev); + /* unhalt the MEs */ + sdma_v5_2_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v5_2_ctx_switch_enable(adev, true); - /* Soft reset supposes to disable the dma engine and preemption. - * Now start the gfx rings and rlc compute queues. - */ + /* start the gfx rings and rlc compute queues */ r = sdma_v5_2_gfx_resume(adev); if (adev->in_s0ix) amdgpu_gfx_off_ctrl(adev, true); @@ -1447,8 +1428,8 @@ static int sdma_v5_2_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) return 0; - sdma_v5_2_ctx_switch_disable_all(adev); - sdma_v5_2_halt(adev); + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3cabceee5f57..39405f0db824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) + if (atomic_read(&p->entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); + drm_sched_entity_modify_sched(p->entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p, job); + r = vcn_v3_0_limit_sched(p); if (r) goto out; } @@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, job, - ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 5e9adbc71bbd..cbfb32b3d235 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1516,6 +1516,8 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); break; case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */ + case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */ pcache_info = yellow_carp_cache_info; num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8667e3df2d0b..a08769c5e94b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -73,6 +73,8 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(4, 1, 2):/* RENOIR */ case IP_VERSION(5, 2, 1):/* VANGOGH */ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ + case IP_VERSION(5, 2, 6):/* GC 10.3.6 */ + case IP_VERSION(5, 2, 7):/* GC 10.3.7 */ case IP_VERSION(6, 0, 1): kfd->device_info.num_sdma_queues_per_engine = 2; break; @@ -127,6 +129,8 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(9, 4, 2): /* ALDEBARAN */ case IP_VERSION(10, 3, 1): /* VANGOGH */ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ + case IP_VERSION(10, 3, 6): /* GC 10.3.6 */ + case IP_VERSION(10, 3, 7): /* GC 10.3.7 */ case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */ case IP_VERSION(10, 1, 4): case IP_VERSION(10, 1, 10): /* NAVI10 */ @@ -178,7 +182,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd, if (gc_version < IP_VERSION(11, 0, 0)) { /* Navi2x+, Navi1x+ */ - if (gc_version >= IP_VERSION(10, 3, 0)) + if (gc_version == IP_VERSION(10, 3, 6)) + kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version == IP_VERSION(10, 3, 7)) + kfd->device_info.no_atomic_fw_version = 3; + else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.no_atomic_fw_version = 145; @@ -368,6 +376,16 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!vf) f2g = &gfx_v10_3_kfd2kgd; break; + case IP_VERSION(10, 3, 6): + gfx_target_version = 100306; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; + break; + case IP_VERSION(10, 3, 7): + gfx_target_version = 100307; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; + break; case IP_VERSION(11, 0, 0): gfx_target_version = 110000; f2g = &gfx_v11_kfd2kgd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 997650d597ec..e44376c2ecdc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -296,7 +296,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch) { - uint64_t npages = migrate->cpages; + uint64_t npages = migrate->npages; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; dma_addr_t *src; @@ -344,7 +344,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, mfence); if (r) goto out_free_vram_pages; - amdgpu_res_next(&cursor, j << PAGE_SHIFT); + amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT); j = 0; } else { amdgpu_res_next(&cursor, PAGE_SIZE); @@ -590,7 +590,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, continue; } src[i] = svm_migrate_addr(adev, spage); - if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) { r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, FROM_VRAM_TO_RAM, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2ebf0132c25b..7b332246eda3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1295,7 +1295,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, pte_flags, - last_start - prange->start, + (last_start - prange->start) << PAGE_SHIFT, bo_adev ? bo_adev->vm_manager.vram_base_offset : 0, NULL, dma_addr, &vm->last_update); @@ -2307,6 +2307,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; + if (!mmget_not_zero(mni->mm)) + return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2333,6 +2335,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); + mmput(mni->mm); return true; } diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index b4029c0d5d8c..ec6771e87e73 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 70be67a56673..3087dd1a1856 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -72,6 +72,7 @@ #include <linux/pci.h> #include <linux/firmware.h> #include <linux/component.h> +#include <linux/dmi.h> #include <drm/display/drm_dp_mst_helper.h> #include <drm/display/drm_hdmi_helper.h> @@ -462,6 +463,26 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } +static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -500,7 +521,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) * if a pageflip happened inside front-porch. */ if (vrr_active) { - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ if (acrtc->dm_irq_params.stream && @@ -552,7 +573,7 @@ static void dm_crtc_high_irq(void *interrupt_params) * to dm_vupdate_high_irq after end of front-porch. */ if (!vrr_active) - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /** * Following stuff must happen at start of vblank, for crc @@ -1382,6 +1403,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } +static const struct dmi_system_id hpd_disconnect_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + {} +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + const struct dmi_system_id *dmi_id; + + dm->aux_hpd_discon_quirk = false; + + dmi_id = dmi_first_match(hpd_disconnect_quirk_table); + if (dmi_id) { + dm->aux_hpd_discon_quirk = true; + DRM_INFO("aux_hpd_discon_quirk attached\n"); + } +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1508,6 +1564,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } INIT_LIST_HEAD(&adev->dm.da_list); + + retrieve_dmi_info(&adev->dm); + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -1594,7 +1653,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work(); #endif - if (dc_enable_dmub_notifications(adev->dm.dc)) { + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { @@ -1630,6 +1689,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point, for + * example HPD from DPIA. + */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) + dc_enable_dmub_outbox(adev->dm.dc); + /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); @@ -2619,9 +2685,6 @@ static int dm_resume(void *handle) */ link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - r = dm_dmub_hw_init(adev); if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); @@ -2639,6 +2702,11 @@ static int dm_resume(void *handle) } } + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + WARN_ON(!dc_commit_state(dm->dc, dc_state)); dm_gpureset_commit_state(dm->cached_dc_state, dm); @@ -2660,13 +2728,15 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); - /* Re-enable outbox interrupts for DPIA. */ - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - /* Before powering on DC we need to re-initialize DMUB. */ dm_dmub_hw_resume(adev); + /* Re-enable outbox interrupts for DPIA. */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); @@ -2812,7 +2882,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { - u32 max_cll, min_cll, max, min, q, r; + u32 max_avg, min_cll, max, min, q, r; struct amdgpu_dm_backlight_caps *caps; struct amdgpu_display_manager *dm; struct drm_connector *conn_base; @@ -2842,7 +2912,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps = &dm->backlight_caps[i]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; - max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; + max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall; min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; if (caps->ext_caps->bits.oled == 1 /*|| @@ -2870,8 +2940,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) * The results of the above expressions can be verified at * pre_computed_values. */ - q = max_cll >> 5; - r = max_cll % 32; + q = max_avg >> 5; + r = max_avg % 32; max = (1 << q) * pre_computed_values[r]; // min luminance: maxLum * (CV/255)^2 / 100 @@ -3822,7 +3892,8 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; /* indicates support for immediate flip */ adev_to_drm(adev)->mode_config.async_page_flip = true; @@ -4259,9 +4330,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } } - /* Disable vblank IRQs aggressively for power-saving. */ - adev_to_drm(adev)->vblank_disable_immediate = true; - /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; @@ -5409,7 +5477,7 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state, } } - if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) *pre_multiplied_alpha = false; } @@ -9137,6 +9205,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct amdgpu_bo *abo; uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); + bool cursor_update = false; bool pflip_present = false; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; @@ -9172,8 +9241,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); /* Cursor plane is handled after stream updates */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if ((fb && crtc == pcrtc) || + (old_plane_state->fb && old_plane_state->crtc == pcrtc)) + cursor_update = true; + continue; + } if (!fb || !crtc || pcrtc != crtc) continue; @@ -9336,6 +9410,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.vrr_infopacket = &acrtc_state->stream->vrr_infopacket; } + } else if (cursor_update && acrtc_state->active_planes > 0 && + !acrtc_state->force_dpms_off && + acrtc_attach->base.state->event) { + drm_crtc_vblank_get(pcrtc); + + spin_lock_irqsave(&pcrtc->dev->event_lock, flags); + + acrtc_attach->event = acrtc_attach->base.state->event; + acrtc_attach->base.state->event = NULL; + + spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } /* Update the planes if changed or disable if we don't have any. */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index aa34c0068f41..e80ef93f6550 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,14 @@ struct amdgpu_display_manager { * last successfully applied backlight values. */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; + + /** + * @aux_hpd_discon_quirk: + * + * quirk for hpd discon while aux is on-going. + * occurred on certain intel platform + */ + bool aux_hpd_discon_quirk; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9221b6690a4a..2b9b095e5f03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -56,6 +56,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, ssize_t result = 0; struct aux_payload payload; enum aux_return_code_type operation_result; + struct amdgpu_device *adev; + struct ddc_service *ddc; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +76,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); + /* + * w/a on certain intel platform where hpd is unexpected to pull low during + * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON + * aux transaction is succuess in such case, therefore bypass the error + */ + ddc = TO_DM_AUX(aux)->ddc_service; + adev = ddc->ctx->driver_context; + if (adev->dm.aux_hpd_discon_quirk) { + if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && + operation_result == AUX_RET_ERROR_HPD_DISCON) { + result = 0; + operation_result = AUX_RET_SUCCESS; + } + } + if (payload.write && result >= 0) result = msg->size; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index ceb34376decb..bca5f01da763 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -287,8 +287,11 @@ static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) void dcn31_init_clocks(struct clk_mgr *clk_mgr) { + uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate + clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; @@ -638,8 +641,14 @@ static void dcn31_set_low_power_state(struct clk_mgr *clk_mgr_base) } } +int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base) +{ + return clk_mgr_base->clks.ref_dtbclk_khz; +} + static struct clk_mgr_funcs dcn31_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn31_update_clocks, .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn31_enable_pme_wa, @@ -719,7 +728,7 @@ void dcn31_clk_mgr_construct( } clk_mgr->base.base.dprefclk_khz = 600000; - clk_mgr->base.dccg->ref_dtbclk_khz = 600000; + clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; dce_clock_read_ss_info(&clk_mgr->base); /*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/ //clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(clk_mgr_internal, clk_mgr->base.base.dprefclk_khz); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index 961b10a49486..be06fdbd0c22 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -51,6 +51,8 @@ void dcn31_clk_mgr_construct(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg); +int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base); + void dcn31_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); #endif //__DCN31_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index a2ade6e93f5e..f4381725b210 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -41,9 +41,7 @@ #include "dc_dmub_srv.h" -#if defined (CONFIG_DRM_AMD_DC_DP2_0) #include "dc_link_dp.h" -#endif #define TO_CLK_MGR_DCN315(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn315, base) @@ -552,7 +550,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params( if (!bw_params->clk_table.entries[i].dtbclk_mhz) bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; } - ASSERT(bw_params->clk_table.entries[i].dcfclk_mhz); + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; if (!bw_params->num_channels) @@ -580,6 +578,7 @@ static void dcn315_enable_pme_wa(struct clk_mgr *clk_mgr_base) static struct clk_mgr_funcs dcn315_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn315_update_clocks, .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn315_enable_pme_wa, @@ -656,9 +655,9 @@ void dcn315_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = 600000; clk_mgr->base.base.dprefclk_khz = dcn315_smu_get_dpref_clk(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; + clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; dce_clock_read_ss_info(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz); + clk_mgr->base.base.clks.ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz); clk_mgr->base.base.bw_params = &dcn315_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index fc3af81ed6c6..e4bb9c6193b5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -571,6 +571,7 @@ static void dcn316_clk_mgr_helper_populate_bw_params( static struct clk_mgr_funcs dcn316_funcs = { .enable_pme_wa = dcn316_enable_pme_wa, .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn316_update_clocks, .init_clocks = dcn31_init_clocks, .are_clock_states_equal = dcn31_are_clock_states_equal, @@ -685,7 +686,7 @@ void dcn316_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = 600000; clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; + clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; dce_clock_read_ss_info(&clk_mgr->base); /*clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index dc30ac366a50..d8eee89e63ce 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -114,8 +114,8 @@ static const struct dc_link_settings fail_safe_link_settings = { static bool decide_fallback_link_setting( struct dc_link *link, - struct dc_link_settings initial_link_settings, - struct dc_link_settings *current_link_setting, + struct dc_link_settings *max, + struct dc_link_settings *cur, enum link_training_result training_result); static void maximize_lane_settings(const struct link_training_settings *lt_settings, struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]); @@ -944,7 +944,7 @@ static void override_lane_settings(const struct link_training_settings *lt_setti return; - for (lane = 1; lane < LANE_COUNT_DP_MAX; lane++) { + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { if (lt_settings->voltage_swing) lane_settings[lane].VOLTAGE_SWING = *lt_settings->voltage_swing; if (lt_settings->pre_emphasis) @@ -2784,6 +2784,7 @@ bool perform_link_training_with_retries( enum dp_panel_mode panel_mode = dp_get_panel_mode(link); enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0; struct dc_link_settings cur_link_settings = *link_setting; + struct dc_link_settings max_link_settings = *link_setting; const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); int fail_count = 0; bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */ @@ -2793,7 +2794,6 @@ bool perform_link_training_with_retries( dp_trace_commit_lt_init(link); - if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING) /* We need to do this before the link training to ensure the idle * pattern in SST mode will be sent right after the link training @@ -2909,19 +2909,15 @@ bool perform_link_training_with_retries( uint32_t req_bw; uint32_t link_bw; - decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status); - /* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to - * minimum link bandwidth. + decide_fallback_link_setting(link, &max_link_settings, + &cur_link_settings, status); + /* Fail link training if reduced link bandwidth no longer meets + * stream requirements. */ req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing); link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings); - is_link_bw_low = (req_bw > link_bw); - is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) && - (cur_link_settings.lane_count <= LANE_COUNT_ONE)); - - if (is_link_bw_low) - DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n", - __func__, req_bw, link_bw); + if (req_bw > link_bw) + break; } msleep(delay_between_attempts); @@ -3309,7 +3305,7 @@ static bool dp_verify_link_cap( int *fail_count) { struct dc_link_settings cur_link_settings = {0}; - struct dc_link_settings initial_link_settings = *known_limit_link_setting; + struct dc_link_settings max_link_settings = *known_limit_link_setting; bool success = false; bool skip_video_pattern; enum clock_source_id dp_cs_id = get_clock_source_id(link); @@ -3318,7 +3314,7 @@ static bool dp_verify_link_cap( struct link_resource link_res; memset(&irq_data, 0, sizeof(irq_data)); - cur_link_settings = initial_link_settings; + cur_link_settings = max_link_settings; /* Grant extended timeout request */ if ((link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && (link->dpcd_caps.lttpr_caps.max_ext_timeout > 0)) { @@ -3361,7 +3357,7 @@ static bool dp_verify_link_cap( dp_trace_lt_result_update(link, status, true); dp_disable_link_phy(link, &link_res, link->connector_signal); } while (!success && decide_fallback_link_setting(link, - initial_link_settings, &cur_link_settings, status)); + &max_link_settings, &cur_link_settings, status)); link->verified_link_cap = success ? cur_link_settings : fail_safe_link_settings; @@ -3596,16 +3592,19 @@ static bool decide_fallback_link_setting_max_bw_policy( */ static bool decide_fallback_link_setting( struct dc_link *link, - struct dc_link_settings initial_link_settings, - struct dc_link_settings *current_link_setting, + struct dc_link_settings *max, + struct dc_link_settings *cur, enum link_training_result training_result) { - if (!current_link_setting) + if (!cur) + return false; + if (!max) return false; - if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING || + + if (dp_get_link_encoding_format(max) == DP_128b_132b_ENCODING || link->dc->debug.force_dp2_lt_fallback_method) - return decide_fallback_link_setting_max_bw_policy(link, &initial_link_settings, - current_link_setting, training_result); + return decide_fallback_link_setting_max_bw_policy(link, max, cur, + training_result); switch (training_result) { case LINK_TRAINING_CR_FAIL_LANE0: @@ -3613,28 +3612,18 @@ static bool decide_fallback_link_setting( case LINK_TRAINING_CR_FAIL_LANE23: case LINK_TRAINING_LQA_FAIL: { - if (!reached_minimum_link_rate - (current_link_setting->link_rate)) { - current_link_setting->link_rate = - reduce_link_rate( - current_link_setting->link_rate); - } else if (!reached_minimum_lane_count - (current_link_setting->lane_count)) { - current_link_setting->link_rate = - initial_link_settings.link_rate; + if (!reached_minimum_link_rate(cur->link_rate)) { + cur->link_rate = reduce_link_rate(cur->link_rate); + } else if (!reached_minimum_lane_count(cur->lane_count)) { + cur->link_rate = max->link_rate; if (training_result == LINK_TRAINING_CR_FAIL_LANE0) return false; else if (training_result == LINK_TRAINING_CR_FAIL_LANE1) - current_link_setting->lane_count = - LANE_COUNT_ONE; - else if (training_result == - LINK_TRAINING_CR_FAIL_LANE23) - current_link_setting->lane_count = - LANE_COUNT_TWO; + cur->lane_count = LANE_COUNT_ONE; + else if (training_result == LINK_TRAINING_CR_FAIL_LANE23) + cur->lane_count = LANE_COUNT_TWO; else - current_link_setting->lane_count = - reduce_lane_count( - current_link_setting->lane_count); + cur->lane_count = reduce_lane_count(cur->lane_count); } else { return false; } @@ -3642,17 +3631,17 @@ static bool decide_fallback_link_setting( } case LINK_TRAINING_EQ_FAIL_EQ: { - if (!reached_minimum_lane_count - (current_link_setting->lane_count)) { - current_link_setting->lane_count = - reduce_lane_count( - current_link_setting->lane_count); - } else if (!reached_minimum_link_rate - (current_link_setting->link_rate)) { - current_link_setting->link_rate = - reduce_link_rate( - current_link_setting->link_rate); - current_link_setting->lane_count = initial_link_settings.lane_count; + if (!reached_minimum_lane_count(cur->lane_count)) { + cur->lane_count = reduce_lane_count(cur->lane_count); + } else if (!reached_minimum_link_rate(cur->link_rate)) { + cur->link_rate = reduce_link_rate(cur->link_rate); + /* Reduce max link rate to avoid potential infinite loop. + * Needed so that any subsequent CR_FAIL fallback can't + * re-set the link rate higher than the link rate from + * the latest EQ_FAIL fallback. + */ + max->link_rate = cur->link_rate; + cur->lane_count = max->lane_count; } else { return false; } @@ -3660,12 +3649,15 @@ static bool decide_fallback_link_setting( } case LINK_TRAINING_EQ_FAIL_CR: { - if (!reached_minimum_link_rate - (current_link_setting->link_rate)) { - current_link_setting->link_rate = - reduce_link_rate( - current_link_setting->link_rate); - current_link_setting->lane_count = initial_link_settings.lane_count; + if (!reached_minimum_link_rate(cur->link_rate)) { + cur->link_rate = reduce_link_rate(cur->link_rate); + /* Reduce max link rate to avoid potential infinite loop. + * Needed so that any subsequent CR_FAIL fallback can't + * re-set the link rate higher than the link rate from + * the latest EQ_FAIL fallback. + */ + max->link_rate = cur->link_rate; + cur->lane_count = max->lane_count; } else { return false; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 6774dd8bb53e..3fe3fbac1e63 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1117,12 +1117,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * on certain displays, such as the Sharp 4k. 36bpp is needed * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc - * precision on at least DCN display engines. However, at least - * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, - * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 - * did not show such problems, so this seems to be the exception. + * precision on DCN display engines, but apparently not for DCE, as + * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have + * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, + * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel + * passthrough). Therefore only use 36 bpp on DCN where it is actually needed. */ - if (plane_state->ctx->dce_version > DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_MAX) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3960c74482be..817028d3c4a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.186" +#define DC_VER "3.2.187" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -416,6 +416,7 @@ struct dc_clocks { bool p_state_change_support; enum dcn_zstate_support_state zstate_support; bool dtbclk_en; + int ref_dtbclk_khz; enum dcn_pwr_state pwr_state; /* * Elements below are not compared for the purposes of @@ -719,6 +720,8 @@ struct dc_debug_options { bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; union aux_wake_wa_options aux_wake_wa; + /* uses value at boot and disables switch */ + bool disable_dtb_ref_clk_switch; uint8_t psr_power_use_phy_fsm; enum dml_hostvm_override_opts dml_hostvm_override; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7eff7811769d..5f2afa5b4814 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1766,29 +1766,9 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) break; } } - - /* - * TO-DO: So far the code logic below only addresses single eDP case. - * For dual eDP case, there are a few things that need to be - * implemented first: - * - * 1. Change the fastboot logic above, so eDP link[0 or 1]'s - * stream[0 or 1] will all be checked. - * - * 2. Change keep_edp_vdd_on to an array, and maintain keep_edp_vdd_on - * for each eDP. - * - * Once above 2 things are completed, we can then change the logic below - * correspondingly, so dual eDP case will be fully covered. - */ - - // We are trying to enable eDP, don't power down VDD if eDP stream is existing - if ((edp_stream_num == 1 && edp_streams[0] != NULL) || can_apply_edp_fast_boot) { + // We are trying to enable eDP, don't power down VDD + if (can_apply_edp_fast_boot) keep_edp_vdd_on = true; - DC_LOG_EVENT_LINK_TRAINING("Keep eDP Vdd on\n"); - } else { - DC_LOG_EVENT_LINK_TRAINING("No eDP stream enabled, turn eDP Vdd off\n"); - } } // Check seamless boot support diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 970b65efeac1..eaa7032f0f1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -212,6 +212,9 @@ static void dpp2_cnv_setup ( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c index 8b6505b7dca8..f50ab961bc17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c @@ -153,6 +153,9 @@ static void dpp201_cnv_setup( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index ab3918c0a15b..0dcc07531643 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -294,6 +294,9 @@ static void dpp3_cnv_setup ( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 287a1066b547..bbc58d167c63 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -513,12 +513,10 @@ void dccg31_set_physymclk( /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ static void dccg31_set_dtbclk_dto( struct dccg *dccg, - int dtbclk_inst, - int req_dtbclk_khz, - int num_odm_segments, - const struct dc_crtc_timing *timing) + struct dtbclk_dto_params *params) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + int req_dtbclk_khz = params->pixclk_khz; uint32_t dtbdto_div; /* Mode DTBDTO Rate DTBCLK_DTO<x>_DIV Register @@ -529,57 +527,53 @@ static void dccg31_set_dtbclk_dto( * DSC native 4:2:2 pixel rate/2 4 * Other modes pixel rate 8 */ - if (num_odm_segments == 4) { + if (params->num_odm_segments == 4) { dtbdto_div = 2; - req_dtbclk_khz = req_dtbclk_khz / 4; - } else if ((num_odm_segments == 2) || - (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) || - (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422 - && !timing->dsc_cfg.ycbcr422_simple)) { + req_dtbclk_khz = params->pixclk_khz / 4; + } else if ((params->num_odm_segments == 2) || + (params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) || + (params->timing->flags.DSC && params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR422 + && !params->timing->dsc_cfg.ycbcr422_simple)) { dtbdto_div = 4; - req_dtbclk_khz = req_dtbclk_khz / 2; + req_dtbclk_khz = params->pixclk_khz / 2; } else dtbdto_div = 8; - if (dccg->ref_dtbclk_khz && req_dtbclk_khz) { + if (params->ref_dtbclk_khz && req_dtbclk_khz) { uint32_t modulo, phase; // phase / modulo = dtbclk / dtbclk ref - modulo = dccg->ref_dtbclk_khz * 1000; - phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + dccg->ref_dtbclk_khz - 1), - dccg->ref_dtbclk_khz); + modulo = params->ref_dtbclk_khz * 1000; + phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + params->ref_dtbclk_khz - 1), + params->ref_dtbclk_khz); - REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst], - DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div); + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div); - REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], modulo); - REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], phase); + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase); - REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst], - DTBCLK_DTO_ENABLE[dtbclk_inst], 1); + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 1); - REG_WAIT(OTG_PIXEL_RATE_CNTL[dtbclk_inst], - DTBCLKDTO_ENABLE_STATUS[dtbclk_inst], 1, + REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1, 1, 100); /* The recommended programming sequence to enable DTBCLK DTO to generate * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should * be set only after DTO is enabled */ - REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst], - PIPE_DTO_SRC_SEL[dtbclk_inst], 1); - - dccg->dtbclk_khz[dtbclk_inst] = req_dtbclk_khz; + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + PIPE_DTO_SRC_SEL[params->otg_inst], 1); } else { - REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[dtbclk_inst], - DTBCLK_DTO_ENABLE[dtbclk_inst], 0, - PIPE_DTO_SRC_SEL[dtbclk_inst], 0, - DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div); + REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0, + PIPE_DTO_SRC_SEL[params->otg_inst], 0, + DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div); - REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], 0); - REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], 0); - - dccg->dtbclk_khz[dtbclk_inst] = 0; + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); } } @@ -606,16 +600,12 @@ void dccg31_set_audio_dtbclk_dto( REG_UPDATE(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, 4); // 04 - DCCG_AUDIO_DTO_SEL_AUDIO_DTO_DTBCLK - - dccg->audio_dtbclk_khz = req_audio_dtbclk_khz; } else { REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, 0); REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, 0); REG_UPDATE(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, 3); // 03 - DCCG_AUDIO_DTO_SEL_NO_AUDIO_DTO - - dccg->audio_dtbclk_khz = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index d94fd1010deb..8b12b4111c88 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -230,9 +230,7 @@ static void enc31_hw_init(struct link_encoder *enc) AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 AUX_RX_DETECTION_THRESHOLD [30:28] = 1 */ - AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); - - AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + // dmub will read AUX_DPHY_RX_CONTROL0/AUX_DPHY_TX_CONTROL from vbios table in dp_aux_init //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c index 789f7562cdc7..d2273674e872 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c @@ -1284,10 +1284,8 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; -#if defined (CONFIG_DRM_AMD_DC_DP2_0) if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) return true; -#endif } return false; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 46ce5a0ee4ec..b5570aa8e39d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -237,6 +237,7 @@ struct clk_mgr_funcs { bool safe_to_lower); int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); + int (*get_dtb_ref_clk_frequency)(struct clk_mgr *clk_mgr); void (*set_low_power_state)(struct clk_mgr *clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index b2fa4de47734..c7021915bac8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -60,8 +60,17 @@ struct dccg { const struct dccg_funcs *funcs; int pipe_dppclk_khz[MAX_PIPES]; int ref_dppclk; - int dtbclk_khz[MAX_PIPES]; - int audio_dtbclk_khz; + //int dtbclk_khz[MAX_PIPES];/* TODO needs to be removed */ + //int audio_dtbclk_khz;/* TODO needs to be removed */ + int ref_dtbclk_khz;/* TODO needs to be removed */ +}; + +struct dtbclk_dto_params { + const struct dc_crtc_timing *timing; + int otg_inst; + int pixclk_khz; + int req_audio_dtbclk_khz; + int num_odm_segments; int ref_dtbclk_khz; }; @@ -111,10 +120,7 @@ struct dccg_funcs { void (*set_dtbclk_dto)( struct dccg *dccg, - int dtbclk_inst, - int req_dtbclk_khz, - int num_odm_segments, - const struct dc_crtc_timing *timing); + struct dtbclk_dto_params *dto_params); void (*set_audio_dtbclk_dto)( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index 87972dc8443d..ea6cf8bfce30 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -27,6 +27,7 @@ #include "core_types.h" #include "dccg.h" #include "dc_link_dp.h" +#include "clk_mgr.h" static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link) { @@ -106,14 +107,18 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) struct hpo_dp_link_encoder *link_enc = pipe_ctx->link_res.hpo_dp_link_enc; struct dccg *dccg = dc->res_pool->dccg; struct timing_generator *tg = pipe_ctx->stream_res.tg; - int odm_segment_count = get_odm_segment_count(pipe_ctx); + struct dtbclk_dto_params dto_params = {0}; enum phyd32clk_clock_source phyd32clk = get_phyd32clk_src(pipe_ctx->stream->link); + dto_params.otg_inst = tg->inst; + dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk; + dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); + dto_params.timing = &pipe_ctx->stream->timing; + dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst); dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk); - dccg->funcs->set_dtbclk_dto(dccg, tg->inst, pipe_ctx->stream->phy_pix_clk, - odm_segment_count, - &pipe_ctx->stream->timing); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); stream_enc->funcs->enable_stream(stream_enc); stream_enc->funcs->map_stream_to_link(stream_enc, stream_enc->inst, link_enc->inst); } @@ -124,9 +129,13 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) struct hpo_dp_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; struct dccg *dccg = dc->res_pool->dccg; struct timing_generator *tg = pipe_ctx->stream_res.tg; + struct dtbclk_dto_params dto_params = {0}; + + dto_params.otg_inst = tg->inst; + dto_params.timing = &pipe_ctx->stream->timing; stream_enc->funcs->disable(stream_enc); - dccg->funcs->set_dtbclk_dto(dccg, tg->inst, 0, 0, &pipe_ctx->stream->timing); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 7c9330a61ac1..c7bd7e216710 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -84,7 +84,7 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; const uint32_t timeout = 100; - uint32_t in_reset, scratch, i; + uint32_t in_reset, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); @@ -115,6 +115,13 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) udelay(1); } + for (i = 0; i < timeout; ++i) { + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode); + if (pwait_mode & (1 << 0)) + break; + + udelay(1); + } /* Force reset in case we timed out, DMCUB is likely hung. */ } @@ -125,6 +132,8 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); REG_WRITE(DMCUB_OUTBOX1_WPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_RPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); /* Clear the GPINT command manually so we don't send anything during boot. */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 59ddc81b5a0e..f6db6f89d45d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -151,7 +151,8 @@ struct dmub_srv; DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \ DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \ DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \ - DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) + DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \ + DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS) struct dmub_srv_dcn31_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 73b9e0a87e54..20a3d4e23f66 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -127,6 +127,8 @@ struct av_sync_data { static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3, 0}; static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5, 0}; +static const u8 DP_SINK_BRANCH_DEV_NAME_7580[] = "7580\x80u"; + /*MST Dock*/ static const uint8_t SYNAPTICS_DEVICE_ID[] = "SYNA"; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h index 247c6e9632ba..1cb399dbc7cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h @@ -22,6 +22,7 @@ #ifndef SMU_11_0_7_PPTABLE_H #define SMU_11_0_7_PPTABLE_H +#pragma pack(push, 1) #define SMU_11_0_7_TABLE_FORMAT_REVISION 15 @@ -139,7 +140,7 @@ struct smu_11_0_7_overdrive_table uint32_t max[SMU_11_0_7_MAX_ODSETTING]; //default maximum settings uint32_t min[SMU_11_0_7_MAX_ODSETTING]; //default minimum settings int16_t pm_setting[SMU_11_0_7_MAX_PMSETTING]; //Optimized power mode feature settings -} __attribute__((packed)); +}; enum SMU_11_0_7_PPCLOCK_ID { SMU_11_0_7_PPCLOCK_GFXCLK = 0, @@ -166,7 +167,7 @@ struct smu_11_0_7_power_saving_clock_table uint32_t count; //power_saving_clock_count = SMU_11_0_7_PPCLOCK_COUNT uint32_t max[SMU_11_0_7_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz uint32_t min[SMU_11_0_7_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz -} __attribute__((packed)); +}; struct smu_11_0_7_powerplay_table { @@ -191,6 +192,8 @@ struct smu_11_0_7_powerplay_table struct smu_11_0_7_overdrive_table overdrive_table; PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h -} __attribute__((packed)); +}; + +#pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h index 7a63cf8e85ed..0116e3d04fad 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h @@ -22,6 +22,7 @@ #ifndef SMU_11_0_PPTABLE_H #define SMU_11_0_PPTABLE_H +#pragma pack(push, 1) #define SMU_11_0_TABLE_FORMAT_REVISION 12 @@ -109,7 +110,7 @@ struct smu_11_0_overdrive_table uint8_t cap[SMU_11_0_MAX_ODFEATURE]; //OD feature support flags uint32_t max[SMU_11_0_MAX_ODSETTING]; //default maximum settings uint32_t min[SMU_11_0_MAX_ODSETTING]; //default minimum settings -} __attribute__((packed)); +}; enum SMU_11_0_PPCLOCK_ID { SMU_11_0_PPCLOCK_GFXCLK = 0, @@ -133,7 +134,7 @@ struct smu_11_0_power_saving_clock_table uint32_t count; //power_saving_clock_count = SMU_11_0_PPCLOCK_COUNT uint32_t max[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz uint32_t min[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz -} __attribute__((packed)); +}; struct smu_11_0_powerplay_table { @@ -162,6 +163,8 @@ struct smu_11_0_powerplay_table #ifndef SMU_11_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h #endif -} __attribute__((packed)); +}; + +#pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h index 3f29f4327378..478862ded0bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h @@ -22,6 +22,8 @@ #ifndef SMU_13_0_7_PPTABLE_H #define SMU_13_0_7_PPTABLE_H +#pragma pack(push, 1) + #define SMU_13_0_7_TABLE_FORMAT_REVISION 15 //// POWERPLAYTABLE::ulPlatformCaps @@ -194,7 +196,8 @@ struct smu_13_0_7_powerplay_table struct smu_13_0_7_overdrive_table overdrive_table; uint8_t padding1; PPTable_t smc_pptable; //PPTable_t in driver_if.h -} __attribute__((packed)); +}; +#pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h index 1f311396b706..043307485528 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h @@ -22,6 +22,8 @@ #ifndef SMU_13_0_PPTABLE_H #define SMU_13_0_PPTABLE_H +#pragma pack(push, 1) + #define SMU_13_0_TABLE_FORMAT_REVISION 1 //// POWERPLAYTABLE::ulPlatformCaps @@ -109,7 +111,7 @@ struct smu_13_0_overdrive_table { uint8_t cap[SMU_13_0_MAX_ODFEATURE]; //OD feature support flags uint32_t max[SMU_13_0_MAX_ODSETTING]; //default maximum settings uint32_t min[SMU_13_0_MAX_ODSETTING]; //default minimum settings -} __attribute__((packed)); +}; enum SMU_13_0_PPCLOCK_ID { SMU_13_0_PPCLOCK_GFXCLK = 0, @@ -132,7 +134,7 @@ struct smu_13_0_power_saving_clock_table { uint32_t count; //power_saving_clock_count = SMU_11_0_PPCLOCK_COUNT uint32_t max[SMU_13_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz uint32_t min[SMU_13_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz -} __attribute__((packed)); +}; struct smu_13_0_powerplay_table { struct atom_common_table_header header; @@ -160,6 +162,8 @@ struct smu_13_0_powerplay_table { #ifndef SMU_13_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in driver_if.h #endif -} __attribute__((packed)); +}; + +#pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5f8809f6990d..2fbd2926a531 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1228,6 +1228,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (speed == 0) + return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 4551bc8a3ecf..f573d582407e 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -160,13 +160,12 @@ void ast_dp_launch(struct drm_device *dev, u8 bPower) } if (bDPExecute) - ast->tx_chip_type = AST_TX_ASTDP; + ast->tx_chip_types |= BIT(AST_TX_ASTDP); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5, (u8) ~ASTDP_HOST_EDID_READ_DONE_MASK, ASTDP_HOST_EDID_READ_DONE); - } else - ast->tx_chip_type = AST_TX_NONE; + } } diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c index 204c926a18ea..4f75a9efb610 100644 --- a/drivers/gpu/drm/ast/ast_dp501.c +++ b/drivers/gpu/drm/ast/ast_dp501.c @@ -450,7 +450,7 @@ void ast_init_3rdtx(struct drm_device *dev) ast_init_dvo(dev); break; default: - if (ast->tx_chip_type == AST_TX_SIL164) + if (ast->tx_chip_types & BIT(AST_TX_SIL164)) ast_init_dvo(dev); else ast_init_analog(dev); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index afebe35f205e..a34db4380f68 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -73,6 +73,11 @@ enum ast_tx_chip { AST_TX_ASTDP, }; +#define AST_TX_NONE_BIT BIT(AST_TX_NONE) +#define AST_TX_SIL164_BIT BIT(AST_TX_SIL164) +#define AST_TX_DP501_BIT BIT(AST_TX_DP501) +#define AST_TX_ASTDP_BIT BIT(AST_TX_ASTDP) + #define AST_DRAM_512Mx16 0 #define AST_DRAM_1Gx16 1 #define AST_DRAM_512Mx32 2 @@ -173,7 +178,7 @@ struct ast_private { struct drm_plane primary_plane; struct ast_cursor_plane cursor_plane; struct drm_crtc crtc; - union { + struct { struct { struct drm_encoder encoder; struct ast_vga_connector vga_connector; @@ -199,7 +204,7 @@ struct ast_private { ast_use_defaults } config_mode; - enum ast_tx_chip tx_chip_type; + unsigned long tx_chip_types; /* bitfield of enum ast_chip_type */ u8 *dp501_fw_addr; const struct firmware *dp501_fw; /* dp501 fw */ }; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index d770d5a23c1a..067453266897 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -216,7 +216,7 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) } /* Check 3rd Tx option (digital output afaik) */ - ast->tx_chip_type = AST_TX_NONE; + ast->tx_chip_types |= AST_TX_NONE_BIT; /* * VGACRA3 Enhanced Color Mode Register, check if DVO is already @@ -229,7 +229,7 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) if (!*need_post) { jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xff); if (jreg & 0x80) - ast->tx_chip_type = AST_TX_SIL164; + ast->tx_chip_types = AST_TX_SIL164_BIT; } if ((ast->chip == AST2300) || (ast->chip == AST2400) || (ast->chip == AST2500)) { @@ -241,7 +241,7 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff); switch (jreg) { case 0x04: - ast->tx_chip_type = AST_TX_SIL164; + ast->tx_chip_types = AST_TX_SIL164_BIT; break; case 0x08: ast->dp501_fw_addr = drmm_kzalloc(dev, 32*1024, GFP_KERNEL); @@ -254,22 +254,19 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) } fallthrough; case 0x0c: - ast->tx_chip_type = AST_TX_DP501; + ast->tx_chip_types = AST_TX_DP501_BIT; } } else if (ast->chip == AST2600) ast_dp_launch(&ast->base, 0); /* Print stuff for diagnostic purposes */ - switch(ast->tx_chip_type) { - case AST_TX_SIL164: + if (ast->tx_chip_types & AST_TX_NONE_BIT) + drm_info(dev, "Using analog VGA\n"); + if (ast->tx_chip_types & AST_TX_SIL164_BIT) drm_info(dev, "Using Sil164 TMDS transmitter\n"); - break; - case AST_TX_DP501: + if (ast->tx_chip_types & AST_TX_DP501_BIT) drm_info(dev, "Using DP501 DisplayPort transmitter\n"); - break; - default: - drm_info(dev, "Analog VGA only\n"); - } + return 0; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 323af2746aa9..db2010a55674 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -997,10 +997,10 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_ON: ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, 0); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xfc, 0); - if (ast->tx_chip_type == AST_TX_DP501) + if (ast->tx_chip_types & AST_TX_DP501_BIT) ast_set_dp501_video_output(crtc->dev, 1); - if (ast->tx_chip_type == AST_TX_ASTDP) { + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) { ast_dp_power_on_off(crtc->dev, AST_DP_POWER_ON); ast_wait_for_vretrace(ast); ast_dp_set_on_off(crtc->dev, 1); @@ -1012,17 +1012,17 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_SUSPEND: case DRM_MODE_DPMS_OFF: ch = mode; - if (ast->tx_chip_type == AST_TX_DP501) + if (ast->tx_chip_types & AST_TX_DP501_BIT) ast_set_dp501_video_output(crtc->dev, 0); - break; - if (ast->tx_chip_type == AST_TX_ASTDP) { + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) { ast_dp_set_on_off(crtc->dev, 0); ast_dp_power_on_off(crtc->dev, AST_DP_POWER_OFF); } ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, 0x20); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xfc, ch); + break; } } @@ -1155,7 +1155,7 @@ ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, ast_crtc_load_lut(ast, crtc); //Set Aspeed Display-Port - if (ast->tx_chip_type == AST_TX_ASTDP) + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) ast_dp_set_mode(crtc, vbios_mode_info); mutex_unlock(&ast->ioregs_lock); @@ -1739,22 +1739,26 @@ int ast_mode_config_init(struct ast_private *ast) ast_crtc_init(dev); - switch (ast->tx_chip_type) { - case AST_TX_NONE: + if (ast->tx_chip_types & AST_TX_NONE_BIT) { ret = ast_vga_output_init(ast); - break; - case AST_TX_SIL164: + if (ret) + return ret; + } + if (ast->tx_chip_types & AST_TX_SIL164_BIT) { ret = ast_sil164_output_init(ast); - break; - case AST_TX_DP501: + if (ret) + return ret; + } + if (ast->tx_chip_types & AST_TX_DP501_BIT) { ret = ast_dp501_output_init(ast); - break; - case AST_TX_ASTDP: + if (ret) + return ret; + } + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) { ret = ast_astdp_output_init(ast); - break; + if (ret) + return ret; } - if (ret) - return ret; drm_mode_config_reset(dev); diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 0aa9cf0fb5c3..82fd3c8adee1 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -391,7 +391,7 @@ void ast_post_gpu(struct drm_device *dev) ast_init_3rdtx(dev); } else { - if (ast->tx_chip_type != AST_TX_NONE) + if (ast->tx_chip_types & AST_TX_SIL164_BIT) ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80); /* Enable DVO */ } } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index b97f6e8f0f6b..01c8b80e34ec 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1267,6 +1267,25 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, } static +struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp, + struct drm_atomic_state *state) +{ + struct drm_encoder *encoder = dp->encoder; + struct drm_connector *connector; + struct drm_connector_state *conn_state; + + connector = drm_atomic_get_old_connector_for_encoder(state, encoder); + if (!connector) + return NULL; + + conn_state = drm_atomic_get_old_connector_state(state, connector); + if (!conn_state) + return NULL; + + return conn_state->crtc; +} + +static struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, struct drm_atomic_state *state) { @@ -1446,14 +1465,16 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, { struct drm_atomic_state *old_state = old_bridge_state->base.state; struct analogix_dp_device *dp = bridge->driver_private; - struct drm_crtc *crtc; + struct drm_crtc *old_crtc, *new_crtc; + struct drm_crtc_state *old_crtc_state = NULL; struct drm_crtc_state *new_crtc_state = NULL; + int ret; - crtc = analogix_dp_get_new_crtc(dp, old_state); - if (!crtc) + new_crtc = analogix_dp_get_new_crtc(dp, old_state); + if (!new_crtc) goto out; - new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc); + new_crtc_state = drm_atomic_get_new_crtc_state(old_state, new_crtc); if (!new_crtc_state) goto out; @@ -1462,6 +1483,19 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, return; out: + old_crtc = analogix_dp_get_old_crtc(dp, old_state); + if (old_crtc) { + old_crtc_state = drm_atomic_get_old_crtc_state(old_state, + old_crtc); + + /* When moving from PSR to fully disabled, exit PSR first. */ + if (old_crtc_state && old_crtc_state->self_refresh_active) { + ret = analogix_dp_disable_psr(dp); + if (ret) + DRM_ERROR("Failed to disable psr (%d)\n", ret); + } + } + analogix_dp_bridge_disable(bridge); } diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index b2675c769a55..4b503c544256 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -74,22 +74,6 @@ static int fsl_ldb_attach(struct drm_bridge *bridge, bridge, flags); } -static int fsl_ldb_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - /* Invert DE signal polarity. */ - bridge_state->input_bus_cfg.flags &= ~(DRM_BUS_FLAG_DE_LOW | - DRM_BUS_FLAG_DE_HIGH); - if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_LOW) - bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_HIGH; - else if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_HIGH) - bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_LOW; - - return 0; -} - static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state) { @@ -153,7 +137,7 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, reg = LDB_CTRL_CH0_ENABLE; if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_CH1_ENABLE; + reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE; if (lvds_format_24bpp) { reg |= LDB_CTRL_CH0_DATA_WIDTH; @@ -233,7 +217,7 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge, { struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge); - if (mode->clock > (fsl_ldb->lvds_dual_link ? 80000 : 160000)) + if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000)) return MODE_CLOCK_HIGH; return MODE_OK; @@ -241,7 +225,6 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge, static const struct drm_bridge_funcs funcs = { .attach = fsl_ldb_attach, - .atomic_check = fsl_ldb_atomic_check, .atomic_enable = fsl_ldb_atomic_enable, .atomic_disable = fsl_ldb_atomic_disable, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 2831f0813c3a..ac66f408b40c 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -577,7 +577,7 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) ctx->host_node = of_graph_get_remote_port_parent(endpoint); of_node_put(endpoint); - if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) { + if (ctx->dsi_lanes <= 0 || ctx->dsi_lanes > 4) { ret = -EINVAL; goto err_put_node; } diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 74bd4a76b253..059fd71424f6 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -329,7 +329,20 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const struct drm_driver *req_driver) { resource_size_t base, size; - int bar, ret = 0; + int bar, ret; + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ +#if IS_REACHABLE(CONFIG_FB) + ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); + if (ret) + return ret; +#endif + ret = vga_remove_vgacon(pdev); + if (ret) + return ret; for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) { if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) @@ -339,15 +352,6 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, drm_aperture_detach_drivers(base, size); } - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ -#if IS_REACHABLE(CONFIG_FB) - ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); -#endif - if (ret == 0) - ret = vga_remove_vgacon(pdev); - return ret; + return 0; } EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 9603193d2fa1..987e4b212e9f 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1011,9 +1011,19 @@ crtc_needs_disable(struct drm_crtc_state *old_state, return drm_atomic_crtc_effectively_active(old_state); /* - * We need to run through the crtc_funcs->disable() function if the CRTC - * is currently on, if it's transitioning to self refresh mode, or if - * it's in self refresh mode and needs to be fully disabled. + * We need to disable bridge(s) and CRTC if we're transitioning out of + * self-refresh and changing CRTCs at the same time, because the + * bridge tracks self-refresh status via CRTC state. + */ + if (old_state->self_refresh_active && + old_state->crtc != new_state->crtc) + return true; + + /* + * We also need to run through the crtc_funcs->disable() function if + * the CRTC is currently on, if it's transitioning to self refresh + * mode, or if it's in self refresh mode and needs to be fully + * disabled. */ return old_state->active || (old_state->self_refresh_active && !new_state->active) || diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index d5962a34c01d..e5fc875990c4 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem, struct iosys_map *map) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; + + dma_resv_lock(gem->resv, NULL); + ret = ttm_bo_vmap(bo, map); + dma_resv_unlock(gem->resv); - return ttm_bo_vmap(bo, map); + return ret; } EXPORT_SYMBOL(drm_gem_ttm_vmap); @@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem, { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + dma_resv_lock(gem->resv, NULL); ttm_bo_vunmap(bo, map); + dma_resv_unlock(gem->resv); } EXPORT_SYMBOL(drm_gem_ttm_vunmap); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 4e853acfd1e8..d4e0f2e85548 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -152,6 +152,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO NEXT */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"), + DMI_MATCH(DMI_BOARD_NAME, "NEXT"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), @@ -280,6 +286,21 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Tablet 2 830F / 830L */ + .matches = { + /* + * Note this also matches the Lenovo Yoga Tablet 2 1050F/L + * since that uses the same mainboard. The resolution match + * will limit this to only matching on the 830F/L. Neither has + * any external video outputs so those are not a concern. + */ + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"), + DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"), + /* Partial match on beginning of BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 424ea23eec32..16c539657f73 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -177,15 +177,15 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER), DRM_COMPONENT_DRIVER }, { - DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC), - DRM_COMPONENT_DRIVER - }, { DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP), DRM_COMPONENT_DRIVER }, { DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI), DRM_COMPONENT_DRIVER }, { + DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC), + DRM_COMPONENT_DRIVER + }, { DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI), DRM_COMPONENT_DRIVER }, { diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 9e06f8e2a863..09ce28ee08d9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -26,6 +26,7 @@ #include <drm/drm_print.h> #include "exynos_drm_drv.h" +#include "exynos_drm_crtc.h" /* Sysreg registers for MIC */ #define DSD_CFG_MUX 0x1004 @@ -100,9 +101,7 @@ struct exynos_mic { bool i80_mode; struct videomode vm; - struct drm_encoder *encoder; struct drm_bridge bridge; - struct drm_bridge *next_bridge; bool enabled; }; @@ -229,8 +228,6 @@ static void mic_set_reg_on(struct exynos_mic *mic, bool enable) writel(reg, mic->reg + MIC_OP); } -static void mic_disable(struct drm_bridge *bridge) { } - static void mic_post_disable(struct drm_bridge *bridge) { struct exynos_mic *mic = bridge->driver_private; @@ -297,34 +294,30 @@ unlock: mutex_unlock(&mic_mutex); } -static void mic_enable(struct drm_bridge *bridge) { } - -static int mic_attach(struct drm_bridge *bridge, - enum drm_bridge_attach_flags flags) -{ - struct exynos_mic *mic = bridge->driver_private; - - return drm_bridge_attach(bridge->encoder, mic->next_bridge, - &mic->bridge, flags); -} - static const struct drm_bridge_funcs mic_bridge_funcs = { - .disable = mic_disable, .post_disable = mic_post_disable, .mode_set = mic_mode_set, .pre_enable = mic_pre_enable, - .enable = mic_enable, - .attach = mic_attach, }; static int exynos_mic_bind(struct device *dev, struct device *master, void *data) { struct exynos_mic *mic = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_crtc *crtc = exynos_drm_crtc_get_by_type(drm_dev, + EXYNOS_DISPLAY_TYPE_LCD); + struct drm_encoder *e, *encoder = NULL; + + drm_for_each_encoder(e, drm_dev) + if (e->possible_crtcs == drm_crtc_mask(&crtc->base)) + encoder = e; + if (!encoder) + return -ENODEV; mic->bridge.driver_private = mic; - return 0; + return drm_bridge_attach(encoder, &mic->bridge, NULL, 0); } static void exynos_mic_unbind(struct device *dev, struct device *master, @@ -388,7 +381,6 @@ static int exynos_mic_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct exynos_mic *mic; - struct device_node *remote; struct resource res; int ret, i; @@ -432,16 +424,6 @@ static int exynos_mic_probe(struct platform_device *pdev) } } - remote = of_graph_get_remote_node(dev->of_node, 1, 0); - mic->next_bridge = of_drm_find_bridge(remote); - if (IS_ERR(mic->next_bridge)) { - DRM_DEV_ERROR(dev, "mic: Failed to find next bridge\n"); - ret = PTR_ERR(mic->next_bridge); - goto err; - } - - of_node_put(remote); - platform_set_drvdata(pdev, mic); mic->bridge.funcs = &mic_bridge_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e4a79c11fd25..ff67899522cf 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -388,13 +388,23 @@ static int dg2_max_source_rate(struct intel_dp *intel_dp) return intel_dp_is_edp(intel_dp) ? 810000 : 1350000; } +static bool is_low_voltage_sku(struct drm_i915_private *i915, enum phy phy) +{ + u32 voltage; + + voltage = intel_de_read(i915, ICL_PORT_COMP_DW3(phy)) & VOLTAGE_INFO_MASK; + + return voltage == VOLTAGE_INFO_0_85V; +} + static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); - if (intel_phy_is_combo(dev_priv, phy) && !intel_dp_is_edp(intel_dp)) + if (intel_phy_is_combo(dev_priv, phy) && + (is_low_voltage_sku(dev_priv, phy) || !intel_dp_is_edp(intel_dp))) return 540000; return 810000; @@ -402,7 +412,23 @@ static int icl_max_source_rate(struct intel_dp *intel_dp) static int ehl_max_source_rate(struct intel_dp *intel_dp) { - if (intel_dp_is_edp(intel_dp)) + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); + + if (intel_dp_is_edp(intel_dp) || is_low_voltage_sku(dev_priv, phy)) + return 540000; + + return 810000; +} + +static int dg1_max_source_rate(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(i915, dig_port->base.port); + + if (intel_phy_is_combo(i915, phy) && is_low_voltage_sku(i915, phy)) return 540000; return 810000; @@ -445,7 +471,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) max_rate = dg2_max_source_rate(intel_dp); else if (IS_ALDERLAKE_P(dev_priv) || IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv)) - max_rate = 810000; + max_rate = dg1_max_source_rate(intel_dp); else if (IS_JSL_EHL(dev_priv)) max_rate = ehl_max_source_rate(intel_dp); else diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 061b277e5ce7..14d2a64193b2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -839,6 +839,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 22f55574a35c..88c2f38aa870 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2396,7 +2396,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params, } /* - * Display WA #22010492432: ehl, tgl, adl-p + * Display WA #22010492432: ehl, tgl, adl-s, adl-p * Program half of the nominal DCO divider fraction value. */ static bool @@ -2404,7 +2404,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || - IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && + IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab4c5ab28e4d..321af109d484 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -933,8 +933,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_PERSISTENCE: if (args->size) ret = -EINVAL; - ret = proto_context_set_persistence(fpriv->dev_priv, pc, - args->value); + else + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); break; case I915_CONTEXT_PARAM_PROTECTED_CONTENT: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 3e5d6057b3ef..1674b0c5802b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -35,12 +35,12 @@ bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (obj->cache_dirty) return false; - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - if (IS_DGFX(i915)) return false; + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + /* Currently in use by HW (display engine)? Keep flushed. */ return i915_gem_object_is_framebuffer(obj); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c326bd2b444f..30fe847c6664 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } } - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); + /* Reserve enough slots to accommodate composite fences */ + err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f46ee16a323a..a4fb577eceb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -60,6 +60,8 @@ __i915_gem_object_create_region(struct intel_memory_region *mem, if (page_size) default_page_size = page_size; + /* We should be able to fit a page within an sg entry */ + GEM_BUG_ON(overflows_type(default_page_size, u32)); GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); GEM_BUG_ON(default_page_size < PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..8f1bb6a4b7d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + u32 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); + page_alignment = bo->page_alignment << PAGE_SHIFT; + if (!page_alignment) + page_alignment = obj->mm.region->min_page_size; + /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. @@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct i915_refct_sgt *rsgt; rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - res); + res, + page_alignment); if (IS_ERR(rsgt)) return rsgt; @@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, + page_alignment); } static int i915_ttm_truncate(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 319936f91ac5..e6e01c2a74a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -9,6 +9,7 @@ #include <linux/jiffies.h> #include "gt/intel_engine.h" +#include "gt/intel_rps.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); } +static void +i915_gem_object_boost(struct dma_resv *resv, unsigned int flags) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + /* + * Prescan all fences for potential boosting before we begin waiting. + * + * When we wait, we wait on outstanding fences serially. If the + * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced + * form 1:2, then as we look at each wait in turn we see that each + * request is currently executing and not worthy of boosting. But if + * we only happen to look at the final fence in the sequence (because + * of request coalescing or splitting between read/write arrays by + * the iterator), then we would boost. As such our decision to boost + * or not is delicately balanced on the order we wait on fences. + * + * So instead of looking for boosts sequentially, look for all boosts + * upfront and then wait on the outstanding fences. + */ + + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); + dma_resv_for_each_fence_unlocked(&cursor, fence) + if (dma_fence_is_i915(fence) && + !i915_request_started(to_request(fence))) + intel_rps_boost(to_request(fence)); + dma_resv_iter_end(&cursor); +} + static long i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, @@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; + i915_gem_object_boost(resv, flags); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 09f82545789f..44e7339e7a4a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -273,10 +273,17 @@ struct intel_context { u8 child_index; /** @guc: GuC specific members for parallel submission */ struct { - /** @wqi_head: head pointer in work queue */ + /** @wqi_head: cached head pointer in work queue */ u16 wqi_head; - /** @wqi_tail: tail pointer in work queue */ + /** @wqi_tail: cached tail pointer in work queue */ u16 wqi_tail; + /** @wq_head: pointer to the actual head in work queue */ + u32 *wq_head; + /** @wq_tail: pointer to the actual head in work queue */ + u32 *wq_tail; + /** @wq_status: pointer to the status in work queue */ + u32 *wq_status; + /** * @parent_page: page in context state (ce->state) used * by parent for work queue, process descriptor diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 1431f1e9dbee..04e435bce79b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); +void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine); + void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 14c6ddbbfde8..5b6ce10cb158 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1282,10 +1282,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); /* - * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is + * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (GRAPHICS_VER(engine->i915) == 12) + if (IS_GRAPHICS_VER(engine->i915, 11, 12)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); @@ -1308,6 +1308,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) return -ENODEV; ENGINE_TRACE(engine, "\n"); + /* + * TODO: Find out why occasionally stopping the CS times out. Seen + * especially with gem_eio tests. + * + * Occasionally trying to stop the cs times out, but does not adversely + * affect functionality. The timeout is set as a config parameter that + * defaults to 100ms. In most cases the follow up operation is to wait + * for pending MI_FORCE_WAKES. The assumption is that this timeout is + * sufficient for any pending MI_FORCEWAKEs to complete. Once root + * caused, the caller must check and handle the return from this + * function. + */ if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", @@ -1334,6 +1346,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) +{ + static const i915_reg_t _reg[I915_NUM_ENGINES] = { + [RCS0] = MSG_IDLE_CS, + [BCS0] = MSG_IDLE_BCS, + [VCS0] = MSG_IDLE_VCS0, + [VCS1] = MSG_IDLE_VCS1, + [VCS2] = MSG_IDLE_VCS2, + [VCS3] = MSG_IDLE_VCS3, + [VCS4] = MSG_IDLE_VCS4, + [VCS5] = MSG_IDLE_VCS5, + [VCS6] = MSG_IDLE_VCS6, + [VCS7] = MSG_IDLE_VCS7, + [VECS0] = MSG_IDLE_VECS0, + [VECS1] = MSG_IDLE_VECS1, + [VECS2] = MSG_IDLE_VECS2, + [VECS3] = MSG_IDLE_VECS3, + [CCS0] = MSG_IDLE_CS, + [CCS1] = MSG_IDLE_CS, + [CCS2] = MSG_IDLE_CS, + [CCS3] = MSG_IDLE_CS, + }; + u32 val; + + if (!_reg[engine->id].reg) { + drm_err(&engine->i915->drm, + "MSG IDLE undefined for engine id %u\n", engine->id); + return 0; + } + + val = intel_uncore_read(engine->uncore, _reg[engine->id]); + + /* bits[29:25] & bits[13:9] >> shift */ + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; +} + +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) +{ + int ret; + + /* Ensure GPM receives fw up/down after CS is stopped */ + udelay(1); + + /* Wait for forcewake request to complete in GPM */ + ret = __intel_wait_for_register_fw(gt->uncore, + GEN9_PWRGT_DOMAIN_STATUS, + fw_mask, fw_mask, 5000, 0, NULL); + + /* Ensure CS receives fw ack from GPM */ + udelay(1); + + if (ret) + GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); +} + +/* + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The + * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we + * are concerned only with the gt reset here, we use a logical OR of pending + * forcewakeups from all reset domains and then wait for them to complete by + * querying PWRGT_DOMAIN_STATUS. + */ +void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine) +{ + u32 fw_pending = __cs_pending_mi_force_wakes(engine); + + if (fw_pending) + __gpm_wait_for_fw_complete(engine->gt, fw_pending); +} + static u32 read_subslice_reg(const struct intel_engine_cs *engine, int slice, int subslice, i915_reg_t reg) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 86f7a9ac1c39..0627fa10d2dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } +static u32 map_i915_prio_to_lrc_desc_prio(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq) desc = ce->lrc.desc; if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) - desc |= lrc_desc_priority(rq_prio(rq)); + desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq)); /* * WaIdleLiteRestore:bdw,skl @@ -2958,6 +2968,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) ring_set_paused(engine, 1); intel_engine_stop_cs(engine); + /* + * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ + if (IS_GRAPHICS_VER(engine->i915, 11, 12)) + intel_engine_wait_for_pending_mi_fw(engine); + engine->execlists.reset_ccid = active_ccid(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 53307ca0eed0..531af6ad7007 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt) { intel_wakeref_t wakeref; + intel_gt_sysfs_unregister(gt); intel_rps_driver_unregister(>->rps); intel_gsc_fini(>->gsc); @@ -1208,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -1222,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 8ec8bc660c8c..9e4ebf53379b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj) static struct intel_gt *kobj_to_gt(struct kobject *kobj) { - return container_of(kobj, struct kobj_gt, base)->gt; + return container_of(kobj, struct intel_gt, sysfs_gt); } struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = { }; ATTRIBUTE_GROUPS(id); +/* A kobject needs a release() method even if it does nothing */ static void kobj_gt_release(struct kobject *kobj) { - kfree(kobj); } static struct kobj_type kobj_gt_type = { @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = { void intel_gt_sysfs_register(struct intel_gt *gt) { - struct kobj_gt *kg; - /* * We need to make things right with the * ABI compatibility. The files were originally @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt) if (gt_is_root(gt)) intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt)); - kg = kzalloc(sizeof(*kg), GFP_KERNEL); - if (!kg) + /* init and xfer ownership to sysfs tree */ + if (kobject_init_and_add(>->sysfs_gt, &kobj_gt_type, + gt->i915->sysfs_gt, "gt%d", gt->info.id)) goto exit_fail; - kobject_init(&kg->base, &kobj_gt_type); - kg->gt = gt; - - /* xfer ownership to sysfs tree */ - if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id)) - goto exit_kobj_put; - - intel_gt_sysfs_pm_init(gt, &kg->base); + intel_gt_sysfs_pm_init(gt, >->sysfs_gt); return; -exit_kobj_put: - kobject_put(&kg->base); - exit_fail: + kobject_put(>->sysfs_gt); drm_warn(>->i915->drm, "failed to initialize gt%d sysfs root\n", gt->info.id); } + +void intel_gt_sysfs_unregister(struct intel_gt *gt) +{ + kobject_put(>->sysfs_gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 9471b26752cf..a99aa7e8b01a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -13,11 +13,6 @@ struct intel_gt; -struct kobj_gt { - struct kobject base; - struct intel_gt *gt; -}; - bool is_object_gt(struct kobject *kobj); struct drm_i915_private *kobj_to_i915(struct kobject *kobj); @@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt, const char *name); void intel_gt_sysfs_register(struct intel_gt *gt); +void intel_gt_sysfs_unregister(struct intel_gt *gt); struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, const char *name); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index b06611c1d4ad..edd7a3cf5f5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -224,6 +224,9 @@ struct intel_gt { } mocs; struct intel_pxp pxp; + + /* gt/gtN sysfs */ + struct kobject sysfs_gt; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 31be734010db..a390f0813c8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -111,16 +111,6 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 -static inline u32 lrc_desc_priority(int prio) -{ - if (prio > I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_HIGH; - else if (prio < I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_LOW; - else - return GEN12_CTX_PRIORITY_NORMAL; -} - static inline void lrc_runtime_start(struct intel_context *ce) { struct intel_context_stats *stats = &ce->stats; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 4ef9990ed7f8..29ef8afc8c2e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,6 +122,9 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2c4ad4a65089..8c6885f43d1a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -310,8 +310,8 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) if (IS_DG2(gt->i915)) flags |= GUC_WA_DUAL_QUEUE; - /* Wa_22011802037: graphics version 12 */ - if (GRAPHICS_VER(gt->i915) == 12) + /* Wa_22011802037: graphics version 11/12 */ + if (IS_GRAPHICS_VER(gt->i915, 11, 12)) flags |= GUC_WA_PRE_PARSER; /* Wa_16011777198:dg2 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 966e69a8b1c1..9feda105f913 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -170,6 +170,11 @@ struct intel_guc { /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; + /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */ + struct i915_vma *lrc_desc_pool_v69; + /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */ + void *lrc_desc_pool_vaddr_v69; + /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199..89a7e5ec0614 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -203,6 +203,20 @@ struct guc_wq_item { u32 fence_id; } __packed; +struct guc_process_desc_v69 { + u32 stage_id; + u64 db_base_addr; + u32 head; + u32 tail; + u32 error_offset; + u64 wq_base_addr; + u32 wq_size_bytes; + u32 wq_status; + u32 engine_presence; + u32 priority; + u32 reserved[36]; +} __packed; + struct guc_sched_wq_desc { u32 head; u32 tail; @@ -227,6 +241,37 @@ struct guc_ctxt_registration_info { }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0) + +/* + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. + */ +struct guc_lrc_desc_v69 { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SPLC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; + u32 priority; + u32 process_desc; + u32 wq_addr; + u32 wq_size; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; +} __packed; + /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { u32 kl; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1726f0f19901..2d9f5f1c79d3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -414,12 +414,15 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_sched_wq_desc wq_desc; + union guc_descs { + struct guc_sched_wq_desc wq_desc; + struct guc_process_desc_v69 pdesc; + } descs; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + u8 unused[WQ_OFFSET - sizeof(union guc_descs) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } +static struct guc_process_desc_v69 * +__get_process_desc_v69(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->descs.pdesc; +} + static struct guc_sched_wq_desc * -__get_wq_desc(struct intel_context *ce) +__get_wq_desc_v70(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->wq_desc; + return &ps->descs.wq_desc; } -static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) { /* * Check for space in work queue. Caching a value of head pointer in @@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) return ce; } +static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; + + if (!base) + return NULL; + + GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); + + return &base[index]; +} + +static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * + GUC_MAX_CONTEXT_ID); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, + (void **)&guc->lrc_desc_pool_vaddr_v69); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) +{ + if (!guc->lrc_desc_pool_vaddr_v69) + return; + + guc->lrc_desc_pool_vaddr_v69 = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); +} + static inline bool guc_submission_initialized(struct intel_guc *guc) { return guc->submission_initialized; } +static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) +{ + struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); + + if (desc) + memset(desc, 0, sizeof(*desc)); +} + static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); @@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) if (unlikely(!guc_submission_initialized(guc))) return; + _reset_lrc_desc_v69(guc, id); + /* * xarray API doesn't have xa_erase_irqsave wrapper, so calling * the lower level functions directly. @@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } -static int guc_context_policy_init(struct intel_context *ce, bool loop); +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(context_guc_id_invalid(ce)); if (context_policy_required(ce)) { - err = guc_context_policy_init(ce, false); + err = guc_context_policy_init_v70(ce, false); if (err) return err; } @@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static void write_wqi(struct intel_context *ce, u32 wqi_size) { BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); @@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); - u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); + u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(wq_desc, ce, wqi_size); + wqi = get_wq_pointer(ce, wqi_size); if (!wqi) return -EBUSY; @@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(wq_desc, ce, wqi_size); + write_wqi(ce, wqi_size); return 0; } @@ -1527,87 +1578,18 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) lrc_update_regs(ce, engine, head); } -static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) -{ - static const i915_reg_t _reg[I915_NUM_ENGINES] = { - [RCS0] = MSG_IDLE_CS, - [BCS0] = MSG_IDLE_BCS, - [VCS0] = MSG_IDLE_VCS0, - [VCS1] = MSG_IDLE_VCS1, - [VCS2] = MSG_IDLE_VCS2, - [VCS3] = MSG_IDLE_VCS3, - [VCS4] = MSG_IDLE_VCS4, - [VCS5] = MSG_IDLE_VCS5, - [VCS6] = MSG_IDLE_VCS6, - [VCS7] = MSG_IDLE_VCS7, - [VECS0] = MSG_IDLE_VECS0, - [VECS1] = MSG_IDLE_VECS1, - [VECS2] = MSG_IDLE_VECS2, - [VECS3] = MSG_IDLE_VECS3, - [CCS0] = MSG_IDLE_CS, - [CCS1] = MSG_IDLE_CS, - [CCS2] = MSG_IDLE_CS, - [CCS3] = MSG_IDLE_CS, - }; - u32 val; - - if (!_reg[engine->id].reg) - return 0; - - val = intel_uncore_read(engine->uncore, _reg[engine->id]); - - /* bits[29:25] & bits[13:9] >> shift */ - return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; -} - -static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) -{ - int ret; - - /* Ensure GPM receives fw up/down after CS is stopped */ - udelay(1); - - /* Wait for forcewake request to complete in GPM */ - ret = __intel_wait_for_register_fw(gt->uncore, - GEN9_PWRGT_DOMAIN_STATUS, - fw_mask, fw_mask, 5000, 0, NULL); - - /* Ensure CS receives fw ack from GPM */ - udelay(1); - - if (ret) - GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); -} - -/* - * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any - * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The - * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the - * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we - * are concerned only with the gt reset here, we use a logical OR of pending - * forcewakeups from all reset domains and then wait for them to complete by - * querying PWRGT_DOMAIN_STATUS. - */ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) { - u32 fw_pending; - - if (GRAPHICS_VER(engine->i915) != 12) + if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) return; - /* - * Wa_22011802037 - * TODO: Occasionally trying to stop the cs times out, but does not - * adversely affect functionality. The timeout is set as a config - * parameter that defaults to 100ms. Assuming that this timeout is - * sufficient for any pending MI_FORCEWAKEs to complete, ignore the - * timeout returned here until it is root caused. - */ intel_engine_stop_cs(engine); - fw_pending = __cs_pending_mi_force_wakes(engine); - if (fw_pending) - __gpm_wait_for_fw_complete(engine->gt, fw_pending); + /* + * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ + intel_engine_wait_for_pending_mi_fw(engine); } static void guc_reset_nop(struct intel_engine_cs *engine) @@ -1868,20 +1850,34 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + int ret; if (guc->submission_initialized) return 0; + if (guc->fw.major_ver_found < 70) { + ret = guc_lrc_desc_pool_create_v69(guc); + if (ret) + return ret; + } + guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; + if (!guc->submission_state.guc_ids_bitmap) { + ret = -ENOMEM; + goto destroy_pool; + } guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); guc->submission_initialized = true; return 0; + +destroy_pool: + guc_lrc_desc_pool_destroy_v69(guc); + + return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1890,6 +1886,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) return; guc_flush_destroyed_contexts(guc); + guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); guc->submission_initialized = false; @@ -2147,10 +2144,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int __guc_action_register_multi_lrc(struct intel_guc *guc, - struct intel_context *ce, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc_v69); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); +} + +static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, + struct intel_context *ce, + struct guc_ctxt_registration_info *info, + bool loop) { struct intel_context *child; u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; @@ -2190,9 +2211,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, len, 0, loop); } -static int __guc_action_register_context(struct intel_guc *guc, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_context_v69(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int __guc_action_register_context_v70(struct intel_guc *guc, + struct guc_ctxt_registration_info *info, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, @@ -2213,24 +2249,52 @@ static int __guc_action_register_context(struct intel_guc *guc, 0, loop); } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info); +static void prepare_context_registration_info_v69(struct intel_context *ce); +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info); -static int register_context(struct intel_context *ce, bool loop) +static int +register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) +{ + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + + ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); + + prepare_context_registration_info_v69(ce); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, + offset, loop); + else + return __guc_action_register_context_v69(guc, ce->guc_id.id, + offset, loop); +} + +static int +register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) { struct guc_ctxt_registration_info info; + + prepare_context_registration_info_v70(ce, &info); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); + else + return __guc_action_register_context_v70(guc, &info, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ struct intel_guc *guc = ce_to_guc(ce); int ret; GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - prepare_context_registration_info(ce, &info); - - if (intel_context_is_parent(ce)) - ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); + if (guc->fw.major_ver_found >= 70) + ret = register_context_v70(guc, ce, loop); else - ret = __guc_action_register_context(guc, &info, loop); + ret = register_context_v69(guc, ce, loop); + if (likely(!ret)) { unsigned long flags; @@ -2238,7 +2302,8 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - guc_context_policy_init(ce, loop); + if (guc->fw.major_ver_found >= 70) + guc_context_policy_init_v70(ce, loop); } return ret; @@ -2335,7 +2400,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc, 0, loop); } -static int guc_context_policy_init(struct intel_context *ce, bool loop) +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2394,8 +2459,108 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) return ret; } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info) +static void guc_context_policy_init_v69(struct intel_engine_cs *engine, + struct guc_lrc_desc_v69 *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; + + /* NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + +static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) +{ + /* + * this matches the mapping we do in map_i915_prio_to_guc_prio() + * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) + */ + switch (prio) { + default: + MISSING_CASE(prio); + fallthrough; + case GUC_CLIENT_PRIORITY_KMD_NORMAL: + return GEN12_CTX_PRIORITY_NORMAL; + case GUC_CLIENT_PRIORITY_NORMAL: + return GEN12_CTX_PRIORITY_LOW; + case GUC_CLIENT_PRIORITY_HIGH: + case GUC_CLIENT_PRIORITY_KMD_HIGH: + return GEN12_CTX_PRIORITY_HIGH; + } +} + +static void prepare_context_registration_info_v69(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 ctx_id = ce->guc_id.id; + struct guc_lrc_desc_v69 *desc; + struct intel_context *child; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. + */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + desc = __get_lrc_desc_v69(guc, ctx_id); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = engine->logical_mask; + desc->hw_context_desc = ce->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + + /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. + */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc_v69 *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc_v69(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + ce->parallel.guc.wq_head = &pdesc->head; + ce->parallel.guc.wq_tail = &pdesc->tail; + ce->parallel.guc.wq_status = &pdesc->wq_status; + + for_each_child(ce, child) { + desc = __get_lrc_desc_v69(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + } + + clear_children_join_go_memory(ce); + } +} + +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2420,6 +2585,8 @@ static void prepare_context_registration_info(struct intel_context *ce, */ info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); + if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); info->flags = CONTEXT_REGISTRATION_FLAG_KMD; /* @@ -2443,10 +2610,14 @@ static void prepare_context_registration_info(struct intel_context *ce, info->wq_base_hi = upper_32_bits(wq_base_offset); info->wq_size = WQ_SIZE; - wq_desc = __get_wq_desc(ce); + wq_desc = __get_wq_desc_v70(ce); memset(wq_desc, 0, sizeof(*wq_desc)); wq_desc->wq_status = WQ_STATUS_ACTIVE; + ce->parallel.guc.wq_head = &wq_desc->head; + ce->parallel.guc.wq_tail = &wq_desc->tail; + ce->parallel.guc.wq_status = &wq_desc->wq_status; + clear_children_join_go_memory(ce); } } @@ -2761,11 +2932,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, guc_id); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, guc_id); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) @@ -3013,11 +3194,21 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, ce->guc_id.id); - __guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, ce->guc_id.id); + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, + ce->guc_id.id, + ce->guc_state.prio, + }; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_set_prio(struct intel_guc *guc, @@ -4527,17 +4718,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); if (intel_context_is_parent(ce)) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); struct intel_context *child; drm_printf(p, "\t\tNumber children: %u\n", ce->parallel.number_children); - drm_printf(p, "\t\tWQI Head: %u\n", - READ_ONCE(wq_desc->head)); - drm_printf(p, "\t\tWQI Tail: %u\n", - READ_ONCE(wq_desc->tail)); - drm_printf(p, "\t\tWQI Status: %u\n\n", - READ_ONCE(wq_desc->wq_status)); + + if (ce->parallel.guc.wq_status) { + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(*ce->parallel.guc.wq_head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(*ce->parallel.guc.wq_tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(*ce->parallel.guc.wq_status)); + } if (ce->engine->emit_bb_start == emit_bb_start_parent_no_preempt_mid_batch) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d078f884b5e3..703f42ba5ddd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) +#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) + #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ @@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ @@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) static const struct uc_fw_platform_requirement blobs_guc[] = { INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) }; + static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { + INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + }; static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; @@ -156,12 +164,21 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; - static const struct uc_fw_platform_requirement *fw_blobs; + const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; @@ -170,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; + uc_fw->wanted_path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; break; } } + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { + const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; + u32 count = ARRAY_SIZE(blobs_guc_fallback); + + for (i = 0; i < count && p <= blobs[i].p; i++) { + if (p == blobs[i].p && rev >= blobs[i].rev) { + const struct uc_fw_blob *blob = &blobs[i].blob; + + uc_fw->fallback.path = blob->path; + uc_fw->fallback.major_ver = blob->major; + uc_fw->fallback.minor_ver = blob->minor; + break; + } + } + } + /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { for (i = 1; i < fw_count; i++) { @@ -329,7 +363,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = request_firmware(&fw, uc_fw->path, dev); + err = firmware_request_nowarn(&fw, uc_fw->path, dev); + if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { + err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); + if (!err) { + drm_notice(&i915->drm, + "%s firmware %s is recommended, but only %s was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->wanted_path, + uc_fw->fallback.path); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + + uc_fw->path = uc_fw->fallback.path; + uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; + uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + } + } if (err) goto fail; @@ -428,8 +479,8 @@ fail: INTEL_UC_FIRMWARE_MISSING : INTEL_UC_FIRMWARE_ERROR); - drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -787,7 +838,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); + if (uc_fw->fallback.path) { + drm_printf(p, "%s firmware fallback: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); + drm_printf(p, "fallback selected: %s\n", + str_yes_no(uc_fw->path == uc_fw->fallback.path)); + } drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 3229018877d3..562acdf88adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -74,6 +74,7 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; + const char *wanted_path; const char *path; bool user_overridden; size_t size; @@ -98,6 +99,12 @@ struct intel_uc_fw { u16 major_ver_found; u16 minor_ver_found; + struct { + const char *path; + u16 major_ver; + u16 minor_ver; + } fallback; + u32 rsa_size; u32 ucode_size; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b9eb75a2b400..1c35a41620ae 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..1041b5340465 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -530,6 +530,7 @@ mask_err: static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; int ret; if (i915_inject_probe_failure(dev_priv)) @@ -641,6 +642,15 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_bw_init_hw(dev_priv); + /* + * FIXME: Temporary hammer to avoid freezing the machine on our DGFX + * This should be totally removed when we handle the pci states properly + * on runtime PM and on s2idle cases. + */ + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_disable(root_pdev); + return 0; err_msi: @@ -664,11 +674,16 @@ err_perf: static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; i915_perf_fini(dev_priv); if (pdev->msi_enabled) pci_disable_msi(pdev); + + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_enable(root_pdev); } /** @@ -1193,14 +1208,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) goto out; } - /* - * FIXME: Temporary hammer to avoid freezing the machine on our DGFX - * This should be totally removed when we handle the pci states properly - * on runtime PM and on s2idle cases. - */ - if (suspend_to_idle(dev_priv)) - pci_d3cold_disable(pdev); - pci_disable_device(pdev); /* * During hibernation on some platforms the BIOS may try to access @@ -1365,8 +1372,6 @@ static int i915_drm_resume_early(struct drm_device *dev) pci_set_master(pdev); - pci_d3cold_enable(pdev); - disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); ret = vlv_resume_prepare(dev_priv, false); @@ -1543,7 +1548,6 @@ static int intel_runtime_suspend(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv))) @@ -1589,12 +1593,6 @@ static int intel_runtime_suspend(struct device *kdev) drm_err(&dev_priv->drm, "Unclaimed access detected prior to suspending\n"); - /* - * FIXME: Temporary hammer to avoid freezing the machine on our DGFX - * This should be totally removed when we handle the pci states properly - * on runtime PM and on s2idle cases. - */ - pci_d3cold_disable(pdev); rpm->suspended = true; /* @@ -1633,7 +1631,6 @@ static int intel_runtime_resume(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv))) @@ -1646,7 +1643,6 @@ static int intel_runtime_resume(struct device *kdev) intel_opregion_notify_adapter(dev_priv, PCI_D0); rpm->suspended = false; - pci_d3cold_enable(pdev); if (intel_uncore_unclaimed_mmio(&dev_priv->uncore)) drm_dbg(&dev_priv->drm, "Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 18d38cb59923..b09d1d386574 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -116,8 +116,9 @@ show_client_class(struct seq_file *m, total += busy_add(ctx, class); rcu_read_unlock(); - seq_printf(m, "drm-engine-%s:\t%llu ns\n", - uabi_class_names[class], total); + if (capacity) + seq_printf(m, "drm-engine-%s:\t%llu ns\n", + uabi_class_names[class], total); if (capacity > 1) seq_printf(m, "drm-engine-capacity-%s:\t%u\n", diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 159571b9bd24..dcc081874ec8 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from a struct drm_mm_node, * taking a maximum segment length into account, splitting into segments @@ -77,22 +78,25 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start) + u64 region_start, + u32 page_alignment) { - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ - u64 segment_pages = max_segment >> PAGE_SHIFT; + const u32 max_segment = round_down(UINT_MAX, page_alignment); + const u32 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; + GEM_BUG_ON(!max_segment); + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) return ERR_PTR(-ENOMEM); i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); st = &rsgt->table; - if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); @@ -112,12 +116,14 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; @@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from struct i915_buddy_block list, * taking a maximum segment length into account, splitting into segments @@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start) + u64 region_start, + u32 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + const u32 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); + GEM_BUG_ON(!max_segment); rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) @@ -191,12 +200,14 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c6a1684081..9ddb3e743a3e 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start); + u64 region_start, + u32 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start); + u64 region_start, + u32 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 8521daba212a..1e2750210831 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct i915_gpu_coredump *gpu; - ssize_t ret; + ssize_t ret = 0; + + /* + * FIXME: Concurrent clients triggering resets and reading + clearing + * dumps can cause inconsistent sysfs reads when a user calls in with a + * non-zero offset to complete a prior partial read but the + * gpu_coredump has been cleared or replaced. + */ gpu = i915_first_error_state(i915); if (IS_ERR(gpu)) { @@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, const char *str = "No error state collected\n"; size_t len = strlen(str); - ret = min_t(size_t, count, len - off); - memcpy(buf, str + off, ret); + if (off < len) { + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } } return ret; @@ -259,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) device_remove_bin_file(kdev, &dpf_attrs_1); device_remove_bin_file(kdev, &dpf_attrs); + + kobject_put(dev_priv->sysfs_gt); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4f6db539571a..04d12f278f57 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -23,6 +23,7 @@ */ #include <linux/sched/mm.h> +#include <linux/dma-fence-array.h> #include <drm/drm_gem.h> #include "display/intel_frontbuffer.h" @@ -1636,10 +1637,10 @@ static void force_unbind(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); } -static void release_references(struct i915_vma *vma, bool vm_ddestroy) +static void release_references(struct i915_vma *vma, struct intel_gt *gt, + bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; - struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1694,11 +1695,12 @@ void i915_vma_destroy_locked(struct i915_vma *vma) force_unbind(vma); list_del_init(&vma->vm_link); - release_references(vma, false); + release_references(vma, vma->vm->gt, false); } void i915_vma_destroy(struct i915_vma *vma) { + struct intel_gt *gt; bool vm_ddestroy; mutex_lock(&vma->vm->mutex); @@ -1706,8 +1708,11 @@ void i915_vma_destroy(struct i915_vma *vma) list_del_init(&vma->vm_link); vm_ddestroy = vma->vm_ddestroy; vma->vm_ddestroy = false; + + /* vma->vm may be freed when releasing vma->vm->mutex. */ + gt = vma->vm->gt; mutex_unlock(&vma->vm->mutex); - release_references(vma, vm_ddestroy); + release_references(vma, gt, vm_ddestroy); } void i915_vma_parked(struct intel_gt *gt) @@ -1823,6 +1828,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma, if (unlikely(err)) return err; + /* + * Reserve fences slot early to prevent an allocation after preparing + * the workload and associating fences with dma_resv. + */ + if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) { + struct dma_fence *curr; + int idx; + + dma_fence_array_for_each(curr, idx, fence) + ; + err = dma_resv_reserve_fences(vma->obj->base.resv, idx); + if (unlikely(err)) + return err; + } + if (flags & EXEC_OBJECT_WRITE) { struct intel_frontbuffer *front; @@ -1832,31 +1852,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma, i915_active_add_request(&front->write, rq); intel_frontbuffer_put(front); } + } - if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); - if (unlikely(err)) - return err; - } + if (fence) { + struct dma_fence *curr; + enum dma_resv_usage usage; + int idx; - if (fence) { - dma_resv_add_fence(vma->obj->base.resv, fence, - DMA_RESV_USAGE_WRITE); + obj->read_domains = 0; + if (flags & EXEC_OBJECT_WRITE) { + usage = DMA_RESV_USAGE_WRITE; obj->write_domain = I915_GEM_DOMAIN_RENDER; - obj->read_domains = 0; - } - } else { - if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); - if (unlikely(err)) - return err; + } else { + usage = DMA_RESV_USAGE_READ; } - if (fence) { - dma_resv_add_fence(vma->obj->base.resv, fence, - DMA_RESV_USAGE_READ); - obj->write_domain = 0; - } + dma_fence_array_for_each(curr, idx, fence) + dma_resv_add_fence(vma->obj->base.resv, curr, usage); } if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..575d67bc6ffe 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) */ struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res) + struct ttm_resource *res, + u32 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + mem->region.start, + page_alignment); } else { - return i915_rsgt_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start, + page_alignment); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index cf9d86dcf409..5bb8d8b582ae 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res); + struct ttm_resource *res, + u32 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8633bec18fa7..ab9f17fc85bc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm, u64 addr; for (addr = round_up(hole_start + min_alignment, step) - min_alignment; - addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; + hole_end > addr && hole_end - addr >= 2 * min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 73eb53edb8de..3b18e5905c86 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -451,7 +451,6 @@ out_put: static int igt_mock_max_segment(void *arg) { - const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; @@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg) struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; + I915_RND_STATE(prng); LIST_HEAD(objects); + unsigned int max_segment; + unsigned int ps; u64 size; int err = 0; @@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); + ps = PAGE_SIZE; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + max_segment = round_down(UINT_MAX, ps); + + mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg) } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + dma_addr_t daddr = sg_dma_address(sg); + if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } + + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_close; + } } out_close: diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 670557ce1024..bac21fe84ca5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) return PTR_ERR(obj->mm.res); obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - obj->mm.res); + obj->mm.res, + obj->mm.region->min_page_size); if (IS_ERR(obj->mm.rsgt)) { err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e..3f5750cc2673 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c +++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output) ret = dcss_submodules_init(dcss); if (ret) { + of_node_put(dcss->of_port); dev_err(dev, "submodules initialization failed\n"); goto clks_err; } @@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss) dcss_clocks_disable(dcss); } + of_node_put(dcss->of_port); + pm_runtime_disable(dcss->dev); dcss_submodules_stop(dcss); diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 9c8829f945b2..f7863d6dea80 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -69,7 +69,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, drm_atomic_crtc_state_for_each_plane(plane, old_crtc_state) { if (plane == &ipu_crtc->plane[0]->base) disable_full = true; - if (&ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) + if (ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) disable_partial = true; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 4e665c806a14..efe9840e28fa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -498,10 +498,15 @@ int adreno_hw_init(struct msm_gpu *gpu) ring->cur = ring->start; ring->next = ring->start; - - /* reset completed fence seqno: */ - ring->memptrs->fence = ring->fctx->completed_fence; ring->memptrs->rptr = 0; + + /* Detect and clean up an impossible fence, ie. if GPU managed + * to scribble something invalid, we don't want that to confuse + * us into mistakingly believing that submits have completed. + */ + if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) { + ring->memptrs->fence = ring->fctx->last_fence; + } } return 0; @@ -1057,7 +1062,8 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) release_firmware(adreno_gpu->fw[i]); - pm_runtime_disable(&priv->gpu_pdev->dev); + if (pm_runtime_enabled(&priv->gpu_pdev->dev)) + pm_runtime_disable(&priv->gpu_pdev->dev); msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 3a462e327e0e..a1b8c4592943 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1251,12 +1251,13 @@ static void dpu_encoder_vblank_callback(struct drm_encoder *drm_enc, DPU_ATRACE_BEGIN("encoder_vblank_callback"); dpu_enc = to_dpu_encoder_virt(drm_enc); + atomic_inc(&phy_enc->vsync_cnt); + spin_lock_irqsave(&dpu_enc->enc_spinlock, lock_flags); if (dpu_enc->crtc) dpu_crtc_vblank_callback(dpu_enc->crtc); spin_unlock_irqrestore(&dpu_enc->enc_spinlock, lock_flags); - atomic_inc(&phy_enc->vsync_cnt); DPU_ATRACE_END("encoder_vblank_callback"); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 59da348ff339..0ec809ab06e7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -252,11 +252,6 @@ static int dpu_encoder_phys_wb_atomic_check( DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n", phys_enc->wb_idx, mode->name, mode->hdisplay, mode->vdisplay); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) - return 0; - - fb = conn_state->writeback_job->fb; - if (!conn_state || !conn_state->connector) { DPU_ERROR("invalid connector state\n"); return -EINVAL; @@ -267,6 +262,11 @@ static int dpu_encoder_phys_wb_atomic_check( return -EINVAL; } + if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + return 0; + + fb = conn_state->writeback_job->fb; + DPU_DEBUG("[fb_id:%u][fb:%u,%u]\n", fb->base.id, fb->width, fb->height); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 399115e4e217..2fd787079f9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -11,7 +11,14 @@ static int dpu_wb_conn_get_modes(struct drm_connector *connector) struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); - return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_linewidth, + /* + * We should ideally be limiting the modes only to the maxlinewidth but + * on some chipsets this will allow even 4k modes to be added which will + * fail the per SSPP bandwidth checks. So, till we have dual-SSPP support + * and source split support added lets limit the modes based on max_mixer_width + * as 4K modes can then be supported. + */ + return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_mixer_width, dev->mode_config.max_height); } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index fb48c8c19ec3..17cb1fc78379 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -216,6 +216,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, encoder = mdp4_lcdc_encoder_init(dev, panel_node); if (IS_ERR(encoder)) { DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n"); + of_node_put(panel_node); return PTR_ERR(encoder); } @@ -225,6 +226,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, connector = mdp4_lvds_connector_init(dev, panel_node, encoder); if (IS_ERR(connector)) { DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n"); + of_node_put(panel_node); return PTR_ERR(connector); } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index b7f5b8d3bbd6..703249384e7c 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1534,6 +1534,8 @@ end: return ret; } +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl); + static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) { int ret = 0; @@ -1557,7 +1559,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) ret = dp_ctrl_on_link(&ctrl->dp_ctrl); if (!ret) - ret = dp_ctrl_on_stream(&ctrl->dp_ctrl); + ret = dp_ctrl_on_stream_phy_test_report(&ctrl->dp_ctrl); else DRM_ERROR("failed to enable DP link controller\n"); @@ -1813,7 +1815,27 @@ static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl) return dp_ctrl_setup_main_link(ctrl, &training_step); } -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl) +{ + int ret; + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; + + ret = dp_ctrl_enable_stream_clocks(ctrl); + if (ret) { + DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret); + return ret; + } + + dp_ctrl_send_phy_test_pattern(ctrl); + + return 0; +} + +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train) { int ret = 0; bool mainlink_ready = false; @@ -1849,12 +1871,7 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) goto end; } - if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) { - dp_ctrl_send_phy_test_pattern(ctrl); - return 0; - } - - if (!dp_ctrl_channel_eq_ok(ctrl)) + if (force_link_train || !dp_ctrl_channel_eq_ok(ctrl)) dp_ctrl_link_retrain(ctrl); /* stop txing train pattern to end link training */ diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 0745fde01b45..b563e2e3bfe5 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -21,7 +21,7 @@ struct dp_ctrl { }; int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off_link(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index bce77935394f..239c8e3f2fbd 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -309,12 +309,15 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct msm_drm_private *priv = dev_get_drvdata(master); /* disable all HPD interrupts */ - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + if (dp->core_initialized) + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); kthread_stop(dp->ev_tsk); dp_power_client_deinit(dp->power); dp_aux_unregister(dp->aux); + dp->drm_dev = NULL; + dp->aux->drm_dev = NULL; priv->dp[dp->id] = NULL; } @@ -872,7 +875,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) return 0; } - rc = dp_ctrl_on_stream(dp->ctrl); + rc = dp_ctrl_on_stream(dp->ctrl, data); if (!rc) dp_display->power_on = true; @@ -1659,6 +1662,7 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) int rc = 0; struct dp_display_private *dp_display; u32 state; + bool force_link_train = false; dp_display = container_of(dp, struct dp_display_private, dp_display); if (!dp_display->dp_mode.drm_mode.clock) { @@ -1693,10 +1697,12 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) state = dp_display->hpd_state; - if (state == ST_DISPLAY_OFF) + if (state == ST_DISPLAY_OFF) { dp_display_host_phy_init(dp_display); + force_link_train = true; + } - dp_display_enable(dp_display, 0); + dp_display_enable(dp_display, force_link_train); rc = dp_display_post_enable(dp); if (rc) { @@ -1705,10 +1711,6 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) dp_display_unprepare(dp); } - /* manual kick off plug event to train link */ - if (state == ST_DISPLAY_OFF) - dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0); - /* completed connection */ dp_display->hpd_state = ST_CONNECTED; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 44485363f37a..14ab9a627d8b 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -964,7 +964,7 @@ static const struct drm_driver msm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_mmap = msm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 08388d742d65..099a67d10c3a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -246,6 +246,7 @@ unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_t void msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev); +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 3df255402a33..38e3323bc232 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -46,12 +46,14 @@ bool msm_fence_completed(struct msm_fence_context *fctx, uint32_t fence) (int32_t)(*fctx->fenceptr - fence) >= 0; } -/* called from workqueue */ +/* called from irq handler and workqueue (in recover path) */ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) { - spin_lock(&fctx->spinlock); + unsigned long flags; + + spin_lock_irqsave(&fctx->spinlock, flags); fctx->completed_fence = max(fence, fctx->completed_fence); - spin_unlock(&fctx->spinlock); + spin_unlock_irqrestore(&fctx->spinlock, flags); } struct msm_fence { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 97d5b4d8b9b0..7f92231785a0 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -439,14 +439,12 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) return ret; } -void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) +void msm_gem_unpin_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); GEM_WARN_ON(!msm_gem_is_locked(obj)); - msm_gem_unpin_vma(vma); - msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); @@ -586,7 +584,8 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj, msm_gem_lock(obj); vma = lookup_vma(obj, aspace); if (!GEM_WARN_ON(!vma)) { - msm_gem_unpin_vma_locked(obj, vma); + msm_gem_unpin_vma(vma); + msm_gem_unpin_locked(obj); } msm_gem_unlock(obj); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index c75d3b879a53..6b7d5bb3b575 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -145,7 +145,7 @@ struct msm_gem_object { uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma); -void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma); +void msm_gem_unpin_locked(struct drm_gem_object *obj); struct msm_gem_vma *msm_gem_get_vma_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); int msm_gem_get_iova(struct drm_gem_object *obj, @@ -377,10 +377,11 @@ struct msm_gem_submit { } *cmd; /* array of size nr_cmds */ struct { /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ -#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ -#define BO_LOCKED 0x4000 /* obj lock is held */ -#define BO_ACTIVE 0x2000 /* active refcnt is held */ -#define BO_PINNED 0x1000 /* obj is pinned and on active list */ +#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ +#define BO_LOCKED 0x4000 /* obj lock is held */ +#define BO_ACTIVE 0x2000 /* active refcnt is held */ +#define BO_OBJ_PINNED 0x1000 /* obj (pages) is pinned and on active list */ +#define BO_VMA_PINNED 0x0800 /* vma (virtual address) is pinned */ uint32_t flags; union { struct msm_gem_object *obj; diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 94ab705e9b8a..dcc8a573bc76 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -11,6 +11,21 @@ #include "msm_drv.h" #include "msm_gem.h" +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + /* Ensure the mmap offset is initialized. We lazily initialize it, + * so if it has not been first mmap'd directly as a GEM object, the + * mmap offset will not be already initialized. + */ + ret = drm_gem_create_mmap_offset(obj); + if (ret) + return ret; + + return drm_gem_prime_mmap(obj, vma); +} + struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 80975229b4de..c9e4aeb14f4a 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -232,8 +232,11 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, */ submit->bos[i].flags &= ~cleanup_flags; - if (flags & BO_PINNED) - msm_gem_unpin_vma_locked(obj, submit->bos[i].vma); + if (flags & BO_VMA_PINNED) + msm_gem_unpin_vma(submit->bos[i].vma); + + if (flags & BO_OBJ_PINNED) + msm_gem_unpin_locked(obj); if (flags & BO_ACTIVE) msm_gem_active_put(obj); @@ -244,7 +247,9 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) { - submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE | BO_LOCKED); + unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED | + BO_ACTIVE | BO_LOCKED; + submit_cleanup_bo(submit, i, cleanup_flags); if (!(submit->bos[i].flags & BO_VALID)) submit->bos[i].iova = 0; @@ -375,7 +380,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - submit->bos[i].flags |= BO_PINNED; + submit->bos[i].flags |= BO_OBJ_PINNED | BO_VMA_PINNED; submit->bos[i].vma = vma; if (vma->iova == submit->bos[i].iova) { @@ -511,7 +516,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error) unsigned i; if (error) - cleanup_flags |= BO_PINNED | BO_ACTIVE; + cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED | BO_ACTIVE; for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -529,7 +534,8 @@ void msm_submit_retire(struct msm_gem_submit *submit) struct drm_gem_object *obj = &submit->bos[i].obj->base; msm_gem_lock(obj); - submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE); + /* Note, VMA already fence-unpinned before submit: */ + submit_cleanup_bo(submit, i, BO_OBJ_PINNED | BO_ACTIVE); msm_gem_unlock(obj); drm_gem_object_put(obj); } @@ -922,7 +928,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, INT_MAX, GFP_KERNEL); } if (submit->fence_id < 0) { - ret = submit->fence_id = 0; + ret = submit->fence_id; submit->fence_id = 0; } diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 3c1dc9241831..c471aebcdbab 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -62,8 +62,7 @@ void msm_gem_purge_vma(struct msm_gem_address_space *aspace, unsigned size = vma->node.size; /* Print a message if we try to purge a vma in use */ - if (GEM_WARN_ON(msm_gem_vma_inuse(vma))) - return; + GEM_WARN_ON(msm_gem_vma_inuse(vma)); /* Don't do anything if the memory isn't mapped */ if (!vma->mapped) @@ -128,8 +127,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, void msm_gem_close_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma) { - if (GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped)) - return; + GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped); spin_lock(&aspace->lock); if (vma->iova) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..c8cd9bfa3eeb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -164,24 +164,6 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) return ret; } -static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, - uint32_t fence) -{ - struct msm_gem_submit *submit; - unsigned long flags; - - spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) { - if (fence_after(submit->seqno, fence)) - break; - - msm_update_fence(submit->ring->fctx, - submit->hw_fence->seqno); - dma_fence_signal(submit->hw_fence); - } - spin_unlock_irqrestore(&ring->submit_lock, flags); -} - #ifdef CONFIG_DEV_COREDUMP static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) @@ -436,9 +418,9 @@ static void recover_worker(struct kthread_work *work) * one more to clear the faulting submit */ if (ring == cur_ring) - fence++; + ring->memptrs->fence = ++fence; - update_fences(gpu, ring, fence); + msm_update_fence(ring->fctx, fence); } if (msm_gpu_active(gpu)) { @@ -672,7 +654,6 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_submit_retire(submit); pm_runtime_mark_last_busy(&gpu->pdev->dev); - pm_runtime_put_autosuspend(&gpu->pdev->dev); spin_lock_irqsave(&ring->submit_lock, flags); list_del(&submit->node); @@ -686,6 +667,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_devfreq_idle(gpu); mutex_unlock(&gpu->active_lock); + pm_runtime_put_autosuspend(&gpu->pdev->dev); + msm_gem_submit_put(submit); } @@ -735,7 +718,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) int i; for (i = 0; i < gpu->nr_rings; i++) - update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); + msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence); kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu); diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index bcaddbba564d..a54ed354578b 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -58,7 +58,7 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, u64 addr = iova; unsigned int i; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { + for_each_sgtable_sg(sgt, sg, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 43066320ff8c..56eecb4a72dc 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -25,7 +25,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_gem_lock(obj); msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx); - submit->bos[i].flags &= ~BO_PINNED; + submit->bos[i].flags &= ~BO_VMA_PINNED; msm_gem_unlock(obj); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 7ba66ad68a8a..16356611b5b9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -680,7 +680,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, goto out_free_dma; for (i = 0; i < npages; i += max) { - args.end = start + (max << PAGE_SHIFT); + if (args.start + (max << PAGE_SHIFT) > end) + args.end = end; + else + args.end = args.start + (max << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) goto out_free_pfns; diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index c96014464355..a189982601a4 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -713,7 +713,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms); desc->delay.hpd_reliable = reliable_ms; of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms); - desc->delay.hpd_reliable = absent_ms; + desc->delay.hpd_absent = absent_ms; /* Power the panel on so we can read the EDID */ ret = pm_runtime_get_sync(dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 7fcbc2a5b6cd..b1e6d238674f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -233,6 +233,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) { struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_file_priv *file_priv = file->driver_priv; struct drm_panfrost_submit *args = data; struct drm_syncobj *sync_out = NULL; struct panfrost_job *job; @@ -262,12 +263,12 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, job->jc = args->jc; job->requirements = args->requirements; job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); - job->file_priv = file->driver_priv; + job->mmu = file_priv->mmu; slot = panfrost_job_get_slot(job); ret = drm_sched_job_init(&job->base, - &job->file_priv->sched_entity[slot], + &file_priv->sched_entity[slot], NULL); if (ret) goto out_put_job; @@ -432,8 +433,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index fda5871aebe3..7c4208476fbd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -201,7 +201,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) return; } - cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu); + cfg = panfrost_mmu_as_get(pfdev, job->mmu); job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); @@ -435,7 +435,7 @@ static void panfrost_job_handle_err(struct panfrost_device *pfdev, job->jc = 0; } - panfrost_mmu_as_put(pfdev, job->file_priv->mmu); + panfrost_mmu_as_put(pfdev, job->mmu); panfrost_devfreq_record_idle(&pfdev->pfdevfreq); if (signal_fence) @@ -456,7 +456,7 @@ static void panfrost_job_handle_done(struct panfrost_device *pfdev, * happen when we receive the DONE interrupt while doing a GPU reset). */ job->jc = 0; - panfrost_mmu_as_put(pfdev, job->file_priv->mmu); + panfrost_mmu_as_put(pfdev, job->mmu); panfrost_devfreq_record_idle(&pfdev->pfdevfreq); dma_fence_signal_locked(job->done_fence); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index 77e6d0e6f612..8becc1ba0eb9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -17,7 +17,7 @@ struct panfrost_job { struct kref refcount; struct panfrost_device *pfdev; - struct panfrost_file_priv *file_priv; + struct panfrost_mmu *mmu; /* Fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *done_fence; diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index d3f82b26a631..b285a8001b1d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -518,7 +518,7 @@ err_map: err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - drm_gem_object_put(&bo->base.base); + panfrost_gem_mapping_put(bomapping); return ret; } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 67d38f53d3e5..13ed33e74457 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -23,6 +23,14 @@ #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> +#if defined(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#else +#define arm_iommu_detach_device(...) ({ }) +#define arm_iommu_release_mapping(...) ({ }) +#define to_dma_iommu_mapping(dev) NULL +#endif + #include "rockchip_drm_drv.h" #include "rockchip_drm_fb.h" #include "rockchip_drm_gem.h" @@ -49,6 +57,15 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev, if (!private->domain) return 0; + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + if (mapping) { + arm_iommu_detach_device(dev); + arm_iommu_release_mapping(mapping); + } + } + ret = iommu_attach_device(private->domain, dev); if (ret) { DRM_DEV_ERROR(dev, "Failed to attach iommu device\n"); diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 191c56064f19..6b25b2f4f5a3 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) } EXPORT_SYMBOL(drm_sched_entity_flush); -static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk) +static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); @@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); - init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work); - irq_work_queue(&job->work); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); + schedule_work(&job->work); } static struct dma_fence * diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 08394444dd6e..f4886e66ff34 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -350,7 +350,7 @@ static int ssd130x_init(struct ssd130x_device *ssd130x) /* Set precharge period in number of ticks from the internal clock */ precharge = (SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep1) | - SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep2)); + SSD130X_SET_PRECHARGE_PERIOD2_SET(ssd130x->prechargep2)); ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_PRECHARGE_PERIOD, precharge); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 275f7e4a03ae..6eb1aabdb161 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -7,6 +7,7 @@ */ #include <linux/component.h> +#include <linux/dma-mapping.h> #include <linux/kfifo.h> #include <linux/module.h> #include <linux/of_graph.h> @@ -73,7 +74,6 @@ static int sun4i_drv_bind(struct device *dev) goto free_drm; } - dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); @@ -114,6 +114,8 @@ static int sun4i_drv_bind(struct device *dev) drm_fbdev_generic_setup(drm, 32); + dev_set_drvdata(dev, drm); + return 0; finish_poll: @@ -130,6 +132,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + dev_set_drvdata(dev, NULL); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); @@ -367,6 +370,13 @@ static int sun4i_drv_probe(struct platform_device *pdev) INIT_KFIFO(list.fifo); + /* + * DE2 and DE3 cores actually supports 40-bit addresses, but + * driver does not. + */ + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + for (i = 0;; i++) { struct device_node *pipeline = of_parse_phandle(np, "allwinner,pipelines", diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index 6d43080791a0..85fb9e800ddf 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -117,7 +117,7 @@ static bool sun4i_layer_format_mod_supported(struct drm_plane *plane, struct sun4i_layer *layer = plane_to_sun4i_layer(plane); if (IS_ERR_OR_NULL(layer->backend->frontend)) - sun4i_backend_format_is_supported(format, modifier); + return sun4i_backend_format_is_supported(format, modifier); return sun4i_backend_format_is_supported(format, modifier) || sun4i_frontend_format_is_supported(format, modifier); diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index a8d75fd7e9f4..477cb6985b4d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -93,34 +93,10 @@ crtcs_exit: return crtcs; } -static int sun8i_dw_hdmi_find_connector_pdev(struct device *dev, - struct platform_device **pdev_out) -{ - struct platform_device *pdev; - struct device_node *remote; - - remote = of_graph_get_remote_node(dev->of_node, 1, -1); - if (!remote) - return -ENODEV; - - if (!of_device_is_compatible(remote, "hdmi-connector")) { - of_node_put(remote); - return -ENODEV; - } - - pdev = of_find_device_by_node(remote); - of_node_put(remote); - if (!pdev) - return -ENODEV; - - *pdev_out = pdev; - return 0; -} - static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, void *data) { - struct platform_device *pdev = to_platform_device(dev), *connector_pdev; + struct platform_device *pdev = to_platform_device(dev); struct dw_hdmi_plat_data *plat_data; struct drm_device *drm = data; struct device_node *phy_node; @@ -167,30 +143,16 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, return dev_err_probe(dev, PTR_ERR(hdmi->regulator), "Couldn't get regulator\n"); - ret = sun8i_dw_hdmi_find_connector_pdev(dev, &connector_pdev); - if (!ret) { - hdmi->ddc_en = gpiod_get_optional(&connector_pdev->dev, - "ddc-en", GPIOD_OUT_HIGH); - platform_device_put(connector_pdev); - - if (IS_ERR(hdmi->ddc_en)) { - dev_err(dev, "Couldn't get ddc-en gpio\n"); - return PTR_ERR(hdmi->ddc_en); - } - } - ret = regulator_enable(hdmi->regulator); if (ret) { dev_err(dev, "Failed to enable regulator\n"); - goto err_unref_ddc_en; + return ret; } - gpiod_set_value(hdmi->ddc_en, 1); - ret = reset_control_deassert(hdmi->rst_ctrl); if (ret) { dev_err(dev, "Could not deassert ctrl reset control\n"); - goto err_disable_ddc_en; + goto err_disable_regulator; } ret = clk_prepare_enable(hdmi->clk_tmds); @@ -245,12 +207,8 @@ err_disable_clk_tmds: clk_disable_unprepare(hdmi->clk_tmds); err_assert_ctrl_reset: reset_control_assert(hdmi->rst_ctrl); -err_disable_ddc_en: - gpiod_set_value(hdmi->ddc_en, 0); +err_disable_regulator: regulator_disable(hdmi->regulator); -err_unref_ddc_en: - if (hdmi->ddc_en) - gpiod_put(hdmi->ddc_en); return ret; } @@ -264,11 +222,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); reset_control_assert(hdmi->rst_ctrl); - gpiod_set_value(hdmi->ddc_en, 0); regulator_disable(hdmi->regulator); - - if (hdmi->ddc_en) - gpiod_put(hdmi->ddc_en); } static const struct component_ops sun8i_dw_hdmi_ops = { diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index bffe1b9cd3dc..9ad09522947a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -9,7 +9,6 @@ #include <drm/bridge/dw_hdmi.h> #include <drm/drm_encoder.h> #include <linux/clk.h> -#include <linux/gpio/consumer.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/reset.h> @@ -193,7 +192,6 @@ struct sun8i_dw_hdmi { struct regulator *regulator; const struct sun8i_dw_hdmi_quirks *quirks; struct reset_control *rst_ctrl; - struct gpio_desc *ddc_en; }; extern struct platform_driver sun8i_hdmi_phy_driver; diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 768242a78e2b..5422363690e7 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -627,7 +627,7 @@ static const struct drm_connector_funcs simpledrm_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int +static enum drm_mode_status simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe, const struct drm_display_mode *mode) { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 75d308ec173d..406e9c324e76 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -109,11 +109,11 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo, return; spin_lock(&bo->bdev->lru_lock); - if (bo->bulk_move && bo->resource) - ttm_lru_bulk_move_del(bo->bulk_move, bo->resource); + if (bo->resource) + ttm_resource_del_bulk_move(bo->resource, bo); bo->bulk_move = bulk; - if (bo->bulk_move && bo->resource) - ttm_lru_bulk_move_add(bo->bulk_move, bo->resource); + if (bo->resource) + ttm_resource_add_bulk_move(bo->resource, bo); spin_unlock(&bo->bdev->lru_lock); } EXPORT_SYMBOL(ttm_bo_set_bulk_move); @@ -689,8 +689,11 @@ void ttm_bo_pin(struct ttm_buffer_object *bo) { dma_resv_assert_held(bo->base.resv); WARN_ON_ONCE(!kref_read(&bo->kref)); - if (!(bo->pin_count++) && bo->bulk_move && bo->resource) - ttm_lru_bulk_move_del(bo->bulk_move, bo->resource); + spin_lock(&bo->bdev->lru_lock); + if (bo->resource) + ttm_resource_del_bulk_move(bo->resource, bo); + ++bo->pin_count; + spin_unlock(&bo->bdev->lru_lock); } EXPORT_SYMBOL(ttm_bo_pin); @@ -707,8 +710,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!bo->pin_count)) return; - if (!(--bo->pin_count) && bo->bulk_move && bo->resource) - ttm_lru_bulk_move_add(bo->bulk_move, bo->resource); + spin_lock(&bo->bdev->lru_lock); + --bo->pin_count; + if (bo->resource) + ttm_resource_add_bulk_move(bo->resource, bo); + spin_unlock(&bo->bdev->lru_lock); } EXPORT_SYMBOL(ttm_bo_unpin); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index a0562ab386f5..e7147e304637 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -156,8 +156,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, ttm_resource_manager_for_each_res(man, &cursor, res) { struct ttm_buffer_object *bo = res->bo; - uint32_t num_pages = PFN_UP(bo->base.size); + uint32_t num_pages; + if (!bo) + continue; + + num_pages = PFN_UP(bo->base.size); ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ if (!ret) diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 65889b3caf50..20f9adcc3235 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,8 +91,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos, } /* Add the resource to a bulk_move cursor */ -void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, - struct ttm_resource *res) +static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, + struct ttm_resource *res) { struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res); @@ -105,8 +105,8 @@ void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, } /* Remove the resource from a bulk_move range */ -void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, - struct ttm_resource *res) +static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, + struct ttm_resource *res) { struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res); @@ -122,6 +122,22 @@ void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, } } +/* Add the resource to a bulk move if the BO is configured for it */ +void ttm_resource_add_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + if (bo->bulk_move && !bo->pin_count) + ttm_lru_bulk_move_add(bo->bulk_move, res); +} + +/* Remove the resource from a bulk move if the BO is configured for it */ +void ttm_resource_del_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + if (bo->bulk_move && !bo->pin_count) + ttm_lru_bulk_move_del(bo->bulk_move, res); +} + /* Move a resource to the LRU or bulk tail */ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) { @@ -169,15 +185,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo, res->bus.is_iomem = false; res->bus.caching = ttm_cached; res->bo = bo; - INIT_LIST_HEAD(&res->lru); man = ttm_manager_type(bo->bdev, place->mem_type); spin_lock(&bo->bdev->lru_lock); - man->usage += res->num_pages << PAGE_SHIFT; - if (bo->bulk_move) - ttm_lru_bulk_move_add(bo->bulk_move, res); + if (bo->pin_count) + list_add_tail(&res->lru, &bo->bdev->pinned); else - ttm_resource_move_to_lru_tail(res); + list_add_tail(&res->lru, &man->lru[bo->priority]); + man->usage += res->num_pages << PAGE_SHIFT; spin_unlock(&bo->bdev->lru_lock); } EXPORT_SYMBOL(ttm_resource_init); @@ -210,8 +225,16 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, place->mem_type); + int ret; + + ret = man->func->alloc(man, bo, place, res_ptr); + if (ret) + return ret; - return man->func->alloc(man, bo, place, res_ptr); + spin_lock(&bo->bdev->lru_lock); + ttm_resource_add_bulk_move(*res_ptr, bo); + spin_unlock(&bo->bdev->lru_lock); + return 0; } void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) @@ -221,12 +244,9 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) if (!*res) return; - if (bo->bulk_move) { - spin_lock(&bo->bdev->lru_lock); - ttm_lru_bulk_move_del(bo->bulk_move, *res); - spin_unlock(&bo->bdev->lru_lock); - } - + spin_lock(&bo->bdev->lru_lock); + ttm_resource_del_bulk_move(*res, bo); + spin_unlock(&bo->bdev->lru_lock); man = ttm_manager_type(bo->bdev, (*res)->mem_type); man->func->free(man, *res); *res = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 49c0f2ac868b..b8d856312846 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -248,6 +248,9 @@ void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo) { struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->purgeable.lock); list_add_tail(&bo->size_head, &vc4->purgeable.list); vc4->purgeable.num++; @@ -259,6 +262,9 @@ static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo) { struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* list_del_init() is used here because the caller might release * the purgeable lock in order to acquire the madv one and update the * madv status. @@ -387,6 +393,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) return ERR_PTR(-ENOMEM); @@ -413,6 +422,9 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, struct drm_gem_cma_object *cma_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + if (size == 0) return ERR_PTR(-EINVAL); @@ -471,19 +483,20 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return bo; } -int vc4_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) +int vc4_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) { - int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo = NULL; int ret; - if (args->pitch < min_pitch) - args->pitch = min_pitch; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; - if (args->size < args->pitch * args->height) - args->size = args->pitch * args->height; + ret = vc4_dumb_fixup_args(args); + if (ret) + return ret; bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB); if (IS_ERR(bo)) @@ -601,8 +614,12 @@ static void vc4_bo_cache_time_work(struct work_struct *work) int vc4_bo_inc_usecnt(struct vc4_bo *bo) { + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + /* Fast path: if the BO is already retained by someone, no need to * check the madv status. */ @@ -637,6 +654,11 @@ int vc4_bo_inc_usecnt(struct vc4_bo *bo) void vc4_bo_dec_usecnt(struct vc4_bo *bo) { + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* Fast path: if the BO is still retained by someone, no need to test * the madv value. */ @@ -756,6 +778,9 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct vc4_bo *bo = NULL; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + ret = vc4_grab_bin_bo(vc4, vc4file); if (ret) return ret; @@ -779,9 +804,13 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_mmap_bo *args = data; struct drm_gem_object *gem_obj; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) { DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); @@ -805,6 +834,9 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct vc4_bo *bo = NULL; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->size == 0) return -EINVAL; @@ -875,11 +907,15 @@ fail: int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_set_tiling *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; bool t_format; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->flags != 0) return -EINVAL; @@ -918,10 +954,14 @@ int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_get_tiling *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->flags != 0 || args->modifier != 0) return -EINVAL; @@ -948,6 +988,9 @@ int vc4_bo_cache_init(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); int i; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + /* Create the initial set of BO labels that the kernel will * use. This lets us avoid a bunch of string reallocation in * the kernel's draw and BO allocation paths. @@ -1007,6 +1050,9 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_gem_object *gem_obj; int ret = 0, label; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!args->len) return -EINVAL; diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 59b20c8f132b..9355213dc883 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -256,7 +256,7 @@ static u32 vc4_get_fifo_full_level(struct vc4_crtc *vc4_crtc, u32 format) * Removing 1 from the FIFO full level however * seems to completely remove that issue. */ - if (!vc4->hvs->hvs5) + if (!vc4->is_vc5) return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX - 1; return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX; @@ -389,7 +389,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode if (is_dsi) CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep); - if (vc4->hvs->hvs5) + if (vc4->is_vc5) CRTC_WRITE(PV_MUX_CFG, VC4_SET_FIELD(PV_MUX_CFG_RGB_PIXEL_MUX_MODE_NO_SWAP, PV_MUX_CFG_RGB_PIXEL_MUX_MODE)); @@ -775,17 +775,18 @@ struct vc4_async_flip_state { struct drm_framebuffer *old_fb; struct drm_pending_vblank_event *event; - struct vc4_seqno_cb cb; + union { + struct dma_fence_cb fence; + struct vc4_seqno_cb seqno; + } cb; }; /* Called when the V3D execution for the BO being flipped to is done, so that * we can actually update the plane's address to point to it. */ static void -vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) +vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state) { - struct vc4_async_flip_state *flip_state = - container_of(cb, struct vc4_async_flip_state, cb); struct drm_crtc *crtc = flip_state->crtc; struct drm_device *dev = crtc->dev; struct drm_plane *plane = crtc->primary; @@ -802,59 +803,96 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) drm_crtc_vblank_put(crtc); drm_framebuffer_put(flip_state->fb); - /* Decrement the BO usecnt in order to keep the inc/dec calls balanced - * when the planes are updated through the async update path. - * FIXME: we should move to generic async-page-flip when it's - * available, so that we can get rid of this hand-made cleanup_fb() - * logic. - */ - if (flip_state->old_fb) { - struct drm_gem_cma_object *cma_bo; - struct vc4_bo *bo; + if (flip_state->old_fb) + drm_framebuffer_put(flip_state->old_fb); + + kfree(flip_state); +} + +static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb.seqno); + struct vc4_bo *bo = NULL; - cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); + if (flip_state->old_fb) { + struct drm_gem_cma_object *cma_bo = + drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); bo = to_vc4_bo(&cma_bo->base); - vc4_bo_dec_usecnt(bo); - drm_framebuffer_put(flip_state->old_fb); } - kfree(flip_state); + vc4_async_page_flip_complete(flip_state); + + /* + * Decrement the BO usecnt in order to keep the inc/dec + * calls balanced when the planes are updated through + * the async update path. + * + * FIXME: we should move to generic async-page-flip when + * it's available, so that we can get rid of this + * hand-made cleanup_fb() logic. + */ + if (bo) + vc4_bo_dec_usecnt(bo); } -/* Implements async (non-vblank-synced) page flips. - * - * The page flip ioctl needs to return immediately, so we grab the - * modeset semaphore on the pipe, and queue the address update for - * when V3D is done with the BO being flipped to. - */ -static int vc4_async_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t flags) +static void vc4_async_page_flip_fence_complete(struct dma_fence *fence, + struct dma_fence_cb *cb) { - struct drm_device *dev = crtc->dev; - struct drm_plane *plane = crtc->primary; - int ret = 0; - struct vc4_async_flip_state *flip_state; + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb.fence); + + vc4_async_page_flip_complete(flip_state); + dma_fence_put(fence); +} + +static int vc4_async_set_fence_cb(struct drm_device *dev, + struct vc4_async_flip_state *flip_state) +{ + struct drm_framebuffer *fb = flip_state->fb; struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); - struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct dma_fence *fence; + int ret; - /* Increment the BO usecnt here, so that we never end up with an - * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the - * plane is later updated through the non-async path. - * FIXME: we should move to generic async-page-flip when it's - * available, so that we can get rid of this hand-made prepare_fb() - * logic. - */ - ret = vc4_bo_inc_usecnt(bo); + if (!vc4->is_vc5) { + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno, + vc4_async_page_flip_seqno_complete); + } + + ret = dma_resv_get_singleton(cma_bo->base.resv, DMA_RESV_USAGE_READ, &fence); if (ret) return ret; + /* If there's no fence, complete the page flip immediately */ + if (!fence) { + vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); + return 0; + } + + /* If the fence has already been completed, complete the page flip */ + if (dma_fence_add_callback(fence, &flip_state->cb.fence, + vc4_async_page_flip_fence_complete)) + vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); + + return 0; +} + +static int +vc4_async_page_flip_common(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct drm_plane *plane = crtc->primary; + struct vc4_async_flip_state *flip_state; + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); - if (!flip_state) { - vc4_bo_dec_usecnt(bo); + if (!flip_state) return -ENOMEM; - } drm_framebuffer_get(fb); flip_state->fb = fb; @@ -881,23 +919,79 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, */ drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno, - vc4_async_page_flip_complete); + vc4_async_set_fence_cb(dev, flip_state); /* Driver takes ownership of state on successful async commit. */ return 0; } +/* Implements async (non-vblank-synced) page flips. + * + * The page flip ioctl needs to return immediately, so we grab the + * modeset semaphore on the pipe, and queue the address update for + * when V3D is done with the BO being flipped to. + */ +static int vc4_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + int ret; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + + /* + * Increment the BO usecnt here, so that we never end up with an + * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the + * plane is later updated through the non-async path. + * + * FIXME: we should move to generic async-page-flip when + * it's available, so that we can get rid of this + * hand-made prepare_fb() logic. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + + ret = vc4_async_page_flip_common(crtc, fb, event, flags); + if (ret) { + vc4_bo_dec_usecnt(bo); + return ret; + } + + return 0; +} + +static int vc5_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + return vc4_async_page_flip_common(crtc, fb, event, flags); +} + int vc4_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t flags, struct drm_modeset_acquire_ctx *ctx) { - if (flags & DRM_MODE_PAGE_FLIP_ASYNC) - return vc4_async_page_flip(crtc, fb, event, flags); - else + if (flags & DRM_MODE_PAGE_FLIP_ASYNC) { + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + if (vc4->is_vc5) + return vc5_async_page_flip(crtc, fb, event, flags); + else + return vc4_async_page_flip(crtc, fb, event, flags); + } else { return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx); + } } struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) @@ -1149,7 +1243,7 @@ int vc4_crtc_init(struct drm_device *drm, struct vc4_crtc *vc4_crtc, crtc_funcs, NULL); drm_crtc_helper_add(crtc, crtc_helper_funcs); - if (!vc4->hvs->hvs5) { + if (!vc4->is_vc5) { drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 162bc18e7497..0f0f0263e744 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -63,6 +63,32 @@ void __iomem *vc4_ioremap_regs(struct platform_device *pdev, int index) return map; } +int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args) +{ + int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + + if (args->pitch < min_pitch) + args->pitch = min_pitch; + + if (args->size < args->pitch * args->height) + args->size = args->pitch * args->height; + + return 0; +} + +static int vc5_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + int ret; + + ret = vc4_dumb_fixup_args(args); + if (ret) + return ret; + + return drm_gem_cma_dumb_create_internal(file_priv, dev, args); +} + static int vc4_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -73,6 +99,9 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, if (args->pad != 0) return -EINVAL; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) return -ENODEV; @@ -116,11 +145,16 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, static int vc4_open(struct drm_device *dev, struct drm_file *file) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL); if (!vc4file) return -ENOMEM; + vc4file->dev = vc4; vc4_perfmon_open_file(vc4file); file->driver_priv = vc4file; @@ -132,6 +166,9 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file->driver_priv; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (vc4file->bin_bo_used) vc4_v3d_bin_bo_put(vc4); @@ -160,7 +197,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), }; -static struct drm_driver vc4_drm_driver = { +static const struct drm_driver vc4_drm_driver = { .driver_features = (DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM | @@ -175,7 +212,7 @@ static struct drm_driver vc4_drm_driver = { .gem_create_object = vc4_create_object, - DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_dumb_create), + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_bo_dumb_create), .ioctls = vc4_drm_ioctls, .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls), @@ -189,6 +226,27 @@ static struct drm_driver vc4_drm_driver = { .patchlevel = DRIVER_PATCHLEVEL, }; +static const struct drm_driver vc5_drm_driver = { + .driver_features = (DRIVER_MODESET | + DRIVER_ATOMIC | + DRIVER_GEM), + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = vc4_debugfs_init, +#endif + + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc5_dumb_create), + + .fops = &vc4_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + static void vc4_match_add_drivers(struct device *dev, struct component_match **match, struct platform_driver *const *drivers, @@ -212,42 +270,49 @@ static void vc4_match_add_drivers(struct device *dev, static int vc4_drm_bind(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); + const struct drm_driver *driver; struct rpi_firmware *firmware = NULL; struct drm_device *drm; struct vc4_dev *vc4; struct device_node *node; struct drm_crtc *crtc; + bool is_vc5; int ret = 0; dev->coherent_dma_mask = DMA_BIT_MASK(32); - /* If VC4 V3D is missing, don't advertise render nodes. */ - node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL); - if (!node || !of_device_is_available(node)) - vc4_drm_driver.driver_features &= ~DRIVER_RENDER; - of_node_put(node); + is_vc5 = of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5"); + if (is_vc5) + driver = &vc5_drm_driver; + else + driver = &vc4_drm_driver; - vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base); + vc4 = devm_drm_dev_alloc(dev, driver, struct vc4_dev, base); if (IS_ERR(vc4)) return PTR_ERR(vc4); + vc4->is_vc5 = is_vc5; drm = &vc4->base; platform_set_drvdata(pdev, drm); INIT_LIST_HEAD(&vc4->debugfs_list); - mutex_init(&vc4->bin_bo_lock); + if (!is_vc5) { + mutex_init(&vc4->bin_bo_lock); - ret = vc4_bo_cache_init(drm); - if (ret) - return ret; + ret = vc4_bo_cache_init(drm); + if (ret) + return ret; + } ret = drmm_mode_config_init(drm); if (ret) return ret; - ret = vc4_gem_init(drm); - if (ret) - return ret; + if (!is_vc5) { + ret = vc4_gem_init(drm); + if (ret) + return ret; + } node = of_find_compatible_node(NULL, NULL, "raspberrypi,bcm2835-firmware"); if (node) { @@ -258,7 +323,7 @@ static int vc4_drm_bind(struct device *dev) return -EPROBE_DEFER; } - ret = drm_aperture_remove_framebuffers(false, &vc4_drm_driver); + ret = drm_aperture_remove_framebuffers(false, driver); if (ret) return ret; diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 15e0c2ac3940..93fd55b9e99e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -48,6 +48,8 @@ enum vc4_kernel_bo_type { * done. This way, only events related to a specific job will be counted. */ struct vc4_perfmon { + struct vc4_dev *dev; + /* Tracks the number of users of the perfmon, when this counter reaches * zero the perfmon is destroyed. */ @@ -74,6 +76,8 @@ struct vc4_perfmon { struct vc4_dev { struct drm_device base; + bool is_vc5; + unsigned int irq; struct vc4_hvs *hvs; @@ -316,6 +320,7 @@ struct vc4_v3d { }; struct vc4_hvs { + struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; u32 __iomem *dlist; @@ -333,9 +338,6 @@ struct vc4_hvs { struct drm_mm_node mitchell_netravali_filter; struct debugfs_regset32 regset; - - /* HVS version 5 flag, therefore requires updated dlist structures */ - bool hvs5; }; struct vc4_plane { @@ -580,6 +582,8 @@ to_vc4_crtc_state(struct drm_crtc_state *crtc_state) #define VC4_REG32(reg) { .name = #reg, .offset = reg } struct vc4_exec_info { + struct vc4_dev *dev; + /* Sequence number for this bin/render job. */ uint64_t seqno; @@ -701,6 +705,8 @@ struct vc4_exec_info { * released when the DRM file is closed should be placed here. */ struct vc4_file { + struct vc4_dev *dev; + struct { struct idr idr; struct mutex lock; @@ -814,9 +820,9 @@ struct vc4_validated_shader_info { struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, bool from_cache, enum vc4_kernel_bo_type type); -int vc4_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); +int vc4_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, @@ -885,6 +891,7 @@ static inline void vc4_debugfs_add_regset32(struct drm_device *drm, /* vc4_drv.c */ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); +int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args); /* vc4_dpi.c */ extern struct platform_driver vc4_dpi_driver; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 9eaf304fc20d..fe10d9c3fff8 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -76,6 +76,9 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, u32 i; int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n"); return -ENODEV; @@ -386,6 +389,9 @@ vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, unsigned long timeout_expire; DEFINE_WAIT(wait); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (vc4->finished_seqno >= seqno) return 0; @@ -468,6 +474,9 @@ vc4_submit_next_bin_job(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + again: exec = vc4_first_bin_job(vc4); if (!exec) @@ -513,6 +522,9 @@ vc4_submit_next_render_job(struct drm_device *dev) if (!exec) return; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* A previous RCL may have written to one of our textures, and * our full cache flush at bin time may have occurred before * that RCL completed. Flush the texture cache now, but not @@ -531,6 +543,9 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) struct vc4_dev *vc4 = to_vc4_dev(dev); bool was_empty = list_empty(&vc4->render_job_list); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + list_move_tail(&exec->head, &vc4->render_job_list); if (was_empty) vc4_submit_next_render_job(dev); @@ -997,6 +1012,9 @@ vc4_job_handle_completed(struct vc4_dev *vc4) unsigned long irqflags; struct vc4_seqno_cb *cb, *cb_temp; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + spin_lock_irqsave(&vc4->job_lock, irqflags); while (!list_empty(&vc4->job_done_list)) { struct vc4_exec_info *exec = @@ -1033,6 +1051,9 @@ int vc4_queue_seqno_cb(struct drm_device *dev, struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + cb->func = func; INIT_WORK(&cb->work, vc4_seqno_cb_work); @@ -1083,8 +1104,12 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_wait_seqno *args = data; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, &args->timeout_ns); } @@ -1093,11 +1118,15 @@ int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; struct drm_vc4_wait_bo *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->pad != 0) return -EINVAL; @@ -1144,6 +1173,9 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, args->shader_rec_size, args->bo_handle_count); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n"); return -ENODEV; @@ -1167,6 +1199,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, DRM_ERROR("malloc failure on exec struct\n"); return -ENOMEM; } + exec->dev = vc4; ret = vc4_v3d_pm_get(vc4); if (ret) { @@ -1276,6 +1309,9 @@ int vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + vc4->dma_fence_context = dma_fence_context_alloc(1); INIT_LIST_HEAD(&vc4->bin_job_list); @@ -1321,11 +1357,15 @@ static void vc4_gem_destroy(struct drm_device *dev, void *unused) int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_gem_madvise *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + switch (args->madv) { case VC4_MADV_DONTNEED: case VC4_MADV_WILLNEED: diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 823d812f4982..ce9d16666d91 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1481,7 +1481,7 @@ vc4_hdmi_encoder_compute_mode_clock(const struct drm_display_mode *mode, unsigned int bpc, enum vc4_hdmi_output_format fmt) { - unsigned long long clock = mode->clock * 1000; + unsigned long long clock = mode->clock * 1000ULL; if (mode->flags & DRM_MODE_FLAG_DBLCLK) clock = clock * 2; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 2a58fc421cf6..ba2c8e5a9b64 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -220,10 +220,11 @@ u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo) int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) { + struct vc4_dev *vc4 = hvs->vc4; u32 reg; int ret; - if (!hvs->hvs5) + if (!vc4->is_vc5) return output; switch (output) { @@ -273,6 +274,7 @@ int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, struct drm_display_mode *mode, bool oneshot) { + struct vc4_dev *vc4 = hvs->vc4; struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state); unsigned int chan = vc4_crtc_state->assigned_channel; @@ -291,7 +293,7 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, */ dispctrl = SCALER_DISPCTRLX_ENABLE; - if (!hvs->hvs5) + if (!vc4->is_vc5) dispctrl |= VC4_SET_FIELD(mode->hdisplay, SCALER_DISPCTRLX_WIDTH) | VC4_SET_FIELD(mode->vdisplay, @@ -312,7 +314,7 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx | SCALER_DISPBKGND_AUTOHS | - ((!hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) | + ((!vc4->is_vc5) ? SCALER_DISPBKGND_GAMMA : 0) | (interlace ? SCALER_DISPBKGND_INTERLACE : 0)); /* Reload the LUT, since the SRAMs would have been disabled if @@ -617,11 +619,9 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) if (!hvs) return -ENOMEM; + hvs->vc4 = vc4; hvs->pdev = pdev; - if (of_device_is_compatible(pdev->dev.of_node, "brcm,bcm2711-hvs")) - hvs->hvs5 = true; - hvs->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(hvs->regs)) return PTR_ERR(hvs->regs); @@ -630,7 +630,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) hvs->regset.regs = hvs_regs; hvs->regset.nregs = ARRAY_SIZE(hvs_regs); - if (hvs->hvs5) { + if (vc4->is_vc5) { hvs->core_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(hvs->core_clk)) { dev_err(&pdev->dev, "Couldn't get core clock\n"); @@ -644,7 +644,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) } } - if (!hvs->hvs5) + if (!vc4->is_vc5) hvs->dlist = hvs->regs + SCALER_DLIST_START; else hvs->dlist = hvs->regs + SCALER5_DLIST_START; @@ -665,7 +665,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) * between planes when they don't overlap on the screen, but * for now we just allocate globally. */ - if (!hvs->hvs5) + if (!vc4->is_vc5) /* 48k words of 2x12-bit pixels */ drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); else diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 4342fb43e8c1..2eacfb6773d2 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -265,6 +265,9 @@ vc4_irq_enable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (!vc4->v3d) return; @@ -279,6 +282,9 @@ vc4_irq_disable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (!vc4->v3d) return; @@ -296,8 +302,12 @@ vc4_irq_disable(struct drm_device *dev) int vc4_irq_install(struct drm_device *dev, int irq) { + struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (irq == IRQ_NOTCONNECTED) return -ENOTCONN; @@ -316,6 +326,9 @@ void vc4_irq_uninstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + vc4_irq_disable(dev); free_irq(vc4->irq, dev); } @@ -326,6 +339,9 @@ void vc4_irq_reset(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* Acknowledge any stale IRQs. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index c169bd72e53b..893d831b24aa 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -393,7 +393,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) old_hvs_state->fifo_state[channel].pending_commit = NULL; } - if (vc4->hvs->hvs5) { + if (vc4->is_vc5) { unsigned long state_rate = max(old_hvs_state->core_clock_rate, new_hvs_state->core_clock_rate); unsigned long core_rate = max_t(unsigned long, @@ -412,7 +412,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) vc4_ctm_commit(vc4, state); - if (vc4->hvs->hvs5) + if (vc4->is_vc5) vc5_hvs_pv_muxing_commit(vc4, state); else vc4_hvs_pv_muxing_commit(vc4, state); @@ -430,7 +430,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_cleanup_planes(dev, state); - if (vc4->hvs->hvs5) { + if (vc4->is_vc5) { drm_dbg(dev, "Running the core clock at %lu Hz\n", new_hvs_state->core_clock_rate); @@ -479,8 +479,12 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_mode_fb_cmd2 mode_cmd_local; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + /* If the user didn't specify a modifier, use the * vc4_set_tiling_ioctl() state for the BO. */ @@ -997,11 +1001,15 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { .fb_create = vc4_fb_create, }; +static const struct drm_mode_config_funcs vc5_mode_funcs = { + .atomic_check = vc4_atomic_check, + .atomic_commit = drm_atomic_helper_commit, + .fb_create = drm_gem_fb_create, +}; + int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - bool is_vc5 = of_device_is_compatible(dev->dev->of_node, - "brcm,bcm2711-vc5"); int ret; /* @@ -1009,7 +1017,7 @@ int vc4_kms_load(struct drm_device *dev) * the BCM2711, but the load tracker computations are used for * the core clock rate calculation. */ - if (!is_vc5) { + if (!vc4->is_vc5) { /* Start with the load tracker enabled. Can be * disabled through the debugfs load_tracker file. */ @@ -1025,7 +1033,7 @@ int vc4_kms_load(struct drm_device *dev) return ret; } - if (is_vc5) { + if (vc4->is_vc5) { dev->mode_config.max_width = 7680; dev->mode_config.max_height = 7680; } else { @@ -1033,7 +1041,7 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.max_height = 2048; } - dev->mode_config.funcs = &vc4_mode_funcs; + dev->mode_config.funcs = vc4->is_vc5 ? &vc5_mode_funcs : &vc4_mode_funcs; dev->mode_config.helper_private = &vc4_mode_config_helpers; dev->mode_config.preferred_depth = 24; dev->mode_config.async_page_flip = true; diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c index 18abc06335c1..79a74184d732 100644 --- a/drivers/gpu/drm/vc4/vc4_perfmon.c +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -17,13 +17,30 @@ void vc4_perfmon_get(struct vc4_perfmon *perfmon) { - if (perfmon) - refcount_inc(&perfmon->refcnt); + struct vc4_dev *vc4; + + if (!perfmon) + return; + + vc4 = perfmon->dev; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + + refcount_inc(&perfmon->refcnt); } void vc4_perfmon_put(struct vc4_perfmon *perfmon) { - if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) + struct vc4_dev *vc4; + + if (!perfmon) + return; + + vc4 = perfmon->dev; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + + if (refcount_dec_and_test(&perfmon->refcnt)) kfree(perfmon); } @@ -32,6 +49,9 @@ void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon) unsigned int i; u32 mask; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon)) return; @@ -49,6 +69,9 @@ void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, { unsigned int i; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (WARN_ON_ONCE(!vc4->active_perfmon || perfmon != vc4->active_perfmon)) return; @@ -64,8 +87,12 @@ void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id) { + struct vc4_dev *vc4 = vc4file->dev; struct vc4_perfmon *perfmon; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + mutex_lock(&vc4file->perfmon.lock); perfmon = idr_find(&vc4file->perfmon.idr, id); vc4_perfmon_get(perfmon); @@ -76,8 +103,14 @@ struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id) void vc4_perfmon_open_file(struct vc4_file *vc4file) { + struct vc4_dev *vc4 = vc4file->dev; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_init(&vc4file->perfmon.lock); idr_init_base(&vc4file->perfmon.idr, VC4_PERFMONID_MIN); + vc4file->dev = vc4; } static int vc4_perfmon_idr_del(int id, void *elem, void *data) @@ -91,6 +124,11 @@ static int vc4_perfmon_idr_del(int id, void *elem, void *data) void vc4_perfmon_close_file(struct vc4_file *vc4file) { + struct vc4_dev *vc4 = vc4file->dev; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4file->perfmon.lock); idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); idr_destroy(&vc4file->perfmon.idr); @@ -107,6 +145,9 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, unsigned int i; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Creating perfmon no VC4 V3D probed\n"); return -ENODEV; @@ -127,6 +168,7 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, GFP_KERNEL); if (!perfmon) return -ENOMEM; + perfmon->dev = vc4; for (i = 0; i < req->ncounters; i++) perfmon->events[i] = req->events[i]; @@ -157,6 +199,9 @@ int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, struct drm_vc4_perfmon_destroy *req = data; struct vc4_perfmon *perfmon; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n"); return -ENODEV; @@ -182,6 +227,9 @@ int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct vc4_perfmon *perfmon; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Getting perfmon no VC4 V3D probed\n"); return -ENODEV; diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index b3438f4a81ce..1e866dc00ac3 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -489,10 +489,10 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) } /* Align it to 64 or 128 (hvs5) bytes */ - lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); + lbm = roundup(lbm, vc4->is_vc5 ? 128 : 64); /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ - lbm /= vc4->hvs->hvs5 ? 4 : 2; + lbm /= vc4->is_vc5 ? 4 : 2; return lbm; } @@ -608,7 +608,7 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state) ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, &vc4_state->lbm, lbm_size, - vc4->hvs->hvs5 ? 64 : 32, + vc4->is_vc5 ? 64 : 32, 0, 0); spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); @@ -917,7 +917,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && fb->format->has_alpha; - if (!vc4->hvs->hvs5) { + if (!vc4->is_vc5) { /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | @@ -1321,6 +1321,10 @@ static int vc4_plane_atomic_async_check(struct drm_plane *plane, old_vc4_state = to_vc4_plane_state(plane->state); new_vc4_state = to_vc4_plane_state(new_plane_state); + + if (!new_vc4_state->hw_dlist) + return -EINVAL; + if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || @@ -1385,6 +1389,13 @@ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_async_update = vc4_plane_atomic_async_update, }; +static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = { + .atomic_check = vc4_plane_atomic_check, + .atomic_update = vc4_plane_atomic_update, + .atomic_async_check = vc4_plane_atomic_async_check, + .atomic_async_update = vc4_plane_atomic_async_update, +}; + static bool vc4_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -1453,14 +1464,13 @@ static const struct drm_plane_funcs vc4_plane_funcs = { struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_plane *plane = NULL; struct vc4_plane *vc4_plane; u32 formats[ARRAY_SIZE(hvs_formats)]; int num_formats = 0; int ret = 0; unsigned i; - bool hvs5 = of_device_is_compatible(dev->dev->of_node, - "brcm,bcm2711-vc5"); static const uint64_t modifiers[] = { DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, DRM_FORMAT_MOD_BROADCOM_SAND128, @@ -1476,7 +1486,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, return ERR_PTR(-ENOMEM); for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { - if (!hvs_formats[i].hvs5_only || hvs5) { + if (!hvs_formats[i].hvs5_only || vc4->is_vc5) { formats[num_formats] = hvs_formats[i].drm; num_formats++; } @@ -1490,7 +1500,10 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, if (ret) return ERR_PTR(ret); - drm_plane_helper_add(plane, &vc4_plane_helper_funcs); + if (vc4->is_vc5) + drm_plane_helper_add(plane, &vc5_plane_helper_funcs); + else + drm_plane_helper_add(plane, &vc4_plane_helper_funcs); drm_plane_create_alpha_property(plane); drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 3c918eeaf56e..f6b7dc3df08c 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -593,11 +593,15 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_rcl_setup setup = {0}; struct drm_vc4_submit_cl *args = exec->args; bool has_bin = args->bin_cl_size != 0; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->min_x_tile > args->max_x_tile || args->min_y_tile > args->max_y_tile) { DRM_DEBUG("Bad render tile set (%d,%d)-(%d,%d)\n", diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7bb3067f8425..cc714dcfe1f2 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -127,6 +127,9 @@ static int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) int vc4_v3d_pm_get(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + mutex_lock(&vc4->power_lock); if (vc4->power_refcount++ == 0) { int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); @@ -145,6 +148,9 @@ vc4_v3d_pm_get(struct vc4_dev *vc4) void vc4_v3d_pm_put(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -172,6 +178,9 @@ int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) uint64_t seqno = 0; struct vc4_exec_info *exec; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + try_again: spin_lock_irqsave(&vc4->job_lock, irqflags); slot = ffs(~vc4->bin_alloc_used); @@ -316,6 +325,9 @@ int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used) { int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + mutex_lock(&vc4->bin_bo_lock); if (used && *used) @@ -348,6 +360,9 @@ static void bin_bo_release(struct kref *ref) void vc4_v3d_bin_bo_put(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->bin_bo_lock); kref_put(&vc4->bin_bo_kref, bin_bo_release); mutex_unlock(&vc4->bin_bo_lock); diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index eec76af49f04..2feba55bcef7 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -105,9 +105,13 @@ size_is_lt(uint32_t width, uint32_t height, int cpp) struct drm_gem_cma_object * vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) { + struct vc4_dev *vc4 = exec->dev; struct drm_gem_cma_object *obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + if (hindex >= exec->bo_count) { DRM_DEBUG("BO index %d greater than BO count %d\n", hindex, exec->bo_count); @@ -160,10 +164,14 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, uint32_t offset, uint8_t tiling_format, uint32_t width, uint32_t height, uint8_t cpp) { + struct vc4_dev *vc4 = exec->dev; uint32_t aligned_width, aligned_height, stride, size; uint32_t utile_w = utile_width(cpp); uint32_t utile_h = utile_height(cpp); + if (WARN_ON_ONCE(vc4->is_vc5)) + return false; + /* The shaded vertex format stores signed 12.4 fixed point * (-2048,2047) offsets from the viewport center, so we should * never have a render target larger than 4096. The texture @@ -482,10 +490,14 @@ vc4_validate_bin_cl(struct drm_device *dev, void *unvalidated, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t len = exec->args->bin_cl_size; uint32_t dst_offset = 0; uint32_t src_offset = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + while (src_offset < len) { void *dst_pkt = validated + dst_offset; void *src_pkt = unvalidated + src_offset; @@ -926,9 +938,13 @@ int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t i; int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + for (i = 0; i < exec->shader_state_count; i++) { ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); if (ret) diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 7cf82b071de2..e315aeb5fef5 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -778,6 +778,7 @@ vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { + struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev); bool found_shader_end = false; int shader_end_ip = 0; uint32_t last_thread_switch_ip = -3; @@ -785,6 +786,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + memset(&validation_state, 0, sizeof(validation_state)); validation_state.shader = shader_obj->vaddr; validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 5a5bf4e5b717..e31554d7139f 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -71,7 +71,7 @@ static int xen_drm_front_gem_object_mmap(struct drm_gem_object *gem_obj, * the whole buffer. */ vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; vma->vm_pgoff = 0; /* |