diff options
Diffstat (limited to 'drivers/gpu')
147 files changed, 2651 insertions, 726 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 617d9c3a86c3..1168351267fd 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -179,6 +179,13 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. +config DRM_TTM_DMA_PAGE_POOL + bool + depends on DRM_TTM && (SWIOTLB || INTEL_IOMMU) + default y + help + Choose this if you need the TTM dma page pool + config DRM_VRAM_HELPER tristate depends on DRM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0bb08e0e1611..bcc5d40a8d5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -977,6 +977,9 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 649e68c4479b..d1495e1c9289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct dma_fence *fence = NULL; + struct dma_fence *fence; int i, r; start_jiffies = jiffies; @@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; r = dma_fence_wait(fence, false); + dma_fence_put(fence); if (r) goto exit_do_move; - dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: - if (fence) - dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6614d8a6f4c8..2cdaf3b2a721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) + for (i = 0; i < num_entities; i++) { + mutex_lock(&ctx->adev->lock_reset); drm_sched_entity_fini(&ctx->entities[0][i].entity); + mutex_unlock(&ctx->adev->lock_reset); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 693f17e78791..8e6726e0d035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -859,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -884,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } + mutex_unlock(&adev->lock_reset); + return 0; } @@ -1036,6 +1041,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) if (!fences) return -ENOMEM; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* stop the scheduler */ kthread_park(ring->sched.thread); @@ -1075,6 +1083,8 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); + mutex_unlock(&adev->lock_reset); + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); kfree(fences); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d36d2b093539..4f76beafb2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2057,6 +2057,7 @@ out: */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { + struct amdgpu_gpu_instance *gpu_instance; int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2082,8 +2083,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); - /* set to low pstate by default */ - amdgpu_xgmi_set_pstate(adev, 0); + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + mutex_lock(&mgpu_info.mutex); + + /* + * Reset device p-state to low as this was booted with high. + * + * This should be performed only after all devices from the same + * hive get initialized. + * + * However, it's unknown how many device in the hive in advance. + * As this is counted one by one during devices initializations. + * + * So, we wait for all XGMI interlinked devices initialized. + * This may bring some delays as those devices may come from + * different hives. But that should be OK. + */ + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev->flags & AMD_IS_APU) + continue; + + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + if (r) { + DRM_ERROR("pstate setting failed (%d).\n", r); + break; + } + } + } + + mutex_unlock(&mgpu_info.mutex); + } return 0; } @@ -3020,6 +3052,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ @@ -3070,7 +3109,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -3088,7 +3129,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->delayed_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d2dd59a95e8a..3cadb0b76f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -514,7 +514,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN && + adev->asic_type < CHIP_RAVEN && (adev->flags & AMD_IS_APU) && (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d6ce018c8f2..0ffc9447b573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -998,9 +998,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c9d1fada6188..e00b46180d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 459aa9059542..0ae0a2715b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -225,7 +225,7 @@ struct amdgpu_me { uint32_t num_me; uint32_t num_pipe_per_me; uint32_t num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); @@ -267,6 +267,7 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 555d8e57fae9..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; @@ -127,18 +128,48 @@ struct amdgpu_xgmi { }; struct amdgpu_gmc { + /* FB's physical address in MMIO space (for CPU to + * map FB). This is different compared to the agp/ + * gart/vram_start/end field as the later is from + * GPU's view and aper_base is from CPU's view. + */ resource_size_t aper_size; resource_size_t aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + /* AGP aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place AGP by setting MC_VM_AGP_BOT/TOP registers + * Under VMID0, logical address == MC address. AGP + * aperture maps to physical bus or IOVA addressed. + * AGP aperture is used to simulate FB in ZFB case. + * AGP aperture is also used for page table in system + * memory (mainly for APU). + * + */ u64 agp_size; u64 agp_start; u64 agp_end; + /* GART aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR + * registers + * Under VMID0, logical address inside GART aperture will + * be translated through gpuvm gart page table to access + * paged system memory + */ u64 gart_size; u64 gart_start; u64 gart_end; + /* Frame buffer aperture of this GPU device. Different from + * fb_start (see below), this only covers the local GPU device. + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) + * and calculate vram_start of this local device by adding an + * offset inside the XGMI hive. + * Under VMID0, logical address == MC address + */ u64 vram_start; u64 vram_end; /* FB region , it's same as local vram region in single GPU, in XGMI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6f3b03f6224f..30d540d23b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -311,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) drm_irq_uninstall(adev->ddev); adev->irq.installed = false; if (adev->irq.msi_enabled) - pci_disable_msi(adev->pdev); + pci_free_irq_vectors(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 137a8573d556..b6db28a570c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -656,15 +655,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bbe9ac7e843f..2770cba56a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -567,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -756,6 +758,12 @@ static int psp_ras_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->ras.ras_initialized) return 0; @@ -777,6 +785,18 @@ static int psp_ras_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + if (!psp->ras.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) @@ -866,6 +886,18 @@ static int psp_hdcp_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + if (!psp->hdcp_context.hdcp_initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) @@ -948,6 +980,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->hdcp_context.hdcp_initialized) return 0; @@ -1039,6 +1077,18 @@ static int psp_dtm_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + if (!psp->dtm_context.dtm_initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) @@ -1091,6 +1141,12 @@ static int psp_dtm_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->dtm_context.dtm_initialized) return 0; @@ -1411,7 +1467,10 @@ out: || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) /*skip ucode loading in SRIOV VF */ continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dab90c280476..404483437bd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * As their names indicate, inject operation will write the * value to the address. * - * Second member: struct ras_debug_if::op. + * The second member: struct ras_debug_if::op. * It has three kinds of operations. * * - 0: disable RAS on the block. Take ::head as its data. @@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * - 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? - * programs: - * copy the struct ras_debug_if in your codes and initialize it. - * write the struct to the control node. + * + * Programs + * + * Copy the struct ras_debug_if in your codes and initialize it. + * Write the struct to the control node. + * + * Shells * * .. code-block:: bash * * echo op block [error [sub_block address value]] > .../ras/ras_ctrl * + * Parameters: + * * op: disable, enable, inject * disable: only block is needed * enable: block and error are needed @@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * .. note:: - * Operation is only allowed on blocks which are supported. + * Operations are only allowed on blocks which are supported. * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * to see which blocks support RAS on a particular asic. + * */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * * DOC: AMDGPU RAS debugfs EEPROM table reset interface * * Some boards contain an EEPROM which is used to persistently store a list of - * bad pages containing ECC errors detected in vram. This interface provides + * bad pages which experiences ECC errors in vram. This interface provides * a way to reset the EEPROM, e.g., after testing error injection. * * Usage: @@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { /** * DOC: AMDGPU RAS sysfs Error Count Interface * - * It allows user to read the error count for each IP block on the gpu through + * It allows the user to read the error count for each IP block on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * It outputs the multiple lines which report the uncorrected (ue) and corrected @@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) } /* sysfs end */ +/** + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors + * + * Normally when there is an uncorrectable error, the driver will reset + * the GPU to recover. However, in the event of an unrecoverable error, + * the driver provides an interface to reboot the system automatically + * in that event. + * + * The following file in debugfs provides that interface: + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot + * + * Usage: + * + * .. code-block:: bash + * + * echo true > .../ras/auto_reboot + * + */ /* debugfs begin */ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b66d29d5ffa2..b158230af8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f940526c5889..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __string(ring, sched_job->base.sched->name); + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5d916169226..61d9b7774d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1906,9 +1906,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev) void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); - - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); } /** @@ -1921,7 +1918,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b4dd89af6f09..e324bfe6c58f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 703677f2ff6f..46b590af2fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -216,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3199e4a5ff12..9d870444d7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -193,6 +193,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, &adev->vcn.dpg_sram_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e775f271f1ed..598c24505c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1418,6 +1418,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; + /* make sure that the page tables covering the address range are + * actually allocated + */ r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, params->direct); if (r) @@ -1491,7 +1494,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries */ + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ while (cursor.pfn < frag_start) { amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 00371713c671..61d13d8b7b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -274,22 +274,55 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; if (!hive) return 0; - if (hive->pstate == pstate) - return 0; + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + adev->pstate = is_high_pstate ? pstate : adev->pstate; + goto out; + } dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); if (is_support_sw_smu_xgmi(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); - if (ret) + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate || is_high_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); return ret; } @@ -364,6 +397,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } + /* Set default device pstate */ + adev->pstate = -1; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e7bab4f094b3..ca5f0e7ea1ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); +} + + static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) { const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } } + gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, @@ -1758,27 +1785,52 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + int r; + + if (adev->in_gpu_reset) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (r) + return r; + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, + (void **)&adev->gfx.rlc.cs_ptr); + if (!r) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, + adev->gfx.rlc.cs_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + } + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + if (r) + return r; + } + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; } -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) +static int gfx_v10_0_init_pg(struct amdgpu_device *adev) { int i; + int r; - gfx_v10_0_init_csb(adev); + r = gfx_v10_0_init_csb(adev); + if (r) + return r; for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1880,7 +1932,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); + + r = gfx_v10_0_init_pg(adev); + if (r) + return r; /* enable RLC SRM */ gfx_v10_0_rlc_enable_srm(adev); @@ -1906,7 +1961,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + r = gfx_v10_0_init_pg(adev); + if (r) + return r; + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -2373,7 +2431,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) return 0; } -static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); @@ -2386,7 +2444,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; } static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) @@ -3087,6 +3155,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); @@ -3098,12 +3167,12 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) #endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; adev->wb.wb[ring->wptr_offs] = 0; @@ -4766,6 +4835,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5156,6 +5243,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -5189,6 +5277,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -5219,6 +5308,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 39297baedfb4..faf2ffce5837 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -704,6 +704,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -986,6 +987,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && @@ -1044,8 +1052,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || @@ -1057,6 +1070,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; break; + case CHIP_RENOIR: + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; default: break; } @@ -2725,7 +2744,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - gfx_v9_1_init_rlc_save_restore_list(adev); + if (adev->asic_type == CHIP_VEGA12 || + (adev->asic_type == CHIP_RAVEN && + adev->rev_id >= 8)) + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); } @@ -3876,9 +3898,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6e1b25bd1fe7..321f8a997be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -338,17 +372,38 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..3c355fb5d2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, break; udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) return; @@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 657970f9ebfb..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); @@ -495,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a55a2e83fb19..0ba66bef5746 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -40,6 +40,7 @@ #include "gc/gc_10_1_0_sh_mask.h" #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -156,8 +157,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) static bool nv_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) { - /* TODO: will implement it when SMU header is available */ - return false; + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; } static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { @@ -539,6 +559,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + + return 0; +} + static void nv_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -586,6 +616,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .need_full_reset = &nv_need_full_reset, .get_pcie_usage = &nv_get_pcie_usage, .need_reset_on_init = &nv_need_reset_on_init, + .get_pcie_replay_count = &nv_get_pcie_replay_count, }; static int nv_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b8fdb192f6d6..f4ad2990f973 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, @@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .pad_ib = sdma_v5_0_ring_pad_ib, .emit_wreg = sdma_v5_0_ring_emit_wreg, .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 57bb5f9e08b2..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 741b564b4aa5..8e1640bc07af 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1145,7 +1145,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else if (adev->pdev->device == 0x15d8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -1188,7 +1190,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } break; case CHIP_ARCTURUS: @@ -1233,11 +1237,6 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x91; - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..57af489a5de3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 03083e5f731a..93edf9193a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -293,7 +293,7 @@ static int vcn_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i; + int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -305,8 +305,8 @@ static int vcn_v2_5_hw_fini(void *handle) ring->sched.ready = false; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[i].ring_enc[i]; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; ring->sched.ready = false; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a6173d727e3..7aac9568d3be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -719,7 +719,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) */ if (adev->flags & AMD_IS_APU && adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN) + adev->asic_type < CHIP_RAVEN) init_data.flags.gpu_vm_support = true; if (amdgpu_dc_feature_mask & DC_FBC_MASK) @@ -4239,8 +4239,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec result = MODE_OK; else DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n", - mode->vdisplay, mode->hdisplay, + mode->vdisplay, mode->clock, dc_result); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 49cf39711dfc..2bf8534c18fb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,7 +36,9 @@ #include "dc_link_ddc.h" #include "i2caux_interface.h" - +#if defined(CONFIG_DEBUG_FS) +#include "amdgpu_dm_debugfs.h" +#endif /* #define TRACE_DPCD */ #ifdef TRACE_DPCD @@ -147,6 +149,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) to_amdgpu_dm_connector(connector); struct drm_dp_mst_port *port = amdgpu_dm_connector->port; +#if defined(CONFIG_DEBUG_FS) + connector_debugfs_init(amdgpu_dm_connector); + amdgpu_dm_connector->debugfs_dpcd_address = 0; + amdgpu_dm_connector->debugfs_dpcd_size = 0; +#endif + return drm_dp_mst_connector_late_register(connector, port); } diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 985633c08a26..26c6d735cdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,15 +24,20 @@ # It calculates Bandwidth and Watermarks values for HW programming # -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +calcs_ccflags := -mhard-float -msse -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +calcs_ccflags += -mpreferred-stack-boundary=4 +else calcs_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cc94c1a73daa..12ba6fdf89b7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3027,15 +3027,6 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); - /* This second call is needed to reconfigure the DIG - * as a workaround for the incorrect value being applied - * from transmitter control. - */ - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) - stream->link->link_enc->funcs->setup( - stream->link->link_enc, - pipe_ctx->stream->signal); - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index ddb8d5649e79..63f3bddba7da 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT DCN20 += dcn20_dsc.o endif -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 924c2e303588..bbd1c98564be 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1152,6 +1152,11 @@ struct stream_encoder *dcn20_stream_encoder_create( if (!enc1) return NULL; + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + if (eng_id >= ENGINE_ID_DIGD) + eng_id++; + } + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 19fabac13c65..14113ccf498d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,15 +3,20 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o dcn21_hwseq.o dcn21_link_encoder.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 5b2a65b42403..8df251626e22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,15 +24,20 @@ # It provides the general basic services required by other DAL # subcomponents. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dml_ccflags := -mhard-float -msse -dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml_ccflags += -mpreferred-stack-boundary=4 +else dml_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index b456cd23c6fa..970737217e53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,15 +1,20 @@ # # Makefile for the 'dsc' sub-component of DAL. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dsc_ccflags := -mhard-float -msse -dsc_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dsc_ccflags += -mpreferred-stack-boundary=4 +else dsc_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5902f80d1fce..a7f92d0b3a90 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -220,6 +220,9 @@ enum pp_df_cstate { ((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \ (support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT) +#define XGMI_MODE_PSTATE_D3 0 +#define XGMI_MODE_PSTATE_D0 1 + struct seq_file; enum amd_pp_clock_type; struct amd_pp_simple_clock_info; @@ -318,6 +321,7 @@ struct amd_pm_funcs { int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); int (*asic_reset_mode_2)(void *handle); int (*set_df_cstate)(void *handle, enum pp_df_cstate state); + int (*set_xgmi_pstate)(void *handle, uint32_t pstate); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index f4ff15378e61..7932eb163a00 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -969,6 +969,14 @@ static int pp_dpm_switch_power_profile(void *handle, workload = hwmgr->workload_setting[index]; } + if (type == PP_SMC_POWER_PROFILE_COMPUTE && + hwmgr->hwmgr_func->disable_power_features_for_compute_performance) { + if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) { + mutex_unlock(&hwmgr->smu_lock); + return -EINVAL; + } + } + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0); mutex_unlock(&hwmgr->smu_lock); @@ -1566,6 +1574,23 @@ static int pp_set_df_cstate(void *handle, enum pp_df_cstate state) return 0; } +static int pp_set_xgmi_pstate(void *handle, uint32_t pstate) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1625,4 +1650,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .asic_reset_mode_2 = pp_asic_reset_mode_2, .smu_i2c_bus_access = pp_smu_i2c_bus_access, .set_df_cstate = pp_set_df_cstate, + .set_xgmi_pstate = pp_set_xgmi_pstate, }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 4e468b0272c3..40b546c75fc2 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -383,14 +383,25 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type) return true; } - +/** + * smu_dpm_set_power_gate - power gate/ungate the specific IP block + * + * @smu: smu_context pointer + * @block_type: the IP block to power gate/ungate + * @gate: to power gate if true, ungate otherwise + * + * This API uses no smu->mutex lock protection due to: + * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). + * This is guarded to be race condition free by the caller. + * 2. Or get called on user setting request of power_dpm_force_performance_level. + * Under this case, the smu->mutex lock protection is already enforced on + * the parent API smu_force_performance_level of the call path. + */ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, bool gate) { int ret = 0; - mutex_lock(&smu->mutex); - switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: ret = smu_dpm_set_uvd_enable(smu, gate); @@ -408,8 +419,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, break; } - mutex_unlock(&smu->mutex); - return ret; } @@ -526,7 +535,7 @@ bool is_support_sw_smu(struct amdgpu_device *adev) bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) { - if (amdgpu_dpm != 1) + if (!is_support_sw_smu(adev)) return false; if (adev->asic_type == CHIP_VEGA20) @@ -582,10 +591,18 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) smu_table->power_play_table = smu_table->hardcode_pptable; smu_table->power_play_table_size = size; + /* + * Special hw_fini action(for Navi1x, the DPMs disablement will be + * skipped) may be needed for custom pptable uploading. + */ + smu->uploading_custom_pp_table = true; + ret = smu_reset(smu); if (ret) pr_info("smu reset failed, ret = %d\n", ret); + smu->uploading_custom_pp_table = false; + failed: mutex_unlock(&smu->mutex); return ret; @@ -705,8 +722,12 @@ static int smu_set_funcs(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + switch (adev->asic_type) { case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; vega20_set_ppt_funcs(smu); break; case CHIP_NAVI10: @@ -715,7 +736,10 @@ static int smu_set_funcs(struct amdgpu_device *adev) navi10_set_ppt_funcs(smu); break; case CHIP_ARCTURUS: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; arcturus_set_ppt_funcs(smu); + /* OD is not supported on Arcturus */ + smu->od_enabled =false; break; case CHIP_RENOIR: renoir_set_ppt_funcs(smu); @@ -724,9 +748,6 @@ static int smu_set_funcs(struct amdgpu_device *adev) return -EINVAL; } - if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) - smu->od_enabled = true; - return 0; } @@ -1057,10 +1078,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; if (adev->asic_type != CHIP_ARCTURUS) { - ret = smu_override_pcie_parameters(smu); - if (ret) - return ret; - ret = smu_notify_display_change(smu); if (ret) return ret; @@ -1089,6 +1106,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; } + if (adev->asic_type != CHIP_ARCTURUS) { + ret = smu_override_pcie_parameters(smu); + if (ret) + return ret; + } + ret = smu_set_default_od_settings(smu, initialize); if (ret) return ret; @@ -1098,7 +1121,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false); + ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false); if (ret) return ret; } @@ -1282,10 +1305,25 @@ static int smu_hw_fini(void *handle) return ret; } - ret = smu_stop_dpms(smu); - if (ret) { - pr_warn("Fail to stop Dpms!\n"); - return ret; + /* + * For custom pptable uploading, skip the DPM features + * disable process on Navi1x ASICs. + * - As the gfx related features are under control of + * RLC on those ASICs. RLC reinitialization will be + * needed to reenable them. That will cost much more + * efforts. + * + * - SMU firmware can handle the DPM reenablement + * properly. + */ + if (!smu->uploading_custom_pp_table || + !((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) { + ret = smu_stop_dpms(smu); + if (ret) { + pr_warn("Fail to stop Dpms!\n"); + return ret; + } } kfree(table_context->driver_pptable); @@ -2500,3 +2538,13 @@ int smu_get_dpm_clock_table(struct smu_context *smu, return ret; } + +uint32_t smu_get_pptable_power_limit(struct smu_context *smu) +{ + uint32_t ret = 0; + + if (smu->ppt_funcs->get_pptable_power_limit) + ret = smu->ppt_funcs->get_pptable_power_limit(smu); + + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3099ac256bd3..58c7c4a3053e 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1261,15 +1261,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu, static int arcturus_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1292,17 +1291,11 @@ static int arcturus_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -2070,6 +2063,13 @@ static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control) i2c_del_adapter(control); } +static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2160,6 +2160,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .get_pptable_power_limit = arcturus_get_pptable_power_limit, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index a24beaa4fb01..d2909c91d65b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev; + if (!hwmgr) return -EINVAL; @@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr_init_workload_prority(hwmgr); hwmgr->gfxoff_state_changed_by_workload = false; + adev = hwmgr->adev; + switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | @@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_CZ: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: @@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_AI: switch (hwmgr->chip_id) { case CHIP_VEGA10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega10_smu_funcs; vega10_hwmgr_init(hwmgr); @@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) vega12_hwmgr_init(hwmgr); break; case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega20_smu_funcs; vega20_hwmgr_init(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c805c6fcdba2..f73dff68e799 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3477,18 +3477,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr, static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query) { + struct amdgpu_device *adev = hwmgr->adev; int i; u32 tmp = 0; if (!query) return -EINVAL; - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); - tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); - *query = tmp; + /* + * PPSMC_MSG_GetCurrPkgPwr is not supported on: + * - Hawaii + * - Bonaire + * - Fiji + * - Tonga + */ + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) { + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); + tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + *query = tmp; - if (tmp != 0) - return 0; + if (tmp != 0) + return 0; + } smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, @@ -3969,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if (hwmgr->hardcode_pp_table != NULL) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + tmp_result = smu7_update_avfs(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update avfs voltages!", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4ea63a2e17da..d71a492c87a3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3689,6 +3689,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if(hwmgr->hardcode_pp_table != NULL) + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + vega10_update_avfs(hwmgr); /* @@ -5263,6 +5270,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_ return 0; } +static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable) +{ + struct vega10_hwmgr *data = hwmgr->backend; + uint32_t feature_mask = 0; + + if (disable) { + feature_mask |= data->smu_features[GNLD_ULV].enabled ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } else { + feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } + + if (feature_mask) + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, + !disable, feature_mask), + "enable/disable power features for compute performance Failed!", + return -EINVAL); + + if (disable) { + data->smu_features[GNLD_ULV].enabled = false; + data->smu_features[GNLD_DS_GFXCLK].enabled = false; + data->smu_features[GNLD_DS_SOCCLK].enabled = false; + data->smu_features[GNLD_DS_LCLK].enabled = false; + data->smu_features[GNLD_DS_DCEFCLK].enabled = false; + } else { + data->smu_features[GNLD_ULV].enabled = true; + data->smu_features[GNLD_DS_GFXCLK].enabled = true; + data->smu_features[GNLD_DS_SOCCLK].enabled = true; + data->smu_features[GNLD_DS_LCLK].enabled = true; + data->smu_features[GNLD_DS_DCEFCLK].enabled = true; + } + + return 0; + +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -5330,6 +5390,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_ppfeature_status = vega10_get_ppfeature_status, .set_ppfeature_status = vega10_set_ppfeature_status, .set_mp1_state = vega10_set_mp1_state, + .disable_power_features_for_compute_performance = + vega10_disable_power_features_for_compute_performance, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9295bd90b792..5bcf0d684151 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -4176,6 +4176,20 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr, return ret; } +static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr, + uint32_t pstate) +{ + int ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetXgmiMode, + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); + if (ret) + pr_err("SetXgmiPstate failed!\n"); + + return ret; +} + static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* init/fini related */ .backend_init = vega20_hwmgr_backend_init, @@ -4245,6 +4259,7 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .set_mp1_state = vega20_set_mp1_state, .smu_i2c_bus_access = vega20_smu_i2c_bus_access, .set_df_cstate = vega20_set_df_cstate, + .set_xgmi_pstate = vega20_set_xgmi_pstate, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 8120e7587585..031e0c22fcc7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -261,7 +261,6 @@ struct smu_table_context struct smu_table *tables; struct smu_table memory_pool; uint8_t thermal_controller_type; - uint16_t TDPODLimit; void *overdrive_table; }; @@ -391,6 +390,7 @@ struct smu_context uint32_t smc_if_version; + bool uploading_custom_pp_table; }; struct i2c_adapter; @@ -548,6 +548,7 @@ struct pptable_funcs { int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); + uint32_t (*get_pptable_power_limit)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); @@ -717,4 +718,6 @@ int smu_get_uclk_dpm_states(struct smu_context *smu, int smu_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table); +uint32_t smu_get_pptable_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bd8c922dfd3e..af977675fd33 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -356,6 +356,9 @@ struct pp_hwmgr_func { int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode); int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire); int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state); + int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate); + int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr, + bool disable); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h index 886b9a21ebd8..a886f0644d24 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h @@ -159,7 +159,7 @@ //FIXME need updating // Debug Overrides Bitmask #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001 -#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002 +#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002 // I2C Config Bit Defines #define I2C_CONTROLLER_ENABLED 1 diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index abd4debb3def..606149085683 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -27,7 +27,7 @@ #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU11_DRIVER_IF_VERSION_VG20 0x13 -#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F +#define SMU11_DRIVER_IF_VERSION_ARCT 0x10 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 @@ -48,6 +48,8 @@ #define SMU11_TOOL_SIZE 0x19000 +#define MAX_PCIE_CONF 2 + #define CLK_MAP(clk, index) \ [SMU_##clk] = {1, (index)} @@ -88,6 +90,11 @@ struct smu_11_0_dpm_table { uint32_t max; /* MHz */ }; +struct smu_11_0_pcie_table { + uint8_t pcie_gen[MAX_PCIE_CONF]; + uint8_t pcie_lane[MAX_PCIE_CONF]; +}; + struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table soc_table; struct smu_11_0_dpm_table gfx_table; @@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table display_table; struct smu_11_0_dpm_table phy_table; struct smu_11_0_dpm_table fclk_table; + struct smu_11_0_pcie_table pcie_table; }; struct smu_11_0_dpm_context { @@ -250,4 +258,8 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size); + +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index 86cdc3393eac..b2f96a101124 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table struct smu_11_0_power_saving_clock_table power_saving_clock; struct smu_11_0_overdrive_table overdrive_table; +#ifndef SMU_11_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h +#endif } __attribute__((packed)); #endif diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 769f9451d904..aaec884d63ed 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -36,6 +36,7 @@ #include "navi10_ppt.h" #include "smu_v11_0_pptable.h" #include "smu_v11_0_ppsmc.h" +#include "nbio/nbio_7_4_sh_mask.h" #include "asic_reg/mp/mp_11_0_sh_mask.h" @@ -207,7 +208,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -599,6 +600,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; PPTable_t *driver_ppt = NULL; + int i; driver_ppt = table_context->driver_pptable; @@ -629,6 +631,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + } + return 0; } @@ -691,13 +698,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +{ + return od_table->cap[feature]; +} + + static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { + uint16_t *curve_settings; int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; + struct smu_table_context *table_context = &smu->smu_table; + uint32_t gen_speed, lane_width; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct amdgpu_device *adev = smu->adev; + PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; + struct smu_11_0_overdrive_table *od_settings = smu->od_settings; switch (clk_type) { case SMU_GFXCLK: @@ -748,6 +771,69 @@ static int navi10_print_clk_levels(struct smu_context *smu, } break; + case SMU_PCIE: + gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; + lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + for (i = 0; i < NUM_LINK_LEVELS; i++) + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "", + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "", + pptable->LclkFreq[i], + (gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) && + (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ? + "*" : ""); + break; + case SMU_OD_SCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + break; + size += sprintf(buf + size, "OD_SCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + break; + case SMU_OD_MCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + break; + size += sprintf(buf + size, "OD_MCLK:\n"); + size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + break; + case SMU_OD_VDDC_CURVE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + break; + size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + curve_settings = &od_table->GfxclkFreq1; + break; + case 1: + curve_settings = &od_table->GfxclkFreq2; + break; + case 2: + curve_settings = &od_table->GfxclkFreq3; + break; + default: + break; + } + size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + } + break; default: break; } @@ -773,6 +859,12 @@ static int navi10_force_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_DCEFCLK: case SMU_FCLK: + /* There is only 2 levels for fine grained DPM */ + if (navi10_is_support_fine_grained_dpm(smu, clk_type)) { + soft_max_level = (soft_max_level >= 1 ? 1 : 0); + soft_min_level = (soft_min_level >= 1 ? 1 : 0); + } + ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) return size; @@ -1582,17 +1674,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, return ret; } +static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static int navi10_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1615,17 +1712,11 @@ static int navi10_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -1640,6 +1731,9 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, int ret, i; uint32_t smu_pcie_arg; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : @@ -1648,11 +1742,260 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, ret = smu_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg); + + if (ret) + return ret; + + if (pptable->PcieGenSpeed[i] > pcie_gen_cap) + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; + if (pptable->PcieLaneCount[i] > pcie_width_cap) + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; + } + + return 0; +} + +static inline void navi10_dump_od_table(OverDriveTable_t *od_table) { + pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1); + pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2); + pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3); + pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax); + pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct); +} + +static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value) +{ + if (value < od_table->min[setting]) { + pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]); + return -EINVAL; + } + if (value > od_table->max[setting]) { + pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]); + return -EINVAL; + } + return 0; +} + +static int navi10_setup_od_limits(struct smu_context *smu) { + struct smu_11_0_overdrive_table *overdrive_table = NULL; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + + if (!smu->smu_table.power_play_table) { + pr_err("powerplay table uninitialized!\n"); + return -ENOENT; + } + powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; + overdrive_table = &powerplay_table->overdrive_table; + if (!smu->od_settings) { + smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL); + } else { + memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table)); + } + return 0; +} + +static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) { + OverDriveTable_t *od_table; + int ret = 0; + + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + + if (initialize) { + ret = navi10_setup_od_limits(smu); + if (ret) { + pr_err("Failed to retrieve board OD limits\n"); + return ret; + } + } + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; + if (od_table) { + navi10_dump_od_table(od_table); + } + + return ret; +} + +static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { + int i; + int ret = 0; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table; + struct smu_11_0_overdrive_table *od_settings; + enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting; + uint16_t *freq_ptr, *voltage_ptr; + od_table = (OverDriveTable_t *)table_context->overdrive_table; + + if (!smu->od_enabled) { + pr_warn("OverDrive is not enabled!\n"); + return -EINVAL; + } + + if (!smu->od_settings) { + pr_err("OD board limits are not set!\n"); + return -ENOENT; + } + + od_settings = smu->od_settings; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + pr_warn("GFXCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + if (!table_context->overdrive_table) { + pr_err("Overdrive is not initialized\n"); + return -EINVAL; + } + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", size); + return -EINVAL; + } + switch (input[i]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN; + freq_ptr = &od_table->GfxclkFmin; + if (input[i + 1] > od_table->GfxclkFmax) { + pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n", + input[i + 1], + od_table->GfxclkFmin); + return -EINVAL; + } + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX; + freq_ptr = &od_table->GfxclkFmax; + if (input[i + 1] < od_table->GfxclkFmin) { + pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n", + input[i + 1], + od_table->GfxclkFmax); + return -EINVAL; + } + break; + default: + pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + pr_info("Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]); + if (ret) + return ret; + *freq_ptr = input[i + 1]; + } + break; + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + pr_warn("UCLK_MAX not supported!\n"); + return -ENOTSUPP; + } + if (size < 2) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (input[0] != 1) { + pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]); + pr_info("Supported indices: [1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]); + if (ret) + return ret; + od_table->UclkFmax = input[1]; + break; + case PP_OD_COMMIT_DPM_TABLE: + navi10_dump_od_table(od_table); + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + // no lock needed because smu_od_edit_dpm_table has it + ret = smu_handle_task(smu, smu->smu_dpm.dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE, + false); + if (ret) { + return ret; + } + break; + case PP_OD_EDIT_VDDC_CURVE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + pr_warn("GFXCLK_CURVE not supported!\n"); + return -ENOTSUPP; + } + if (size < 3) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (!od_table) { + pr_info("Overdrive is not initialized\n"); + return -EINVAL; + } + + switch (input[0]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1; + freq_ptr = &od_table->GfxclkFreq1; + voltage_ptr = &od_table->GfxclkVolt1; + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2; + freq_ptr = &od_table->GfxclkFreq2; + voltage_ptr = &od_table->GfxclkVolt2; + break; + case 2: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3; + freq_ptr = &od_table->GfxclkFreq3; + voltage_ptr = &od_table->GfxclkVolt3; + break; + default: + pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]); + pr_info("Supported indices: [0, 1, 2]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]); + if (ret) + return ret; + // Allow setting zero to disable the OverDrive VDDC curve + if (input[2] != 0) { + ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]); + if (ret) + return ret; + *freq_ptr = input[1]; + *voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE; + pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr); + } else { + // If setting 0, disable all voltage curve settings + od_table->GfxclkVolt1 = 0; + od_table->GfxclkVolt2 = 0; + od_table->GfxclkVolt3 = 0; + } + navi10_dump_od_table(od_table); + break; + default: + return -ENOSYS; + } return ret; } +static int navi10_run_btc(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc); + if (ret) + pr_err("RunBtc failed!\n"); + + return ret; +} static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, @@ -1742,6 +2085,10 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .set_default_od_settings = navi10_set_default_od_settings, + .od_edit_dpm_table = navi10_od_edit_dpm_table, + .get_pptable_power_limit = navi10_get_pptable_power_limit, + .run_btc = navi10_run_btc, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h index a37e37c5f105..ec03c7992f6d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h @@ -33,6 +33,11 @@ #define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717) #define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448) +#define NAVI10_VOLTAGE_SCALE (4) + +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + extern void navi10_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 4a9751971a9d..04daf7e9fe05 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -180,11 +180,13 @@ static int renoir_print_clk_levels(struct smu_context *smu, int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; - SmuMetrics_t metrics = {0}; + SmuMetrics_t metrics; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; + memset(&metrics, 0, sizeof(metrics)); + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, false); if (ret) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index bbb74b1d5d80..fc9679ea2368 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/pci.h> +#define SMU_11_0_PARTIAL_PPTABLE + #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" @@ -31,6 +33,7 @@ #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" +#include "smu_v11_0_pptable.h" #include "soc15_common.h" #include "atom.h" #include "amd_pcie.h" @@ -368,6 +371,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) version_major = le16_to_cpu(hdr->header.header_version_major); version_minor = le16_to_cpu(hdr->header.header_version_minor); if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { + pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); switch (version_minor) { case 0: ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size); @@ -384,6 +388,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) return ret; } else { + pr_info("use vbios provided pptable\n"); index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, powerplayinfo); @@ -1043,13 +1048,44 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) return 0; } +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) { + uint32_t od_limit, max_power_limit; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + powerplay_table = table_context->power_play_table; + + max_power_limit = smu_get_pptable_power_limit(smu); + + if (!max_power_limit) { + // If we couldn't get the table limit, fall back on first-read value + if (!smu->default_power_limit) + smu->default_power_limit = smu->power_limit; + max_power_limit = smu->default_power_limit; + } + + if (smu->od_enabled) { + od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + + pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit); + + max_power_limit *= (100 + od_limit); + max_power_limit /= 100; + } + + return max_power_limit; +} + int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) { int ret = 0; + uint32_t max_power_limit; - if (n > smu->default_power_limit) { - pr_err("New power limit is over the max allowed %d\n", - smu->default_power_limit); + max_power_limit = smu_v11_0_get_max_power_limit(smu); + + if (n > max_power_limit) { + pr_err("New power limit (%d) is over the max allowed %d\n", + n, + max_power_limit); return -EINVAL; } @@ -1463,16 +1499,13 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, return ret; } -#define XGMI_STATE_D0 1 -#define XGMI_STATE_D3 0 - int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, uint32_t pstate) { int ret = 0; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetXgmiMode, - pstate ? XGMI_STATE_D0 : XGMI_STATE_D3); + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); return ret; } @@ -1780,3 +1813,30 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu) return ret; } + +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + if (initialize) { + if (table_context->overdrive_table) { + return -EINVAL; + } + table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL); + if (!table_context->overdrive_table) { + return -ENOMEM; + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export overdrive table!\n"); + return ret; + } + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 7c8933f987d1..0b4892833808 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -221,7 +221,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -466,7 +466,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu) sizeof(PPTable_t)); table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; - table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); return 0; } diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c index 6b7f791685ec..d6a6692db0ac 100644 --- a/drivers/gpu/drm/arc/arcpgu_drv.c +++ b/drivers/gpu/drm/arc/arcpgu_drv.c @@ -14,6 +14,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_of.h> #include <drm/drm_probe_helper.h> #include <linux/dma-mapping.h> #include <linux/module.h> @@ -45,7 +46,7 @@ static int arcpgu_load(struct drm_device *drm) { struct platform_device *pdev = to_platform_device(drm->dev); struct arcpgu_drm_private *arcpgu; - struct device_node *encoder_node; + struct device_node *encoder_node = NULL, *endpoint_node = NULL; struct resource *res; int ret; @@ -80,14 +81,23 @@ static int arcpgu_load(struct drm_device *drm) if (arc_pgu_setup_crtc(drm) < 0) return -ENODEV; - /* find the encoder node and initialize it */ - encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0); + /* + * There is only one output port inside each device. It is linked with + * encoder endpoint. + */ + endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL); + if (endpoint_node) { + encoder_node = of_graph_get_remote_port_parent(endpoint_node); + of_node_put(endpoint_node); + } + if (encoder_node) { ret = arcpgu_drm_hdmi_init(drm, encoder_node); of_node_put(encoder_node); if (ret < 0) return ret; } else { + dev_info(drm->dev, "no encoder found. Assumed virtual LCD on simulation platform\n"); ret = arcpgu_drm_sim_init(drm, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index d49772de93e0..52648b4008bc 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -84,7 +84,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_commit_modeset_disables(dev, old_state); - drm_atomic_helper_commit_planes(dev, old_state, 0); + drm_atomic_helper_commit_planes(dev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); drm_atomic_helper_commit_modeset_enables(dev, old_state); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index 42bdc63dcffa..52750116aa19 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -581,8 +581,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter, } if (!in_range(&splitter->vsize, dflow->in_h)) { - DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n", - dflow->in_w); + DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n", + dflow->in_h); return -EINVAL; } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 587052751b48..b191d39c071d 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1571,8 +1571,11 @@ static void commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; const struct drm_mode_config_helper_funcs *funcs; + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; ktime_t start; s64 commit_time_ms; + unsigned int i, new_self_refresh_mask = 0; funcs = dev->mode_config.helper_private; @@ -1592,6 +1595,15 @@ static void commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_wait_for_dependencies(old_state); + /* + * We cannot safely access new_crtc_state after + * drm_atomic_helper_commit_hw_done() so figure out which crtc's have + * self-refresh active beforehand: + */ + for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) + if (new_crtc_state->self_refresh_active) + new_self_refresh_mask |= BIT(i); + if (funcs && funcs->atomic_commit_tail) funcs->atomic_commit_tail(old_state); else @@ -1600,7 +1612,8 @@ static void commit_tail(struct drm_atomic_state *old_state) commit_time_ms = ktime_ms_delta(ktime_get(), start); if (commit_time_ms > 0) drm_self_refresh_helper_update_avg_times(old_state, - (unsigned long)commit_time_ms); + (unsigned long)commit_time_ms, + new_self_refresh_mask); drm_atomic_helper_commit_cleanup_done(old_state); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 11adc4b6ccfe..ae5809a1f19a 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1507,12 +1507,12 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history, ulong *entries; uint nr_entries; u64 ts_nsec = entry->ts_nsec; - u64 rem_nsec = do_div(ts_nsec, 1000000000); + u32 rem_nsec = do_div(ts_nsec, 1000000000); nr_entries = stack_depot_fetch(entry->backtrace, &entries); stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4); - drm_printf(&p, " %d %ss (last at %5llu.%06llu):\n%s", + drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s", entry->count, topology_ref_type_to_str(entry->type), ts_nsec, rem_nsec / 1000, buf); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 2f2b889096b0..000fa4a1899f 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1105,21 +1105,33 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, if (obj_size < vma->vm_end - vma->vm_start) return -EINVAL; + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_get(obj); + if (obj->funcs && obj->funcs->mmap) { /* Remove the fake offset */ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); ret = obj->funcs->mmap(obj, vma); - if (ret) + if (ret) { + drm_gem_object_put_unlocked(obj); return ret; + } WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); } else { if (obj->funcs && obj->funcs->vm_ops) vma->vm_ops = obj->funcs->vm_ops; else if (dev->driver->gem_vm_ops) vma->vm_ops = dev->driver->gem_vm_ops; - else + else { + drm_gem_object_put_unlocked(obj); return -EINVAL; + } vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); @@ -1128,14 +1140,6 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, vma->vm_private_data = obj; - /* Take a ref for this mapping of the object, so that the fault - * handler can dereference the mmap offset's pointer to the object. - * This reference is cleaned up by the corresponding vm_close - * (which should happen whether the vma was created by this call, or - * by a vm_open due to mremap or partial unmap or whatever). - */ - drm_gem_object_get(obj); - return 0; } EXPORT_SYMBOL(drm_gem_mmap_obj); diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index 7412bfc5c05a..605a8a3da7f9 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,19 @@ int drm_gem_ttm_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; - return ttm_bo_mmap_obj(vma, bo); + ret = ttm_bo_mmap_obj(vma, bo); + if (ret < 0) + return ret; + + /* + * ttm has its own object refcounting, so drop gem reference + * to avoid double accounting counting. + */ + drm_gem_object_put_unlocked(gem); + + return 0; } EXPORT_SYMBOL(drm_gem_ttm_mmap); diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 68f4765a5896..dd33fec5aabd 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -133,29 +133,33 @@ out_drop_locks: * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages * @state: the state which has just been applied to hardware * @commit_time_ms: the amount of time in ms that this commit took to complete + * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in + * new state * * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will * update the average entry/exit self refresh times on self refresh transitions. * These averages will be used when calculating how long to delay before * entering self refresh mode after activity. */ -void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, - unsigned int commit_time_ms) +void +drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms, + unsigned int new_self_refresh_mask) { struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_crtc_state *old_crtc_state; int i; - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { + for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { + bool new_self_refresh_active = new_self_refresh_mask & BIT(i); struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; struct ewma_psr_time *time; if (old_crtc_state->self_refresh_active == - new_crtc_state->self_refresh_active) + new_self_refresh_active) continue; - if (new_crtc_state->self_refresh_active) + if (new_self_refresh_active) time = &sr_data->entry_avg_ms; else time = &sr_data->exit_avg_ms; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 698db540972c..648cf0207309 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) etnaviv_cmdbuf_get_va(&submit->cmdbuf, &gpu->mmu_context->cmdbuf_mapping)); + mutex_unlock(&gpu->mmu_context->lock); + /* Reserve space for the bomap */ if (n_bomap_pages) { bomap_start = bomap = iter.data; @@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) obj->base.size); } - mutex_unlock(&gpu->mmu_context->lock); - etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data); dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index 043111a1d60c..f8bf488e9d71 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu memcpy(buf, v2_context->mtlb_cpu, SZ_4K); buf += SZ_4K; - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K) - if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) + for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) + if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) { memcpy(buf, v2_context->stlb_cpu[i], SZ_4K); + buf += SZ_4K; + } } static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 35ebae6a1be7..3607d348c298 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping, global->memory_base); - if (ret) { - global->ops->free(ctx); - return NULL; + if (ret) + goto out_free; + + if (global->version == ETNAVIV_IOMMU_V1 && + ctx->cmdbuf_mapping.iova > 0x80000000) { + dev_err(global->dev, + "command buffer outside valid memory window\n"); + goto out_unmap; } return ctx; + +out_unmap: + etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping); +out_free: + global->ops->free(ctx); + return NULL; } void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 9cd6d2348a1e..c2875b10adf9 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fifo_changed = false; + crtc_state->preload_luts = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index fa44eb73d088..aa3a063549c3 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state) dev_priv->display.color_commit(crtc_state); } +static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + return !old_crtc_state->base.gamma_lut && + !old_crtc_state->base.degamma_lut; +} + +static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * CGM_PIPE_MODE is itself single buffered. We'd have to + * somehow split it out from chv_load_luts() if we wanted + * the ability to preload the CGM LUTs/CSC without tearing. + */ + if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode) + return false; + + return !old_crtc_state->base.gamma_lut; +} + +static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * The hardware degamma is active whenever the pipe + * CSC is active. Thus even if the old state has no + * software degamma we need to avoid clobbering the + * linear hardware degamma mid scanout. + */ + return !old_crtc_state->csc_enable && + !old_crtc_state->base.gamma_lut; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); @@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = chv_can_preload_luts(crtc_state); + return 0; } @@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = glk_can_preload_luts(crtc_state); + return 0; } @@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) crtc_state->csc_mode = icl_csc_mode(crtc_state); + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b51f244ad7a5..0d6e494b4508 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1794,10 +1794,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * of Color Encoding Format and Content Color Gamut] while sending * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. - * - * FIXME MST doesn't pass in the conn_state */ - if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); @@ -3605,7 +3603,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); - intel_ddi_set_dp_msa(crtc_state, conn_state); + /* MST will call a setting of MSA after an allocating of Virtual Channel + * from MST encoder pre_enable callback. + */ + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + intel_ddi_set_dp_msa(crtc_state, conn_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 348ce0456696..6f5e3bd13ad1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -66,6 +66,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_display_types.h" +#include "intel_dp_link_training.h" #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fifo_underrun.h" @@ -2528,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest stride limits of them all. */ crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + if (!crtc) + return 0; + plane = to_intel_plane(crtc->base.primary); return plane->max_stride(plane, pixel_format, modifier, @@ -14201,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc, /* vblanks work again, re-enable pipe CRC. */ intel_crtc_enable_pipe_crc(crtc); } else { + if (new_crtc_state->preload_luts && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); + intel_pre_plane_update(old_crtc_state, new_crtc_state); if (new_crtc_state->update_pipe) @@ -14713,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { if (new_crtc_state->base.active && !needs_modeset(new_crtc_state) && + !new_crtc_state->preload_luts && (new_crtc_state->base.color_mgmt_changed || new_crtc_state->update_pipe)) intel_color_load_luts(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 355c50088589..f417e0948001 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -27,7 +27,6 @@ #include <drm/drm_util.h> #include <drm/i915_drm.h> -#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 4341bd66a418..1a7334dbe802 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -775,6 +775,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fifo_changed; /* FIFO split is changed */ + bool preload_luts; /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 9ae5b8b6bbbc..03d1cba0b696 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); intel_ddi_enable_pipe_clock(pipe_config); + + intel_ddi_set_dp_msa(pipe_config, conn_state); } static void intel_mst_enable_dp(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 3d1061470e76..48c960ca12fb 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -234,6 +234,11 @@ static int intelfb_create(struct drm_fb_helper *helper, info->apertures->ranges[0].base = ggtt->gmadr.start; info->apertures->ranges[0].size = ggtt->mappable_end; + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + vma->node.start); + info->fix.smem_len = vma->node.size; + vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { DRM_ERROR("Failed to remap framebuffer into virtual memory\n"); @@ -243,10 +248,6 @@ static int intelfb_create(struct drm_fb_helper *helper, info->screen_base = vaddr; info->screen_size = vma->node.size; - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = (unsigned long)info->screen_base; - info->fix.smem_len = info->screen_size; - drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index edc41fc40726..72fda0430062 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2885,7 +2885,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { - static const struct drm_plane_funcs *plane_funcs; + const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index de6e55af82cf..255ab040022e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -236,6 +236,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); + kfree(ctx->jump_whitelist); + if (ctx->timeline) intel_timeline_put(ctx->timeline); @@ -527,6 +529,9 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); @@ -722,6 +727,7 @@ int i915_gem_init_contexts(struct drm_i915_private *i915) void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { destroy_kernel_context(&i915->kernel_context); + flush_work(&i915->gem.contexts.free_work); } static int context_idr_cleanup(int id, void *p, void *data) @@ -1136,7 +1142,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (i915_gem_context_is_closed(ctx)) { err = -ENOENT; - goto out; + goto unlock; } if (vm == rcu_access_pointer(ctx->vm)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 861d7d92fe9f..3870dd5daaa0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -176,6 +176,13 @@ struct i915_gem_context { * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** jump_whitelist: Bit array for tracking cmds during cmdparsing + * Guarded by struct_mutex + */ + unsigned long *jump_whitelist; + /** jump_whitelist_cmds: No of cmd slots available */ + u32 jump_whitelist_cmds; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e4f5c269150a..f0998f1225af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -298,7 +298,9 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -1990,40 +1992,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = eb->i915; + struct i915_vma * const vma = *eb->vma; + struct i915_address_space *vm; + u64 flags; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.vm; + } else if (vma->vm->has_read_only) { + flags = PIN_USER; + vm = vma->vm; + i915_gem_object_set_readonly(obj); + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct intel_engine_pool_node *pool; struct i915_vma *vma; + u64 batch_start; + u64 shadow_batch_start; int err; pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); - err = intel_engine_cmd_parser(eb->engine, + vma = shadow_batch_pin(eb, pool->obj); + if (IS_ERR(vma)) + goto err; + + batch_start = gen8_canonical_addr(eb->batch->node.start) + + eb->batch_start_offset; + + shadow_batch_start = gen8_canonical_addr(vma->node.start); + + err = intel_engine_cmd_parser(eb->gem_context, + eb->engine, eb->batch->obj, - pool->obj, + batch_start, eb->batch_start_offset, eb->batch_len, - is_master); + pool->obj, + shadow_batch_start); + if (err) { - if (err == -EACCES) /* unhandled chained batch */ + i915_vma_unpin(vma); + + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) + /* Execute original buffer non-secure */ vma = NULL; else vma = ERR_PTR(err); goto err; } - vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto err; - eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; + + if (CMDPARSER_USES_GGTT(eb->i915)) + eb->batch_flags |= I915_DISPATCH_SECURE; + + /* eb->batch_len unchanged */ + vma->private = pool; return vma; @@ -2430,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; @@ -2441,7 +2498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = i915; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2461,8 +2518,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(i915) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(i915)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2539,34 +2603,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; - vma = eb_parse(&eb, drm_is_current_master(file)); + vma = eb_parse(&eb); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_vma; } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } } - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index c99bb94fe41e..f88ee1317bb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,50 +11,6 @@ #include "i915_drv.h" -static bool switch_to_kernel_context_sync(struct intel_gt *gt) -{ - bool result = !intel_gt_is_wedged(gt); - - if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - result = false; - } - - if (intel_gt_pm_wait_for_idle(gt)) - result = false; - - return result; -} - -static void user_forcewake(struct intel_gt *gt, bool suspend) -{ - int count = atomic_read(>->user_wakeref); - - /* Inside suspend/resume so single threaded, no races to worry about. */ - if (likely(!count)) - return; - - intel_gt_pm_get(gt); - if (suspend) { - GEM_BUG_ON(count > atomic_read(>->wakeref.count)); - atomic_sub(count, >->wakeref.count); - } else { - atomic_add(count, >->wakeref.count); - } - intel_gt_pm_put(gt); -} - void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -62,8 +18,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); - user_forcewake(&i915->gt, true); - /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last @@ -73,8 +27,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend(&i915->gt); - intel_uc_suspend(&i915->gt.uc); + intel_gt_suspend_prepare(&i915->gt); i915_gem_drain_freed_objects(i915); } @@ -116,6 +69,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ + intel_gt_suspend_late(&i915->gt); + spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { LIST_HEAD(keep); @@ -140,8 +95,6 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - i915_gem_sanitize(i915); } void i915_gem_resume(struct drm_i915_private *i915) @@ -161,14 +114,6 @@ void i915_gem_resume(struct drm_i915_private *i915) if (intel_gt_resume(&i915->gt)) goto err_wedged; - intel_uc_resume(&i915->gt.uc); - - /* Always reload a context for powersaving. */ - if (!switch_to_kernel_context_sync(&i915->gt)) - goto err_wedged; - - user_forcewake(&i915->gt, false); - out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1e045c337044..4c72d74d6576 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, obj->mm.dirty = false; for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f8113bc756c6..5ca3ec911e50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1372,6 +1372,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1409,6 +1410,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5051f304705b..06aa14c7aa8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) void intel_engine_park_heartbeat(struct intel_engine_cs *engine) { - cancel_delayed_work(&engine->heartbeat.work); - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } void intel_engine_init_heartbeat(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c5d1047a4bc5..758f0e8ec672 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -454,13 +454,14 @@ struct intel_engine_cs { /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -528,9 +529,15 @@ struct intel_engine_cs { }; static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 898662c158ad..4c26daf7ee46 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -31,9 +31,11 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_uc_init_early(>->uc); } -void intel_gt_init_hw_early(struct drm_i915_private *i915) +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) { - i915->gt.ggtt = &i915->ggtt; + gt->ggtt = ggtt; + + intel_gt_sanitize(gt, false); } static void init_unused_ring(struct intel_gt *gt, u32 base) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 5b6effed3713..5436f8c30708 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -28,7 +28,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw_early(struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); void intel_gt_driver_register(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 32becf15d4e0..6187cdd06646 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/suspend.h> + #include "i915_drv.h" #include "i915_globals.h" #include "i915_params.h" @@ -18,6 +20,24 @@ #include "intel_rps.h" #include "intel_wakeref.h" +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); @@ -41,6 +61,9 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); @@ -66,6 +89,11 @@ static int __gt_park(struct intel_wakeref *wf) /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { + intel_rc6_ctx_wa_check(&i915->gt.rc6); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + } + GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); @@ -118,8 +146,22 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; - GEM_TRACE("\n"); + GEM_TRACE("force:%s\n", yesno(force)); + + /* Use a raw wakeref to avoid calling intel_display_power_get early */ + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + /* + * As we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. + */ + if (intel_gt_is_wedged(gt)) + intel_gt_unset_wedged(gt); intel_uc_sanitize(>->uc); @@ -127,6 +169,8 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.prepare) engine->reset.prepare(engine); + intel_uc_reset_prepare(>->uc); + if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); @@ -135,6 +179,9 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } void intel_gt_pm_fini(struct intel_gt *gt) @@ -148,6 +195,8 @@ int intel_gt_resume(struct intel_gt *gt) enum intel_engine_id id; int err = 0; + GEM_TRACE("\n"); + /* * After resume, we may need to poke into the pinned kernel * contexts to paper over any damage caused by the sudden suspend. @@ -186,14 +235,22 @@ int intel_gt_resume(struct intel_gt *gt) } intel_rc6_enable(>->rc6); + + intel_uc_resume(>->uc); + + user_forcewake(gt, false); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } -static void wait_for_idle(struct intel_gt *gt) +static void wait_for_suspend(struct intel_gt *gt) { + if (!intel_gt_pm_is_awake(gt)) + return; + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave @@ -205,27 +262,65 @@ static void wait_for_idle(struct intel_gt *gt) intel_gt_pm_wait_for_idle(gt); } -void intel_gt_suspend(struct intel_gt *gt) +void intel_gt_suspend_prepare(struct intel_gt *gt) +{ + user_forcewake(gt, true); + wait_for_suspend(gt); + + intel_uc_suspend(>->uc); +} + +static suspend_state_t pm_suspend_target(void) +{ +#if IS_ENABLED(CONFIG_PM_SLEEP) + return pm_suspend_target_state; +#else + return PM_SUSPEND_TO_IDLE; +#endif +} + +void intel_gt_suspend_late(struct intel_gt *gt) { intel_wakeref_t wakeref; /* We expect to be idle already; but also want to be independent */ - wait_for_idle(gt); + wait_for_suspend(gt); + + /* + * On disabling the device, we want to turn off HW access to memory + * that we no longer own. + * + * However, not all suspend-states disable the device. S0 (s2idle) + * is effectively runtime-suspend, the device is left powered on + * but needs to be put into a low power state. We need to keep + * powermanagement enabled, but we also retain system state and so + * it remains safe to keep on using our allocated memory. + */ + if (pm_suspend_target() == PM_SUSPEND_TO_IDLE) + return; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { intel_rps_disable(>->rps); intel_rc6_disable(>->rc6); intel_llc_disable(>->llc); } + + intel_gt_sanitize(gt, false); + + GEM_TRACE("\n"); } void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); + + GEM_TRACE("\n"); } int intel_gt_runtime_resume(struct intel_gt *gt) { + GEM_TRACE("\n"); + intel_gt_init_swizzling(gt); return intel_uc_runtime_resume(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index d924c984c74d..b3e17399be9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -43,8 +43,9 @@ void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); +void intel_gt_suspend_prepare(struct intel_gt *gt); +void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); -void intel_gt_suspend(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index b73229a84d85..353809ac2754 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -74,10 +74,10 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else + if (!--tl->active_count) list_del(&tl->link); + else + active_count += !!rcu_access_pointer(tl->last_request.fence); mutex_unlock(&tl->mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51aef2a233cb..0ac3b26674ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -990,6 +990,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -998,6 +1051,9 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -1047,72 +1103,22 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - - execlists_init_reg_state(regs, ce, engine, ce->ring, false); -} - -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) -{ - struct intel_context * const ce = rq->hw_context; - u32 head; - - /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. - */ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); - - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; - else - head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) { struct intel_context * const ce = rq->hw_context; + /* + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. + */ + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) - reset_active(rq, engine); - /* * If this is part of a virtual engine, its next request may * have been blocked waiting for access to the active context. diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 70f0e01a38b9..700104b90163 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -178,8 +178,13 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | rc6_mode); - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. + */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); } static void gen8_rc6_enable(struct intel_rc6 *rc6) @@ -486,6 +491,66 @@ static void rpm_put(struct intel_rc6 *rc6) rc6->wakeref = false; } +static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6) +{ + return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO); +} + +static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + rc6->ctx_corrupted = true; + } +} + +/** + * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @rc6: rc6 state + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6) +{ + if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + rc6->ctx_corrupted = false; + } +} + +/** + * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @rc6: rc6 state + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. +*/ +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (rc6->ctx_corrupted) + return; + + if (!intel_rc6_ctx_corrupted(rc6)) + return; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_rc6_disable(rc6); + rc6->ctx_corrupted = true; + + return; +} + static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); @@ -510,6 +575,8 @@ void intel_rc6_init(struct intel_rc6 *rc6) if (!rc6_supported(rc6)) return; + intel_rc6_ctx_wa_init(rc6); + if (IS_CHERRYVIEW(i915)) err = chv_rc6_init(rc6); else if (IS_VALLEYVIEW(i915)) @@ -525,6 +592,11 @@ void intel_rc6_init(struct intel_rc6 *rc6) void intel_rc6_sanitize(struct intel_rc6 *rc6) { + if (rc6->enabled) { /* unbalanced suspend/resume */ + rpm_get(rc6); + rc6->enabled = false; + } + if (rc6->supported) __intel_rc6_disable(rc6); } @@ -539,6 +611,9 @@ void intel_rc6_enable(struct intel_rc6 *rc6) GEM_BUG_ON(rc6->enabled); + if (rc6->ctx_corrupted) + return; + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (IS_CHERRYVIEW(i915)) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 5e6711f36457..1370f6834a4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -22,4 +22,7 @@ void intel_rc6_disable(struct intel_rc6 *rc6); u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6); +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6); + #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index 214f354d6ae4..89ad5697a8d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -23,6 +23,7 @@ struct intel_rc6 { bool supported : 1; bool enabled : 1; bool wakeref : 1; + bool ctx_corrupted : 1; }; #endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 5d429037cdad..d1752f15702a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -15,7 +15,8 @@ static int live_gt_resume(void *arg) /* Do several suspend/resume cycles to check we don't explode! */ do { - intel_gt_suspend(gt); + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); if (gt->rc6.enabled) { pr_err("rc6 still enabled after suspend!\n"); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 019ae6486e8d..3ee4a4e7689d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -554,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -610,6 +617,14 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index afd7f66bdc2d..bd12af349123 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3420,6 +3420,10 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, } for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { + /* pvinfo data doesn't come from hw mmio */ + if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE) + continue; + for (j = 0; j < block->size; j += 4) { ret = handler(gvt, i915_mmio_reg_offset(block->offset) + j, diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 207383dda84d..5448f37c8102 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -91,14 +91,15 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + spin_lock_irq(&ref->tree_lock); if (!atomic_read(&ref->count)) /* before the first inc */ debug_object_activate(ref, &active_debug_desc); + spin_unlock_irq(&ref->tree_lock); } static void debug_active_deactivate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + lockdep_assert_held(&ref->tree_lock); if (!atomic_read(&ref->count)) /* after the last dec */ debug_object_deactivate(ref, &active_debug_desc); } @@ -128,29 +129,22 @@ __active_retire(struct i915_active *ref) { struct active_node *it, *n; struct rb_root root; - bool retire = false; + unsigned long flags; - lockdep_assert_held(&ref->mutex); GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ - if (atomic_dec_and_test(&ref->count)) { - debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; - retire = true; - } - - mutex_unlock(&ref->mutex); - if (!retire) + if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) return; GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); - rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); - } + debug_active_deactivate(ref); + + root = ref->tree; + ref->tree = RB_ROOT; + ref->cache = NULL; + + spin_unlock_irqrestore(&ref->tree_lock, flags); /* After the final retire, the entire struct may be freed */ if (ref->retire) @@ -158,6 +152,11 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + + rbtree_postorder_for_each_entry_safe(it, n, &root, node) { + GEM_BUG_ON(i915_active_fence_isset(&it->base)); + kmem_cache_free(global.slab_cache, it); + } } static void @@ -169,7 +168,6 @@ active_work(struct work_struct *wrk) if (atomic_add_unless(&ref->count, -1, 1)) return; - mutex_lock(&ref->mutex); __active_retire(ref); } @@ -180,9 +178,7 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* If we are inside interrupt context (fence signaling), defer */ - if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || - !mutex_trylock(&ref->mutex)) { + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { queue_work(system_unbound_wq, &ref->work); return; } @@ -227,7 +223,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) if (!prealloc) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); parent = NULL; @@ -257,7 +253,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) out: ref->cache = node; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; @@ -278,8 +274,10 @@ void __i915_active_init(struct i915_active *ref, if (bits & I915_ACTIVE_MAY_SLEEP) ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + spin_lock_init(&ref->tree_lock); ref->tree = RB_ROOT; ref->cache = NULL; + init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); @@ -510,7 +508,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) if (RB_EMPTY_ROOT(&ref->tree)) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); /* @@ -575,7 +573,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) goto match; } - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return NULL; @@ -583,7 +581,7 @@ match: rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ if (p == &ref->cache->node) ref->cache = NULL; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return rb_entry(p, struct active_node, node); } @@ -664,6 +662,7 @@ unwind: void i915_active_acquire_barrier(struct i915_active *ref) { struct llist_node *pos, *next; + unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -673,7 +672,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) * populated by i915_request_add_active_barriers() to point to the * request that will eventually release them. */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); + spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING); llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { struct active_node *node = barrier_from_ll(pos); struct intel_engine_cs *engine = barrier_to_engine(node); @@ -699,7 +698,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - mutex_unlock(&ref->mutex); + spin_unlock_irqrestore(&ref->tree_lock, flags); } void i915_request_add_active_barriers(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index d89a74c142c6..96aed0ee700a 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -48,6 +48,7 @@ struct i915_active { atomic_t count; struct mutex mutex; + spinlock_t tree_lock; struct active_node *cache; struct rb_root tree; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 24555102e198..f24096e27bef 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -53,13 +53,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -84,9 +82,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ /* @@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -194,7 +189,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -209,14 +204,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, +}; + +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN(engine->i915, 7)) + if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && + engine->class == COPY_ENGINE_CLASS)) return; switch (engine->class) { case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VIDEO_DECODE_CLASS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case COPY_ENGINE_CLASS: - if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN(engine->i915, 9)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN(engine->i915, 9)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VIDEO_ENHANCEMENT_CLASS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!intel_engine_needs_cmd_parser(engine)) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - for (; count > 0; ++table, --count) { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ @@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; @@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", @@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(const struct i915_gem_context *ctx, + u32 *cmd, u32 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @engine: the engine on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start: Canonical base address of batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int intel_engine_cmd_parser(struct intel_engine_cs *engine, + +int intel_engine_cmd_parser(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master) + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; bool needs_clflush_after = false; @@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; - break; - } - - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; + goto err; } if (desc->flags & CMD_DESC_FIXED) @@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, length, batch_end - cmd); ret = -EINVAL; - break; + goto err; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { + if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; + goto err; + } + + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, batch_start, + shadow_batch_start); + + if (ret) + goto err; break; } + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); + cmd += length; + offset += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; - break; + goto err; } } while (1); + if (needs_clflush_after) { + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + +err: i915_gem_object_unpin_map(shadow_batch_obj); return ret; } @@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_uabi_engine(engine, dev_priv) { - if (intel_engine_needs_cmd_parser(engine)) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } @@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3340485c12e3..87e05ca3646a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -63,6 +63,7 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_debugfs.h" #include "i915_drv.h" @@ -603,8 +604,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret) goto err_uncore; - i915_gem_init_mmio(dev_priv); - return 0; err_uncore: @@ -1177,7 +1176,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - intel_gt_init_hw_early(dev_priv); + intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt); ret = i915_ggtt_enable_hw(dev_priv); if (ret) { @@ -1821,7 +1820,9 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - i915_gem_sanitize(dev_priv); + intel_rc6_ctx_wa_resume(&dev_priv->gt.rc6); + + intel_gt_sanitize(&dev_priv->gt, true); ret = i915_ggtt_enable_hw(dev_priv); if (ret) @@ -1952,8 +1953,6 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_power_domains_resume(dev_priv); - intel_gt_sanitize(&dev_priv->gt, true); - enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e6118f62b29..e29bc137e7ba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1614,9 +1614,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define VEBOX_MASK(dev_priv) \ ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7) + #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) @@ -1649,10 +1656,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) + /* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9)) #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ @@ -1779,7 +1788,6 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, /* i915_gem.c */ int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); -void i915_gem_sanitize(struct drm_i915_private *i915); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); @@ -1835,6 +1843,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) +struct i915_vma * __must_check +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags); + void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); static inline int __must_check @@ -1863,7 +1879,6 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return atomic_read(&error->reset_engine_count[engine->uabi_class]); } -void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); @@ -1941,12 +1956,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -int intel_engine_cmd_parser(struct intel_engine_cs *engine, +int intel_engine_cmd_parser(struct i915_gem_context *cxt, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master); + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); /* intel_device_info.c */ static inline struct intel_device_info * @@ -2028,4 +2045,10 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; } +static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc) +{ + return intel_guc_is_submission_supported(guc) && + intel_guc_is_running(guc); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b1574ab104d7..b9eb6b3149b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -893,6 +893,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_address_space *vm = &dev_priv->ggtt.vm; + + return i915_gem_object_pin(obj, vm, view, size, alignment, + flags | PIN_GLOBAL); +} + +struct i915_vma * +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; @@ -958,7 +972,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin(vma, size, alignment, flags); if (ret) return ERR_PTR(ret); @@ -1039,38 +1053,6 @@ out: return err; } -void i915_gem_sanitize(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - /* - * As we have just resumed the machine and woken the device up from - * deep PCI sleep (presumably D3_cold), assume the HW has been reset - * back to defaults, recovering from whatever wedged state we left it - * in and so worth trying to use the device once more. - */ - if (intel_gt_is_wedged(&i915->gt)) - intel_gt_unset_wedged(&i915->gt); - - /* - * If we inherit context state from the BIOS or earlier occupants - * of the GPU, the GPU may be in an inconsistent state when we - * try to take over. The only way to remove the earlier state - * is by resetting. However, resetting on earlier gen is tricky as - * it may impact the display and we are uncertain about the stability - * of the reset, so this could be applied to even earlier gen. - */ - intel_gt_sanitize(&i915->gt, false); - - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); -} - static int __intel_engines_record_defaults(struct intel_gt *gt) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -1413,11 +1395,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } -void i915_gem_init_mmio(struct drm_i915_private *i915) -{ - i915_gem_sanitize(i915); -} - static void i915_gem_init__mm(struct drm_i915_private *i915) { spin_lock_init(&i915->mm.obj_lock); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88179202c556..6239a9adbf14 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - ppgtt->vm.total = ggtt->vm.total; - return 0; err_ppgtt: diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index ad33fbe90a28..cf8a8c3ef047 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -63,7 +63,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(i915); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e8b67f5e521d..3c85cb0ee99f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1029,9 +1029,9 @@ i915_error_object_create(struct drm_i915_private *i915, for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; - s = io_mapping_map_atomic_wc(&mem->iomap, dma); + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap_atomic(s); + io_mapping_unmap(s); if (ret) break; } @@ -1043,9 +1043,9 @@ i915_error_object_create(struct drm_i915_private *i915, drm_clflush_pages(&page, 1); - s = kmap_atomic(page); + s = kmap(page); ret = compress_page(compress, s, dst); - kunmap_atomic(s); + kunmap(s); drm_clflush_pages(&page, 1); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a8c2318d3d5e..65d7c2e599de 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1870,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, config_length += num_lri_dwords(oa_config->mux_regs_len); config_length += num_lri_dwords(oa_config->b_counter_regs_len); config_length += num_lri_dwords(oa_config->flex_regs_len); - config_length++; /* MI_BATCH_BUFFER_END */ + config_length += 3; /* MI_BATCH_BUFFER_START */ config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); @@ -1895,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, oa_config->flex_regs, oa_config->flex_regs_len); - *cs++ = MI_BATCH_BUFFER_END; + /* Jump into the active wait. */ + *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8); + *cs++ = i915_ggtt_offset(stream->noa_wait); + *cs++ = 0; i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -3307,15 +3312,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } - if (props->hold_preemption) { - if (!props->single_context) { - DRM_DEBUG("preemption disable with no context\n"); - ret = -EINVAL; - goto err; - } - privileged_op = true; - } - /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3335,12 +3331,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * doesn't request global stream access (i.e. query based sampling * using MI_RECORD_PERF_COUNT. */ - if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; else if (IS_GEN(perf->i915, 12) && specific_ctx && (props->sample_flags & SAMPLE_OA_REPORT) == 0) privileged_op = false; + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2..0d40dccd1409 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu) const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53c280c4e741..73079b503724 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -474,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1 << 13) #define ECOBITS_PPGTT_CACHE64B (3 << 8) @@ -560,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define HS_INVOCATION_COUNT _MMIO(0x2300) @@ -7353,6 +7359,10 @@ enum { #define DMC_DEBUG3 _MMIO(0x101090) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) @@ -9661,7 +9671,7 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) -#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10) +#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10) #define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index d2edb527dcb8..010d67f48ad9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -202,21 +202,26 @@ static void kick_submission(struct intel_engine_cs *engine, if (prio <= engine->execlists.queue_priority_hint) return; + rcu_read_lock(); + /* Nothing currently active? We're overdue for a submission! */ inflight = execlists_active(&engine->execlists); if (!inflight) - return; + goto unlock; /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ if (inflight->hw_context == rq->hw_context) - return; + goto unlock; engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); + +unlock: + rcu_read_unlock(); } static void __i915_schedule(struct i915_sched_node *node, diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 000ba43e2c02..8fd92b9130a7 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -62,7 +62,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - case INTEL_PCH_CNP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; @@ -76,6 +75,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; + case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n"); + WARN_ON(!IS_COFFEELAKE(dev_priv)); + /* Comet Lake V PCH is based on KBP, which is SPT compatible */ + return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Ice Lake PCH\n"); WARN_ON(!IS_ICELAKE(dev_priv)); diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 1115c6a0522c..d26c25dd8d54 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -40,10 +40,10 @@ enum intel_pch { #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 +#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5d2b460d3ee5..809bff955b5a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -107,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index e378543ed453..d83f6bf6d9d4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -124,7 +124,6 @@ static void pm_resume(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { intel_gt_sanitize(&i915->gt, false); - i915_gem_sanitize(i915); i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(&i915->ggtt); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index a0da5944dd33..27ed3cee6a9b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -183,7 +183,6 @@ struct drm_i915_private *mock_gem_device(void) intel_timelines_init(i915); mock_init_ggtt(i915, &i915->ggtt); - i915->gt.ggtt = &i915->ggtt; mkwrite_device_info(i915)->engine_mask = BIT(0); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 9ec93dc27fb5..20ac3844edec 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -118,8 +118,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.vma_ops.clear_pages = clear_pages; i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - - intel_gt_init_hw_early(i915); + i915->gt.ggtt = ggtt; } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 14878ebf59d7..4a55bb6e2213 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -3,6 +3,8 @@ * Copyright (c) 2015 MediaTek Inc. */ +#include <drm/drm_fourcc.h> + #include <linux/clk.h> #include <linux/component.h> #include <linux/module.h> @@ -50,6 +52,8 @@ OVL_CON_CLRFMT_RGB : 0) #define OVL_CON_AEN BIT(8) #define OVL_CON_ALPHA 0xff +#define OVL_CON_VIRT_FLIP BIT(9) +#define OVL_CON_HORZ_FLIP BIT(10) struct mtk_disp_ovl_data { unsigned int addr; @@ -137,6 +141,40 @@ static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) return ovl->data->layer_nr; } +static unsigned int mtk_ovl_supported_rotations(struct mtk_ddp_comp *comp) +{ + return DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; +} + +static int mtk_ovl_layer_check(struct mtk_ddp_comp *comp, unsigned int idx, + struct mtk_plane_state *mtk_state) +{ + struct drm_plane_state *state = &mtk_state->base; + unsigned int rotation = 0; + + rotation = drm_rotation_simplify(state->rotation, + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + rotation &= ~DRM_MODE_ROTATE_0; + + /* We can only do reflection, not rotation */ + if ((rotation & DRM_MODE_ROTATE_MASK) != 0) + return -EINVAL; + + /* + * TODO: Rotating/reflecting YUV buffers is not supported at this time. + * Only RGB[AX] variants are supported. + */ + if (state->fb->format->is_yuv && rotation != 0) + return -EINVAL; + + state->rotation = rotation; + + return 0; +} + static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { unsigned int reg; @@ -229,6 +267,16 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, if (idx != 0) con |= OVL_CON_AEN | OVL_CON_ALPHA; + if (pending->rotation & DRM_MODE_REFLECT_Y) { + con |= OVL_CON_VIRT_FLIP; + addr += (pending->height - 1) * pending->pitch; + } + + if (pending->rotation & DRM_MODE_REFLECT_X) { + con |= OVL_CON_HORZ_FLIP; + addr += pending->pitch - 1; + } + writel_relaxed(con, comp->regs + DISP_REG_OVL_CON(idx)); writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx)); writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx)); @@ -263,9 +311,11 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .stop = mtk_ovl_stop, .enable_vblank = mtk_ovl_enable_vblank, .disable_vblank = mtk_ovl_disable_vblank, + .supported_rotations = mtk_ovl_supported_rotations, .layer_nr = mtk_ovl_layer_nr, .layer_on = mtk_ovl_layer_on, .layer_off = mtk_ovl_layer_off, + .layer_check = mtk_ovl_layer_check, .layer_config = mtk_ovl_layer_config, .bgclr_in_on = mtk_ovl_bgclr_in_on, .bgclr_in_off = mtk_ovl_bgclr_in_off, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index b841d3706d8b..f80a8ba75977 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -207,6 +207,28 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc) clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk); } +static +struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, + struct drm_plane *plane, + unsigned int *local_layer) +{ + struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_ddp_comp *comp; + int i, count = 0; + + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { + comp = mtk_crtc->ddp_comp[i]; + if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) { + *local_layer = plane->index - count; + return comp; + } + count += mtk_ddp_comp_layer_nr(comp); + } + + WARN(1, "Failed to find component for plane %d\n", plane->index); + return NULL; +} + static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) { struct drm_crtc *crtc = &mtk_crtc->base; @@ -283,19 +305,12 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; - struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); + struct mtk_ddp_comp *comp; unsigned int local_layer; plane_state = to_mtk_plane_state(plane->state); - - if (i >= comp_layer_nr) { - comp = mtk_crtc->ddp_comp[1]; - local_layer = i - comp_layer_nr; - } else - local_layer = i; - mtk_ddp_comp_layer_config(comp, local_layer, - plane_state); + comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); + mtk_ddp_comp_layer_config(comp, local_layer, plane_state); } return 0; @@ -343,7 +358,6 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; unsigned int i; - unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); unsigned int local_layer; /* @@ -366,22 +380,30 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) plane_state = to_mtk_plane_state(plane->state); - if (plane_state->pending.config) { - if (i >= comp_layer_nr) { - comp = mtk_crtc->ddp_comp[1]; - local_layer = i - comp_layer_nr; - } else - local_layer = i; - - mtk_ddp_comp_layer_config(comp, local_layer, - plane_state); - plane_state->pending.config = false; - } + if (!plane_state->pending.config) + continue; + + comp = mtk_drm_ddp_comp_for_plane(crtc, plane, + &local_layer); + + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state); + plane_state->pending.config = false; } mtk_crtc->pending_planes = false; } } +int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, + struct mtk_plane_state *state) +{ + unsigned int local_layer; + struct mtk_ddp_comp *comp; + + comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); + return mtk_ddp_comp_layer_check(comp, local_layer, state); +} + static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { @@ -543,14 +565,65 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp) mtk_drm_finish_page_flip(mtk_crtc); } +static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc, + int comp_idx) +{ + struct mtk_ddp_comp *comp; + + if (comp_idx > 1) + return 0; + + comp = mtk_crtc->ddp_comp[comp_idx]; + if (!comp->funcs) + return 0; + + if (comp_idx == 1 && !comp->funcs->bgclr_in_on) + return 0; + + return mtk_ddp_comp_layer_nr(comp); +} + +static inline +enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx) +{ + if (plane_idx == 0) + return DRM_PLANE_TYPE_PRIMARY; + else if (plane_idx == 1) + return DRM_PLANE_TYPE_CURSOR; + else + return DRM_PLANE_TYPE_OVERLAY; + +} + +static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, + struct mtk_drm_crtc *mtk_crtc, + int comp_idx, int pipe) +{ + int num_planes = mtk_drm_crtc_num_comp_planes(mtk_crtc, comp_idx); + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[comp_idx]; + int i, ret; + + for (i = 0; i < num_planes; i++) { + ret = mtk_plane_init(drm_dev, + &mtk_crtc->planes[mtk_crtc->layer_nr], + BIT(pipe), + mtk_drm_crtc_plane_type(mtk_crtc->layer_nr), + mtk_ddp_comp_supported_rotations(comp)); + if (ret) + return ret; + + mtk_crtc->layer_nr++; + } + return 0; +} + int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len) { struct mtk_drm_private *priv = drm_dev->dev_private; struct device *dev = drm_dev->dev; struct mtk_drm_crtc *mtk_crtc; - enum drm_plane_type type; - unsigned int zpos; + unsigned int num_comp_planes = 0; int pipe = priv->num_pipes; int ret; int i; @@ -606,23 +679,15 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mtk_crtc->ddp_comp[i] = comp; } - mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); - if (mtk_crtc->ddp_comp_nr > 1) { - struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[1]; + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) + num_comp_planes += mtk_drm_crtc_num_comp_planes(mtk_crtc, i); - if (comp->funcs->bgclr_in_on) - mtk_crtc->layer_nr += mtk_ddp_comp_layer_nr(comp); - } - mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr, - sizeof(struct drm_plane), - GFP_KERNEL); - - for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) { - type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY : - (zpos == 1) ? DRM_PLANE_TYPE_CURSOR : - DRM_PLANE_TYPE_OVERLAY; - ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[zpos], - BIT(pipe), type); + mtk_crtc->planes = devm_kcalloc(dev, num_comp_planes, + sizeof(struct drm_plane), GFP_KERNEL); + + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { + ret = mtk_drm_crtc_init_comp_planes(drm_dev, mtk_crtc, i, + pipe); if (ret) return ret; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index fcc134eb00c9..6afe1c19557a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -19,5 +19,7 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp); int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len); +int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, + struct mtk_plane_state *state); #endif /* MTK_DRM_CRTC_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 26441f4d1ad3..2f1e9e75b8da 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -77,9 +77,13 @@ struct mtk_ddp_comp_funcs { void (*stop)(struct mtk_ddp_comp *comp); void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc); void (*disable_vblank)(struct mtk_ddp_comp *comp); + unsigned int (*supported_rotations)(struct mtk_ddp_comp *comp); unsigned int (*layer_nr)(struct mtk_ddp_comp *comp); void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx); + int (*layer_check)(struct mtk_ddp_comp *comp, + unsigned int idx, + struct mtk_plane_state *state); void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state); void (*gamma_set)(struct mtk_ddp_comp *comp, @@ -130,6 +134,15 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp) comp->funcs->disable_vblank(comp); } +static inline +unsigned int mtk_ddp_comp_supported_rotations(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->supported_rotations) + return comp->funcs->supported_rotations(comp); + + return 0; +} + static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp) { if (comp->funcs && comp->funcs->layer_nr) @@ -152,6 +165,15 @@ static inline void mtk_ddp_comp_layer_off(struct mtk_ddp_comp *comp, comp->funcs->layer_off(comp, idx); } +static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp, + unsigned int idx, + struct mtk_plane_state *state) +{ + if (comp->funcs && comp->funcs->layer_check) + return comp->funcs->layer_check(comp, idx, state); + return 0; +} + static inline void mtk_ddp_comp_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 584a9ecadce6..3b0cc91c7023 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -20,6 +20,12 @@ static const u32 formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, DRM_FORMAT_RGB565, DRM_FORMAT_UYVY, DRM_FORMAT_YUYV, @@ -84,6 +90,7 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, { struct drm_framebuffer *fb = state->fb; struct drm_crtc_state *crtc_state; + int ret; if (!fb) return 0; @@ -91,6 +98,11 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, if (!state->crtc) return 0; + ret = mtk_drm_crtc_plane_check(state->crtc, plane, + to_mtk_plane_state(state)); + if (ret) + return ret; + crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); @@ -132,6 +144,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, state->pending.y = plane->state->dst.y1; state->pending.width = drm_rect_width(&plane->state->dst); state->pending.height = drm_rect_height(&plane->state->dst); + state->pending.rotation = plane->state->rotation; wmb(); /* Make sure the above parameters are set before update */ state->pending.dirty = true; } @@ -154,7 +167,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { }; int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, - unsigned long possible_crtcs, enum drm_plane_type type) + unsigned long possible_crtcs, enum drm_plane_type type, + unsigned int supported_rotations) { int err; @@ -166,6 +180,14 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, return err; } + if (supported_rotations & ~DRM_MODE_ROTATE_0) { + err = drm_plane_create_rotation_property(plane, + DRM_MODE_ROTATE_0, + supported_rotations); + if (err) + DRM_INFO("Create rotation property failed\n"); + } + drm_plane_helper_add(plane, &mtk_plane_helper_funcs); return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 6f842df722c7..760885e35b27 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -20,6 +20,7 @@ struct mtk_plane_pending_state { unsigned int y; unsigned int width; unsigned int height; + unsigned int rotation; bool dirty; }; @@ -35,6 +36,7 @@ to_mtk_plane_state(struct drm_plane_state *state) } int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, - unsigned long possible_crtcs, enum drm_plane_type type); + unsigned long possible_crtcs, enum drm_plane_type type, + unsigned int supported_rotations); #endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index e686331fa089..691c1a277d91 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -352,26 +352,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, cxdbg = ioremap(res->start, resource_size(res)); if (cxdbg) { - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); - cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); } a6xx_state->debugbus = state_kcalloc(a6xx_state, diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 6be879578140..1c74381a4fc9 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -47,12 +47,8 @@ static int msm_gpu_release(struct inode *inode, struct file *file) struct msm_gpu_show_priv *show_priv = m->private; struct msm_drm_private *priv = show_priv->dev->dev_private; struct msm_gpu *gpu = priv->gpu; - int ret; - - ret = mutex_lock_interruptible(&show_priv->dev->struct_mutex); - if (ret) - return ret; + mutex_lock(&show_priv->dev->struct_mutex); gpu->funcs->gpu_state_put(show_priv->state); mutex_unlock(&show_priv->dev->struct_mutex); diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 9d086133a84e..9458dc6c750c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev) return 0; err_out2: + pm_runtime_disable(pfdev->dev); panfrost_devfreq_fini(pfdev); err_out1: panfrost_device_fini(pfdev); err_out0: - pm_runtime_disable(pfdev->dev); drm_dev_put(ddev); return err; } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index bdd990568476..a3ed64a1f15e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size) return SZ_2M; } -void panfrost_mmu_flush_range(struct panfrost_device *pfdev, - struct panfrost_mmu *mmu, - u64 iova, size_t size) +static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, size_t size) { if (mmu->as < 0) return; @@ -406,11 +406,11 @@ addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) spin_lock(&pfdev->as_lock); list_for_each_entry(mmu, &pfdev->as_lru_list, list) { if (as == mmu->as) - break; + goto found_mmu; } - if (as != mmu->as) - goto out; + goto out; +found_mmu: priv = container_of(mmu, struct panfrost_file_priv, mmu); spin_lock(&priv->mm_lock); @@ -432,7 +432,8 @@ out: #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) -int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) +static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, + u64 addr) { int ret, i; struct panfrost_gem_object *bo; diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 83c57d325ca8..2dba192bf198 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -16,6 +16,7 @@ #include "panfrost_issues.h" #include "panfrost_job.h" #include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" #include "panfrost_regs.h" #define COUNTERS_PER_BLOCK 64 diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index daff9a2af3be..acabeaf28732 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6965,8 +6965,8 @@ static int cik_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* XXX this should actually be a bus address, not an MC address. same on older asics */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e937cc01910d..033bc466a862 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3696,8 +3696,8 @@ int r600_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 41f08321a361..fd74e2611185 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,10 +379,6 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { -#ifdef CONFIG_PPC64 - struct drm_device *ddev = pci_get_drvdata(pdev); -#endif - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ @@ -390,13 +386,14 @@ radeon_pci_shutdown(struct pci_dev *pdev) radeon_pci_remove(pdev); #ifdef CONFIG_PPC64 - /* Some adapters need to be suspended before a + /* + * Some adapters need to be suspended before a * shutdown occurs in order to prevent an error * during kexec. * Make this power specific becauase it breaks * some non-power boards. */ - radeon_suspend_kms(ddev, true, true, false); + radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false); #endif } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 05894d198a79..1d8efb0eefdb 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5997,8 +5997,8 @@ static int si_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 460fd98e40a7..a0b382a637a6 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 1a5153197fe9..461a7a8129f4 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -23,6 +23,7 @@ #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/completion.h> #include <drm/drm_print.h> #include <drm/gpu_scheduler.h> @@ -68,6 +69,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, if (!entity->rq_list) return -ENOMEM; + init_completion(&entity->entity_idle); + for (i = 0; i < num_rq_list; ++i) entity->rq_list[i] = rq_list[i]; @@ -286,11 +289,12 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) */ if (spsc_queue_count(&entity->job_queue)) { if (sched) { - /* Park the kernel for a moment to make sure it isn't processing - * our enity. + /* + * Wait for thread to idle to make sure it isn't processing + * this entity. */ - kthread_park(sched->thread); - kthread_unpark(sched->thread); + wait_for_completion(&entity->entity_idle); + } if (entity->dependency) { dma_fence_remove_callback(entity->dependency, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2af64459b3d7..3c57e84222ca 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -47,6 +47,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/sched.h> +#include <linux/completion.h> #include <uapi/linux/sched/types.h> #include <drm/drm_print.h> @@ -134,6 +135,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) list_for_each_entry_continue(entity, &rq->entities, list) { if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -144,6 +146,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) if (drm_sched_entity_is_ready(entity)) { rq->current_entity = entity; + reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); return entity; } @@ -496,8 +499,10 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) fence = sched->ops->run_job(s_job); if (IS_ERR_OR_NULL(fence)) { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + s_job->s_fence->parent = NULL; - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); } else { s_job->s_fence->parent = fence; } @@ -645,9 +650,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) struct drm_sched_job *job; unsigned long flags; - /* Don't destroy jobs while the timeout worker is running */ - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && - !cancel_delayed_work(&sched->work_tdr)) + /* + * Don't destroy jobs while the timeout worker is running OR thread + * is being parked and hence assumed to not touch ring_mirror_list + */ + if ((sched->timeout != MAX_SCHEDULE_TIMEOUT && + !cancel_delayed_work(&sched->work_tdr)) || + __kthread_should_park(sched->thread)) return NULL; spin_lock_irqsave(&sched->job_list_lock, flags); @@ -724,6 +733,9 @@ static int drm_sched_main(void *param) continue; sched_job = drm_sched_entity_pop_job(entity); + + complete(&entity->entity_idle); + if (!sched_job) continue; @@ -746,8 +758,9 @@ static int drm_sched_main(void *param) r); dma_fence_put(fence); } else { + if (IS_ERR(fence)) + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); drm_sched_process_job(NULL, &sched_job->cb); } diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile index 01fc670ce7a2..caea2a099496 100644 --- a/drivers/gpu/drm/ttm/Makefile +++ b/drivers/gpu/drm/ttm/Makefile @@ -4,8 +4,8 @@ ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \ ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ - ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o \ - ttm_page_alloc_dma.o + ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o ttm-$(CONFIG_AGP) += ttm_agp_backend.o +ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o obj-$(CONFIG_DRM_TTM) += ttm.o diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index ff54e7609e8f..bf876faea592 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -33,7 +33,6 @@ * when freed). */ -#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) #define pr_fmt(fmt) "[TTM] " fmt #include <linux/dma-mapping.h> @@ -1238,5 +1237,3 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data) return 0; } EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs); - -#endif diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 4c4b59ae2c81..549dde83408b 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -560,14 +560,17 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (args->bcl_start != args->bcl_end) { bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); - if (!bin) + if (!bin) { + v3d_job_put(&render->base); return -ENOMEM; + } ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl); if (ret) { kfree(bin); v3d_job_put(&render->base); + kfree(bin); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 81a95651643f..e962048f65d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -576,8 +576,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) else dev_priv->map_mode = vmw_dma_map_populate; - /* No TTM coherent page pool? FIXME: Ask TTM instead! */ - if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && + if (!IS_ENABLED(CONFIG_DRM_TTM_DMA_PAGE_POOL) && (dev_priv->map_mode == vmw_dma_alloc_coherent)) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 8e95e8a31df8..32b9131b2bae 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -362,7 +362,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) { struct vmw_private *dev_priv = res->dev_priv; - struct vmw_surface *srf; void *cmd; if (res->func->destroy == vmw_gb_surface_destroy) { @@ -386,7 +385,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) */ mutex_lock(&dev_priv->cmdbuf_mutex); - srf = vmw_res_to_srf(res); dev_priv->used_memory_size -= res->backup_size; mutex_unlock(&dev_priv->cmdbuf_mutex); } |