Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
52 files changed, 739 insertions, 354 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 56da8920195d..99e5f474505d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -189,6 +189,8 @@ extern int sched_policy; static const int sched_policy = KFD_SCHED_POLICY_HWS; #endif +extern int amdgpu_tmz; + #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; #endif @@ -202,8 +204,6 @@ extern int amdgpu_cik_support; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) -/* AMDGPU_IB_POOL_SIZE must be a power of 2 */ -#define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPUFB_CONN_LIMIT 4 #define AMDGPU_BIOS_NUM_SCRATCH 16 @@ -400,13 +400,6 @@ struct amdgpu_sa_bo { int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); -enum amdgpu_ib_pool_type { - AMDGPU_IB_POOL_NORMAL = 0, - AMDGPU_IB_POOL_VM, - AMDGPU_IB_POOL_DIRECT, - - AMDGPU_IB_POOL_MAX -}; /* * IRQS. */ @@ -864,7 +857,7 @@ struct amdgpu_device { unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; - struct amdgpu_sa_manager ring_tmp_bo[AMDGPU_IB_POOL_MAX]; + struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; /* interrupts */ @@ -945,7 +938,7 @@ struct amdgpu_device { atomic64_t gart_pin_size; /* soc15 register offset based on ip, instance and segment */ - uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -1263,5 +1256,9 @@ _name##_show(struct device *dev, \ \ static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) -#endif +static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) +{ + return adev->gmc.tmz_enabled; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index abfbe89e805e..ad59ac4423b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -564,6 +564,13 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) return adev->gds.gws_size; } +uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rev_id; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 13feb313e9b3..d065c50582eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -175,6 +175,7 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); /* Read user wptr from a specified user address space with page fault diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index d1495e1c9289..d9b35df33806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false); + false, false, false); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3eee5c7d83e0..7653f62b1b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -924,7 +924,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ring = to_amdgpu_ring(entity->rq->sched); r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_NORMAL, ib); + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f9b315e7e004..b038ddbb2ece 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1140,6 +1140,8 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + amdgpu_gmc_tmz_set(adev); + return 0; } @@ -2998,7 +3000,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; - adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; + adev->pm.ac_power = power_supply_is_system_supplied() > 0; /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -3432,15 +3434,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) } } - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - - amdgpu_amdkfd_suspend(adev, !fbcon); - amdgpu_ras_suspend(adev); r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_amdkfd_suspend(adev, !fbcon); + /* evict vram memory */ amdgpu_bo_evict_vram(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8ea86ffdea0d..719a963c31a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -85,9 +85,10 @@ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask * - 3.36.0 - Allow reading more status registers on si/cik + * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 37 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -144,6 +145,7 @@ int amdgpu_discovery = -1; int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; +int amdgpu_tmz = 0; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -687,13 +689,12 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau /** * DOC: hws_gws_support(bool) - * Whether HWS support gws barriers. Default value: false (not supported) - * This will be replaced with a MEC firmware version check once firmware - * is ready + * Assume that HWS supports GWS barriers regardless of what firmware version + * check says. Default value: false (rely on MEC2 firmware version check). 
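 * A hypothetical opt-in: the parameter is read-only after load (0444),
 * so it would be set on the kernel command line, e.g.
 *
 *	amdgpu.hws_gws_support=1
 *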
*/ bool hws_gws_support; module_param(hws_gws_support, bool, 0444); -MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); +MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** * DOC: queue_preemption_timeout_ms (int) @@ -728,6 +729,16 @@ uint amdgpu_dm_abm_level = 0; MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +/** + * DOC: tmz (int) + * Trusted Memory Zone (TMZ) is a method to protect data being written + * to or read from memory. + * + * Default value: 0 (off). TODO: change the default to auto once the feature is complete. + */ +MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); +module_param_named(tmz, amdgpu_tmz, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..d878fe7fee51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * Used for polling fences. * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout) { uint32_t seq; + signed long r; if (!s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + return -ETIMEDOUT; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 245aec521388..4ed9958af94e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -234,7 +234,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | - AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + AMDGPU_GEM_CREATE_EXPLICIT_SYNC | + AMDGPU_GEM_CREATE_ENCRYPTED)) return -EINVAL; @@ -242,6 +243,11 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; + if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { + DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n"); + return -EINVAL; + } + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..0103acc57474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_device_wb_get(adev, &reg_val_offs)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug!
too many kiq readers\n"); - goto failed_kiq_read; + goto failed_unlock; } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) amdgpu_device_wb_free(adev, reg_val_offs); return value; +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_read: + if (reg_val_offs) + amdgpu_device_wb_free(adev, reg_val_offs); pr_err("failed to read reg:%x\n", reg); return ~0; } @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a8ca808f453b..acabb57aa8af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -373,3 +373,38 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } + +/** + * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ + * @adev: amdgpu_device pointer + * + * Check and set if the device @adev supports Trusted Memory + * Zones (TMZ). + */ +void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_RAVEN: + case CHIP_RENOIR: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + /* Don't enable it by default yet.
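+		 * With the module parameter added in amdgpu_drv.c above, a
+		 * hypothetical opt-in on these ASICs would be booting with
+		 * amdgpu.tmz=1 on the kernel command line.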
+ */ + if (amdgpu_tmz < 1) { + adev->gmc.tmz_enabled = false; + dev_info(adev->dev, + "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n"); + } else { + adev->gmc.tmz_enabled = true; + dev_info(adev->dev, + "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n"); + } + break; + default: + adev->gmc.tmz_enabled = false; + dev_warn(adev->dev, + "Trusted Memory Zone (TMZ) feature not supported\n"); + break; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 7546da0cc70c..2bd9423c1dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -213,6 +213,8 @@ struct amdgpu_gmc { } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE]; uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; + bool tmz_enabled; + const struct amdgpu_gmc_funcs *gmc_funcs; struct amdgpu_xgmi xgmi; @@ -276,4 +278,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); +extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 1adaac972190..c24366aacf3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -61,14 +61,13 @@ * Returns 0 on success, error on failure. */ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool_type, - struct amdgpu_ib *ib) + unsigned size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_ib *ib) { int r; if (size) { - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo[pool_type], + r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type], &ib->sa_bo, size, 256); if (r) { dev_err(adev->dev, "failed to get a new IB (%d)\n", r); @@ -133,6 +132,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, uint64_t fence_ctx; uint32_t status = 0, alloc_size; unsigned fence_flags = 0; + bool secure; unsigned i; int r = 0; @@ -161,6 +161,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } + if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) && + (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) { + dev_err(adev->dev, "secure submissions not supported on compute rings\n"); + return -EINVAL; + } + alloc_size = ring->funcs->emit_frame_size + num_ibs * ring->funcs->emit_ib_size; @@ -217,6 +223,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_cntxcntl(ring, status); } + /* Setup initial TMZiness and send it off. 
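+	 * The frame is opened in the TMZ state of the first IB; whenever
+	 * a later IB disagrees with the current state, the loop below
+	 * closes the frame and reopens it with the toggled state, so
+	 * secure and non-secure IBs can share one submission.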
+ */ + secure = false; + if (job && ring->funcs->emit_frame_cntl) { + secure = ib->flags & AMDGPU_IB_FLAGS_SECURE; + amdgpu_ring_emit_frame_cntl(ring, true, secure); + } + for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; @@ -228,12 +242,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; + if (job && ring->funcs->emit_frame_cntl) { + if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { + amdgpu_ring_emit_frame_cntl(ring, false, secure); + secure = !secure; + amdgpu_ring_emit_frame_cntl(ring, true, secure); + } + } + amdgpu_ring_emit_ib(ring, job, ib, status); status &= ~AMDGPU_HAVE_CTX_SWITCH; } - if (ring->funcs->emit_tmz) - amdgpu_ring_emit_tmz(ring, false); + if (job && ring->funcs->emit_frame_cntl) + amdgpu_ring_emit_frame_cntl(ring, false, secure); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) @@ -282,30 +304,32 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, */ int amdgpu_ib_pool_init(struct amdgpu_device *adev) { - int r, i; unsigned size; + int r, i; - if (adev->ib_pool_ready) { + if (adev->ib_pool_ready) return 0; - } + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) size = PAGE_SIZE * 2; else - size = AMDGPU_IB_POOL_SIZE*64*1024; - r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo[i], - size, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT); - if (r) { - for (i--; i >= 0; i--) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - return r; - } + size = AMDGPU_IB_POOL_SIZE; + + r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i], + size, AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto error; } adev->ib_pool_ready = true; return 0; + +error: + while (i--) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + return r; } /** @@ -320,11 +344,12 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) { int i; - if (adev->ib_pool_ready) { - for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - adev->ib_pool_ready = false; - } + if (!adev->ib_pool_ready) + return; + + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + adev->ib_pool_ready = false; } /** @@ -339,9 +364,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) */ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { - unsigned i; - int r, ret = 0; long tmo_gfx, tmo_mm; + int r, ret = 0; + unsigned i; tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; if (amdgpu_sriov_vf(adev)) { @@ -419,15 +444,16 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - seq_printf(m, "-------------------- NORMAL -------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_NORMAL], m); - seq_printf(m, "---------------------- VM ---------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_VM], m); - seq_printf(m, "-------------------- DIRECT--------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_DIRECT], m); + seq_printf(m, "--------------------- DELAYED --------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], + m); + seq_printf(m, "-------------------- IMMEDIATE -------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE], + m); + seq_printf(m, 
"--------------------- DIRECT ---------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m); return 0; - } static const struct drm_info_list amdgpu_debugfs_sa_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3a67f6c046d4..fe92dcd94d4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->direct.fence_context || + if ((*id)->owner != vm->immediate.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->direct.fence_context) + if ((*id)->owner != vm->immediate.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->direct.fence_context; + id->owner = vm->immediate.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d4262069d501..81caac9b958a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -62,7 +62,6 @@ struct amdgpu_job { /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; - }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c687f5415b3f..3d822eba9a5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -753,7 +753,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false); + true, false, false); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5e39ecd8cc28..7d41f7b9a340 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -229,6 +229,17 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } +/** + * amdgpu_bo_encrypted - test if the BO is encrypted + * @bo: pointer to a buffer object + * + * Return true if the buffer object is encrypted, false otherwise. 
+ */ +static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; +} + bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 49e2e43f2e4a..c762deb5abc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -3271,26 +3271,27 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } - - ret = device_create_file(adev->dev, &dev_attr_pp_num_states); - if (ret) { - DRM_ERROR("failed to create device file pp_num_states\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); - if (ret) { - DRM_ERROR("failed to create device file pp_cur_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_force_state); - if (ret) { - DRM_ERROR("failed to create device file pp_force_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_table); - if (ret) { - DRM_ERROR("failed to create device file pp_table\n"); - return ret; + if (!amdgpu_sriov_vf(adev)) { + ret = device_create_file(adev->dev, &dev_attr_pp_num_states); + if (ret) { + DRM_ERROR("failed to create device file pp_num_states\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); + if (ret) { + DRM_ERROR("failed to create device file pp_cur_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_force_state); + if (ret) { + DRM_ERROR("failed to create device file pp_force_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_table); + if (ret) { + DRM_ERROR("failed to create device file pp_table\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); @@ -3337,6 +3338,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + + /* the rest are not needed for SRIOV one-VF mode */ + if (amdgpu_sriov_vf(adev)) { + adev->pm.sysfs_initialized = true; + return ret; + } + if (adev->asic_type != CHIP_ARCTURUS) { ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index e1d894f0d654..107e80063553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -50,6 +50,8 @@ #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) +#define AMDGPU_IB_POOL_SIZE (1024 * 1024) + enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX, AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE, @@ -63,6 +65,17 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ }; +enum amdgpu_ib_pool_type { + /* Normal submissions to the top of the pipeline. */ + AMDGPU_IB_POOL_DELAYED, + /* Immediate submissions to the bottom of the pipeline. */ + AMDGPU_IB_POOL_IMMEDIATE, + /* Direct submission to the ring buffer during init and reset.
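+	 * A representative allocation from this pool, as the UVD/VCE
+	 * helpers below do it (sketch):
+	 *
+	 *	r = amdgpu_job_alloc_with_ib(adev, 64,
+	 *				     AMDGPU_IB_POOL_DIRECT, &job);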
*/ + AMDGPU_IB_POOL_DIRECT, + + AMDGPU_IB_POOL_MAX +}; + struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; @@ -105,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, @@ -176,7 +190,8 @@ struct amdgpu_ring_funcs { void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start, + bool secure); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); @@ -255,7 +270,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 2a152516504a..e5b8fb8e75c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -89,7 +89,8 @@ struct amdgpu_buffer_funcs { /* dst addr in bytes */ uint64_t dst_offset, /* number of byte to transfer */ - uint32_t byte_count); + uint32_t byte_count, + bool tmz); /* maximum bytes in a single operation */ uint32_t fill_max_bytes; @@ -107,7 +108,7 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) struct amdgpu_sdma_instance * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b86392253696..b87ca171986a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -249,6 +249,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, owner != AMDGPU_FENCE_OWNER_UNDEFINED) continue; + /* Never sync to VM updates either. 
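+	 * VM update fences carry AMDGPU_FENCE_OWNER_VM; per the check
+	 * below, only explicitly synced submissions (owner
+	 * AMDGPU_FENCE_OWNER_UNDEFINED) still wait for them.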
*/ + if (fence_owner == AMDGPU_FENCE_OWNER_VM && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + /* Ignore fences depending on the sync mode */ switch (mode) { case AMDGPU_SYNC_ALWAYS: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b158230af8db..2f4d5ca9894f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -44,7 +44,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; @@ -124,7 +124,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, false, false); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); @@ -170,7 +170,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(vram_obj); r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, false, false); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6880c023ca8b..d5543c25f3c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -62,11 +62,6 @@ #define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, - uint64_t *addr); /** * amdgpu_init_mem_type - Initialize a memory manager for a specific type of @@ -277,7 +272,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, * */ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, - unsigned long *offset) + uint64_t *offset) { struct drm_mm_node *mm_node = mem->mm_node; @@ -289,91 +284,191 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, } /** + * amdgpu_ttm_map_buffer - Map memory into the GART windows + * @bo: buffer object to map + * @mem: memory object to map + * @mm_node: drm_mm node object to map + * @num_pages: number of pages to map + * @offset: offset into @mm_node where to start + * @window: which GART window to use + * @ring: DMA ring to use for the copy + * @tmz: if we should setup a TMZ enabled mapping + * @addr: resulting address inside the MC address space + * + * Setup one of the GART windows to access a specific piece of memory or return + * the physical address for local memory. 
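+ * Windows must not be shared between concurrent users;
+ * amdgpu_ttm_copy_mem_to_mem() below serializes on
+ * adev->mman.gtt_window_lock and maps the source through window 0 and
+ * the destination through window 1.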
+ */ +static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, + struct drm_mm_node *mm_node, + unsigned num_pages, uint64_t offset, + unsigned window, struct amdgpu_ring *ring, + bool tmz, uint64_t *addr) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned num_dw, num_bytes; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + void *cpu_addr; + uint64_t flags; + unsigned int i; + int r; + + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + + /* Map only what can't be accessed directly */ + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { + *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset; + return 0; + } + + *addr = adev->gmc.gart_start; + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE; + *addr += offset & ~PAGE_MASK; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = num_pages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); + if (tmz) + flags |= AMDGPU_PTE_TMZ; + + cpu_addr = &job->ibs[0].ptr[num_dw]; + + if (mem->mem_type == TTM_PL_TT) { + struct ttm_dma_tt *dma; + dma_addr_t *dma_address; + + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); + dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + cpu_addr); + if (r) + goto error_free; + } else { + dma_addr_t dma_address; + + dma_address = (mm_node->start << PAGE_SHIFT) + offset; + dma_address += adev->vm_manager.vram_base_offset; + + for (i = 0; i < num_pages; ++i) { + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, + &dma_address, flags, cpu_addr); + if (r) + goto error_free; + + dma_address += PAGE_SIZE; + } + } + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +/** * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * @adev: amdgpu device + * @src: buffer/address where to read from + * @dst: buffer/address where to write to + * @size: number of bytes to copy + * @tmz: if a secure copy should be used + * @resv: resv object to sync to + * @f: Returns the last fence if multiple jobs are submitted. * * The function copies @size bytes from {src->mem + src->offset} to * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a * move and different for a BO to BO copy. * - * @f: Returns the last fence if multiple jobs are submitted. 
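 * A minimal caller sketch with hypothetical locals (compare
 * amdgpu_move_blit() below, the in-tree user of the new @tmz argument):
 *
 *	struct amdgpu_copy_mem src = { .bo = bo, .mem = old_mem, .offset = 0 };
 *	struct amdgpu_copy_mem dst = { .bo = bo, .mem = new_mem, .offset = 0 };
 *	struct dma_fence *fence = NULL;
 *
 *	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
 *				       new_mem->num_pages << PAGE_SHIFT,
 *				       amdgpu_bo_encrypted(abo),
 *				       bo->base.resv, &fence);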
*/ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, - uint64_t size, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { + const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); + + uint64_t src_node_size, dst_node_size, src_offset, dst_offset; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *src_mm, *dst_mm; - uint64_t src_node_start, dst_node_start, src_node_size, - dst_node_size, src_page_offset, dst_page_offset; struct dma_fence *fence = NULL; int r = 0; - const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE); if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - src_mm = amdgpu_find_mm_node(src->mem, &src->offset); - src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + - src->offset; - src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset = src->offset; + src_mm = amdgpu_find_mm_node(src->mem, &src_offset); + src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset; - dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); - dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + - dst->offset; - dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset = dst->offset; + dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset); + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset; mutex_lock(&adev->mman.gtt_window_lock); while (size) { - unsigned long cur_size; - uint64_t from = src_node_start, to = dst_node_start; + uint32_t src_page_offset = src_offset & ~PAGE_MASK; + uint32_t dst_page_offset = dst_offset & ~PAGE_MASK; struct dma_fence *next; + uint32_t cur_size; + uint64_t from, to; /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst * begins at an offset, then adjust the size accordingly */ - cur_size = min3(min(src_node_size, dst_node_size), size, - GTT_MAX_BYTES); - if (cur_size + src_page_offset > GTT_MAX_BYTES || - cur_size + dst_page_offset > GTT_MAX_BYTES) - cur_size -= max(src_page_offset, dst_page_offset); - - /* Map only what needs to be accessed. Map src to window 0 and - * dst to window 1 - */ - if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(src->bo, src->mem, - PFN_UP(cur_size + src_page_offset), - src_node_start, 0, ring, - &from); - if (r) - goto error; - /* Adjust the offset because amdgpu_map_buffer returns - * start of mapped page - */ - from += src_page_offset; - } + cur_size = max(src_page_offset, dst_page_offset); + cur_size = min(min3(src_node_size, dst_node_size, size), + (uint64_t)(GTT_MAX_BYTES - cur_size)); + + /* Map src to window 0 and dst to window 1. 
*/ + r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm, + PFN_UP(cur_size + src_page_offset), + src_offset, 0, ring, tmz, &from); + if (r) + goto error; - if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(dst->bo, dst->mem, - PFN_UP(cur_size + dst_page_offset), - dst_node_start, 1, ring, - &to); - if (r) - goto error; - to += dst_page_offset; - } + r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm, + PFN_UP(cur_size + dst_page_offset), + dst_offset, 1, ring, tmz, &to); + if (r) + goto error; r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true); + resv, &next, false, true, tmz); if (r) goto error; @@ -386,23 +481,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_size -= cur_size; if (!src_node_size) { - src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, - src->mem); - src_node_size = (src_mm->size << PAGE_SHIFT); - src_page_offset = 0; + ++src_mm; + src_node_size = src_mm->size << PAGE_SHIFT; + src_offset = 0; } else { - src_node_start += cur_size; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset += cur_size; } + dst_node_size -= cur_size; if (!dst_node_size) { - dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, - dst->mem); - dst_node_size = (dst_mm->size << PAGE_SHIFT); - dst_page_offset = 0; + ++dst_mm; + dst_node_size = dst_mm->size << PAGE_SHIFT; + dst_offset = 0; } else { - dst_node_start += cur_size; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset += cur_size; } } error: @@ -425,6 +517,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_copy_mem src, dst; struct dma_fence *fence = NULL; int r; @@ -438,14 +531,14 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, + amdgpu_bo_encrypted(abo), bo->base.resv, &fence); if (r) goto error; /* clear the space being freed */ if (old_mem->mem_type == TTM_PL_VRAM && - (ttm_to_amdgpu_bo(bo)->flags & - AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, @@ -742,8 +835,8 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { + uint64_t offset = (page_offset << PAGE_SHIFT); struct drm_mm_node *mm; - unsigned long offset = (page_offset << PAGE_SHIFT); mm = amdgpu_find_mm_node(&bo->mem, &offset); return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + @@ -1027,6 +1120,9 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; + if (amdgpu_bo_encrypted(abo)) + flags |= AMDGPU_PTE_TMZ; + if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { uint64_t page_idx = 1; @@ -1539,6 +1635,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, switch (bo->mem.mem_type) { case TTM_PL_TT: + if (amdgpu_bo_is_amdgpu_bo(bo) && + amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) + return false; return true; case TTM_PL_VRAM: @@ -1587,8 +1686,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); - pos = (nodes->start << PAGE_SHIFT) + offset; + pos = 
offset; + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos); + pos += (nodes->start << PAGE_SHIFT); while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; @@ -2015,76 +2115,14 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) return ttm_bo_mmap(filp, vma, &adev->mman.bdev); } -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, - uint64_t *addr) -{ - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - struct amdgpu_device *adev = ring->adev; - struct ttm_tt *ttm = bo->ttm; - struct amdgpu_job *job; - unsigned num_dw, num_bytes; - dma_addr_t *dma_address; - struct dma_fence *fence; - uint64_t src_addr, dst_addr; - uint64_t flags; - int r; - - BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < - AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - - *addr = adev->gmc.gart_start; - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE; - - num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - num_bytes = num_pages * 8; - - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_NORMAL, &job); - if (r) - return r; - - src_addr = num_dw * 4; - src_addr += job->ibs[0].gpu_addr; - - dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes); - - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - - dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT]; - flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, - &job->ibs[0].ptr[num_dw]); - if (r) - goto error_free; - - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - - dma_fence_put(fence); - - return r; - -error_free: - amdgpu_job_free(job); - return r; -} - int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush) + bool vm_needs_flush, bool tmz) { + enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED; struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -2102,8 +2140,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, - direct_submit ?
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job); if (r) return r; @@ -2125,7 +2162,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes); + dst_offset, cur_size_in_bytes, tmz); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; @@ -2192,7 +2229,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, /* for IB padding */ num_dw += 64; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, + &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6b22dc41ef13..4351d02644a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -97,11 +97,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush); + bool vm_needs_flush, bool tmz); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, - uint64_t size, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 550282d9c1fc..5100ebe8858d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1056,8 +1056,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, goto err; } - r = amdgpu_job_alloc_with_ib(adev, 64, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index d090455282e5..ecaa2d7483b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -447,7 +447,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -526,7 +526,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + direct ? 
AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..cbbb8d02535a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq: pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcdaf5204d05..414a0b1c2e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -726,7 +726,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear - * @direct: use a direct update + * @immediate: use an immediate update * * Root PD needs to be reserved when calling this. * @@ -736,7 +736,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, - bool direct) + bool immediate) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; @@ -795,7 +795,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); if (r) @@ -850,11 +850,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @vm: requesting vm * @level: the page table level - * @direct: use a direct update + * @immediate: use an immediate update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, bool direct, + int level, bool immediate, struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -870,7 +870,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = direct; + bp->no_wait_gpu = immediate; if (vm->root.base.bo) bp->resv = vm->root.base.bo->tbo.base.resv; } @@ -881,7 +881,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for * @cursor: Which page table to allocate - * @direct: use a direct update + * @immediate: use an immediate update * * Make sure a specific page table or directory is allocated.
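 * Unlocked updates never reach this allocation; they are restricted to
 * page tables that already exist (see the params->unlocked handling in
 * amdgpu_vm_update_ptes() below).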
* @@ -892,7 +892,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt_cursor *cursor, - bool direct) + bool immediate) { struct amdgpu_vm_pt *entry = cursor->entry; struct amdgpu_bo_param bp; @@ -913,7 +913,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); r = amdgpu_bo_create(adev, &bp, &pt); if (r) @@ -925,7 +925,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt, direct); + r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); if (r) goto error_free_pt; @@ -1276,7 +1276,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @direct: submit directly to the paging queue + * @immediate: submit immediately to the paging queue * * Makes sure all directories are up to date. * @@ -1284,7 +1284,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, * 0 for success, error for failure. */ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct) + struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; int r; @@ -1295,7 +1295,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); if (r) @@ -1446,20 +1446,24 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { + if (!params->unlocked) { /* make sure that the page tables covering the * address range are actually allocated */ r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->direct); + &cursor, params->immediate); if (r) return r; } shift = amdgpu_vm_level_shift(adev, cursor.level); parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { /* No huge page support before GMC v9 */ if (cursor.level != AMDGPU_VM_PTB) { if (!amdgpu_vm_pt_descendant(adev, &cursor)) @@ -1557,7 +1561,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * * @adev: amdgpu_device pointer * @vm: requested vm - * @direct: direct submission in a page fault + * @immediate: immediate submission in a page fault + * @unlocked: unlocked invalidation during MM callback * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @@ -1572,8 +1577,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure.
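 * For example, amdgpu_vm_handle_fault() below passes immediate=true,
 * unlocked=false, while an MMU-notifier style invalidation would be the
 * hypothetical unlocked=true case.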
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct, - struct dma_resv *resv, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, dma_addr_t *pages_addr, @@ -1586,8 +1591,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; params.pages_addr = pages_addr; + params.unlocked = unlocked; /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1603,11 +1609,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_unlock; } - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - struct amdgpu_bo *root = vm->root.base.bo; + if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { + struct dma_fence *tmp = dma_fence_get_stub(); - if (!dma_fence_is_signaled(vm->last_direct)) - amdgpu_bo_fence(root, vm->last_direct, true); + amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true); + swap(vm->last_unlocked, tmp); + dma_fence_put(tmp); } r = vm->update_funcs->prepare(&params, resv, sync_mode); @@ -1721,7 +1728,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, start, last, flags, addr, dma_addr, fence); if (r) @@ -1784,6 +1791,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (bo) { flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + + if (amdgpu_bo_encrypted(bo)) + flags |= AMDGPU_PTE_TMZ; + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); } else { flags = 0x0; @@ -2014,7 +2025,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, mapping->start, mapping->last, init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -2578,7 +2589,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Don't evict VM page tables while they are updated */ - if (!dma_fence_is_signaled(bo_base->vm->last_direct)) { + if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) { amdgpu_vm_eviction_unlock(bo_base->vm); return false; } @@ -2755,7 +2766,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_direct, true, timeout); + return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } /** @@ -2791,7 +2802,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL, + r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, adev->vm_manager.vm_pte_scheds, adev->vm_manager.vm_pte_num_scheds, NULL); if (r) @@ -2801,7 +2812,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, adev->vm_manager.vm_pte_scheds, adev->vm_manager.vm_pte_num_scheds, NULL); if (r) - goto error_free_direct; + goto error_free_immediate; vm->pte_support_ats = false; vm->is_compute_context = false; @@ -2827,7 +2838,7 @@ static int amdgpu_vm_init(struct
amdgpu_device *adev, struct amdgpu_vm *vm, else vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - vm->last_direct = dma_fence_get_stub(); + vm->last_unlocked = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2881,11 +2892,11 @@ error_free_root: vm->root.base.bo = NULL; error_free_delayed: - dma_fence_put(vm->last_direct); + dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); -error_free_direct: - drm_sched_entity_destroy(&vm->direct); +error_free_immediate: + drm_sched_entity_destroy(&vm->immediate); return r; } @@ -3082,8 +3093,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } - dma_fence_wait(vm->last_direct, false); - dma_fence_put(vm->last_direct); + dma_fence_wait(vm->last_unlocked, false); + dma_fence_put(vm->last_unlocked); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3100,7 +3111,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&root); WARN_ON(vm->root.base.bo); - drm_sched_entity_destroy(&vm->direct); + drm_sched_entity_destroy(&vm->immediate); drm_sched_entity_destroy(&vm->delayed); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { @@ -3333,8 +3344,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, value = 0; } - r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, - flags, value, NULL, NULL); + r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr, + addr + 1, flags, value, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b13c14d6b820..c8e68d7890bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -54,6 +54,9 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SNOOPED (1ULL << 2) +/* RV+ */ +#define AMDGPU_PTE_TMZ (1ULL << 3) + /* VI only */ #define AMDGPU_PTE_EXECUTABLE (1ULL << 4) @@ -203,9 +206,14 @@ struct amdgpu_vm_update_params { struct amdgpu_vm *vm; /** - * @direct: if changes should be made directly + * @immediate: if changes should be made immediately + */ + bool immediate; + + /** + * @unlocked: true if the root BO is not locked */ - bool direct; + bool unlocked; /** * @pages_addr: @@ -271,11 +279,11 @@ struct amdgpu_vm { struct dma_fence *last_update; /* Scheduler entities for page table updates */ - struct drm_sched_entity direct; + struct drm_sched_entity immediate; struct drm_sched_entity delayed; - /* Last submission to the scheduler entities */ - struct dma_fence *last_direct; + /* Last unlocked submission to the scheduler entities */ + struct dma_fence *last_unlocked; unsigned int pasid; /* dedicated to vm */ @@ -376,7 +384,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct); + struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e38516304070..39c704a1fb0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -84,7 +84,7 @@ static int 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e38516304070..39c704a1fb0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -84,7 +84,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
 
 	pe += (unsigned long)amdgpu_bo_kptr(bo);
 
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
 
 	for (i = 0; i < count; i++) {
 		value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index fbd451f3559a..8d9c6feba660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -61,11 +61,12 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 				  struct dma_resv *resv,
 				  enum amdgpu_sync_mode sync_mode)
 {
+	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+						     : AMDGPU_IB_POOL_DELAYED;
 	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
-				     p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
+	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
 	if (r)
 		return r;
 
@@ -91,11 +92,11 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 {
 	struct amdgpu_ib *ib = p->job->ibs;
 	struct drm_sched_entity *entity;
-	struct dma_fence *f, *tmp;
 	struct amdgpu_ring *ring;
+	struct dma_fence *f;
 	int r;
 
-	entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
 	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
 
 	WARN_ON(ib->length_dw == 0);
@@ -105,15 +106,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	if (r)
 		goto error;
 
-	if (p->direct) {
-		tmp = dma_fence_get(f);
-		swap(p->vm->last_direct, tmp);
+	if (p->unlocked) {
+		struct dma_fence *tmp = dma_fence_get(f);
+
+		swap(p->vm->last_unlocked, f);
 		dma_fence_put(tmp);
 	} else {
-		dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, f);
+		amdgpu_bo_fence(p->vm->root.base.bo, f, true);
 	}
 
-	if (fence && !p->direct)
+	if (fence && !p->immediate)
 		swap(*fence, f);
 	dma_fence_put(f);
 	return 0;
@@ -143,7 +145,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
 	src += p->num_dw_left * 4;
 
 	pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
-	trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
+	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
 
 	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
 }
@@ -170,7 +172,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
 	struct amdgpu_ib *ib = p->job->ibs;
 
 	pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
-	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
 	if (count < 3) {
 		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
 				    count, incr);
@@ -199,6 +201,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 			       uint64_t addr, unsigned count, uint32_t incr,
 			       uint64_t flags)
 {
+	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+						     : AMDGPU_IB_POOL_DELAYED;
 	unsigned int i, ndw, nptes;
 	uint64_t *pte;
 	int r;
@@ -224,8 +228,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 		ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
 		ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
 
-		r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
-					     p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
+		r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
+					     &p->job);
 		if (r)
 			return r;
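/*
 * The two vm_sdma hunks above derive the IB pool the same way.  Pulled out
 * as a one-liner (hypothetical helper) the rule is simply: immediate
 * page-table updates cannot wait behind normal submissions, so they draw
 * from their own pool.
 */
static enum amdgpu_ib_pool_type
vm_update_ib_pool(const struct amdgpu_vm_update_params *p)
{
	return p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED;
}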
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 54d8a3e7e75c..48c0ce13f68e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -395,7 +395,9 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 	bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
 
 	/* fw bug so temporarily disable pstate switching */
-	if (!hive || adev->asic_type == CHIP_VEGA20)
+	return 0;
+
+	if (!hive || adev->asic_type != CHIP_VEGA20)
 		return 0;
 
 	mutex_lock(&hive->hive_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 7c2ee84fa4a6..20f108818b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
 static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
 				      uint64_t src_offset,
 				      uint64_t dst_offset,
-				      uint32_t byte_count)
+				      uint32_t byte_count,
+				      bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
 	ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index e1687e408e1d..9fe20b782e88 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -7095,6 +7095,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_PG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
@@ -7115,6 +7119,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
@@ -7427,7 +7434,8 @@ static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+					 uint32_t flags)
 {
 	uint32_t dw2 = 0;
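/*
 * Every emit_copy_buffer backend in this series gains the same trailing
 * 'tmz' flag, so generic code can thread encryption through the usual
 * dispatch table.  A hedged sketch of such a call site -- the wrapper name
 * is hypothetical, and the buffer_funcs field is assumed from the driver's
 * existing dispatch structure:
 */
static void emit_copy(struct amdgpu_device *adev, struct amdgpu_ib *ib,
		      uint64_t src, uint64_t dst, uint32_t bytes, bool tmz)
{
	/* engines without TMZ support (SI/CIK/VI) simply ignore the flag */
	adev->mman.buffer_funcs->emit_copy_buffer(ib, src, dst, bytes, tmz);
}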
@@ -7435,8 +7443,6 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flag
 	gfx_v10_0_ring_emit_ce_meta(ring,
 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
-	gfx_v10_0_ring_emit_tmz(ring, true);
-
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
 		/* set load_global_config & load_global_uconfig */
@@ -7593,10 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 				   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+					   bool secure)
 {
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -8050,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 496205a8ee0c..eedb92218ba5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2531,7 +2531,7 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
 		break;
 	default:
 		break;
-	};
+	}
 }
 
 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
@@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
 
 	spin_lock_irqsave(&kiq->ring_lock, flags);
 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
-		spin_unlock_irqrestore(&kiq->ring_lock, flags);
 		pr_err("critical bug! too many kiq readers\n");
-		goto failed_kiq_read;
+		goto failed_unlock;
 	}
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
@@ -4070,7 +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
 				reg_val_offs * 4));
 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
 				reg_val_offs * 4));
-	amdgpu_fence_emit_polling(ring, &seq);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r)
+		goto failed_undo;
+
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
@@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
 	amdgpu_device_wb_free(adev, reg_val_offs);
 	return value;
 
+failed_undo:
+	amdgpu_ring_undo(ring);
+failed_unlock:
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq_read:
+	if (reg_val_offs)
+		amdgpu_device_wb_free(adev, reg_val_offs);
 	pr_err("failed to read gpu clock\n");
 	return ~0;
 }
@@ -5434,10 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+					  bool secure)
 {
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
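/*
 * Worked example of the dword built by both emit_frame_cntl()
 * implementations above, using the FRAME_TMZ and FRAME_CMD definitions
 * this patch adds to nvd.h and soc15d.h:
 *
 *   start=true,  secure=true  -> FRAME_TMZ | FRAME_CMD(0) = 0x00000001
 *   start=false, secure=true  -> FRAME_TMZ | FRAME_CMD(1) = 0x10000001
 *   start=false, secure=false ->             FRAME_CMD(1) = 0x10000000
 */
static uint32_t frame_cntl_dword(bool start, bool secure)
{
	return (secure ? FRAME_TMZ : 0) | FRAME_CMD(start ? 0 : 1);
}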
 }
 
@@ -5447,8 +5458,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	if (amdgpu_sriov_vf(ring->adev))
 		gfx_v9_0_ring_emit_ce_meta(ring);
 
-	gfx_v9_0_ring_emit_tmz(ring, true);
-
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
 		/* set load_global_config & load_global_uconfig */
@@ -6691,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 94a6096a81f4..edaa50d850a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -372,7 +372,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself.
 	 */
-	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_VM, &job);
+	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+				     &job);
 	if (r)
 		goto error_alloc;
 
@@ -426,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
 		kiq->pmf->kiq_invalidate_tlbs(ring,
 					pasid, flush_type, all_hub);
-		amdgpu_fence_emit_polling(ring, &seq);
+		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+		if (r) {
+			amdgpu_ring_undo(ring);
+			spin_unlock(&adev->gfx.kiq.ring_lock);
+			return -ETIME;
+		}
+
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fecdbc471983..055ecba754ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 						pasid, 2, all_hub);
 		kiq->pmf->kiq_invalidate_tlbs(ring,
 					pasid, flush_type, all_hub);
-		amdgpu_fence_emit_polling(ring, &seq);
+		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+		if (r) {
+			amdgpu_ring_undo(ring);
+			spin_unlock(&adev->gfx.kiq.ring_lock);
+			return -ETIME;
+		}
+
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 52a697545801..83b453f5d717 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -26,7 +26,7 @@
 
 #define AI_MAILBOX_POLL_ACK_TIMEDOUT	500
 #define AI_MAILBOX_POLL_MSG_TIMEDOUT	12000
-#define AI_MAILBOX_POLL_FLR_TIMEDOUT	500
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT	5000
 
 enum idh_request {
 	IDH_REQ_GPU_INIT_ACCESS = 1,
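/*
 * The two gmc hunks above add the same recovery shape around a KIQ
 * submission.  A condensed sketch of that shape -- the wrapper name is
 * hypothetical, the calls and error code are exactly as in the hunks, and
 * the final "< 1 means timed out" check is assumed from the polling-wait
 * convention elsewhere in the driver:
 */
static int kiq_invalidate_tlbs_once(struct amdgpu_device *adev,
				    struct amdgpu_ring *ring,
				    struct amdgpu_kiq *kiq, uint16_t pasid,
				    uint32_t flush_type, bool all_hub)
{
	uint32_t seq;
	int r;

	spin_lock(&adev->gfx.kiq.ring_lock);
	amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r) {
		/* roll back the dwords reserved by amdgpu_ring_alloc() */
		amdgpu_ring_undo(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		return -ETIME;
	}
	amdgpu_ring_commit(ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
	return (r < 1) ? -ETIME : 0;
}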
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 45bcf438e607..52605e14a1a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -26,7 +26,7 @@
 
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT	500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT	6000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT	500
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT	5000
 
 enum idh_request {
 	IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index 074a9a09c0a7..a5b60c9a2418 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -73,6 +73,22 @@
 #define SDMA_OP_AQL_COPY  0
 #define SDMA_OP_AQL_BARRIER_OR  0
 
+#define SDMA_GCR_RANGE_IS_PA		(1 << 18)
+#define SDMA_GCR_SEQ(x)			(((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB			(1 << 15)
+#define SDMA_GCR_GL2_INV		(1 << 14)
+#define SDMA_GCR_GL2_DISCARD		(1 << 13)
+#define SDMA_GCR_GL2_RANGE(x)		(((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US			(1 << 10)
+#define SDMA_GCR_GL1_INV		(1 << 9)
+#define SDMA_GCR_GLV_INV		(1 << 8)
+#define SDMA_GCR_GLK_INV		(1 << 7)
+#define SDMA_GCR_GLK_WB			(1 << 6)
+#define SDMA_GCR_GLM_INV		(1 << 5)
+#define SDMA_GCR_GLM_WB			(1 << 4)
+#define SDMA_GCR_GL1_RANGE(x)		(((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x)		(((x) & 0x3) << 0)
+
 /*define for op field*/
 #define SDMA_PKT_HEADER_op_offset 0
 #define SDMA_PKT_HEADER_op_mask   0x000000FF
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 995bdec9fa7d..9c42316c47c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -498,8 +498,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
-		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
-		    !amdgpu_sriov_vf(adev))
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index 1de984647dbb..f3d8771ebed4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -306,6 +306,7 @@
 #define	PACKET3_GET_LOD_STATS			0x8E
 #define	PACKET3_DRAW_MULTI_PREAMBLE		0x8F
 #define	PACKET3_FRAME_CONTROL			0x90
+#	define	FRAME_TMZ	(1 << 0)
 #	define	FRAME_CMD(x) ((x) << 28)
 			/*
 			 * x=0: tmz_begin
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index d2d2363787ff..97c80f1d5731 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -137,6 +137,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
+		if (amdgpu_sriov_vf(adev))
+			break;
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
 		err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
 		if (err) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 9a7f194c730a..5f304d61999e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1200,7 +1200,8 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
 static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
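/*
 * Quick reference for the new SDMA_GCR_* fields above: they form the
 * cache-operation word of a GCR_REQ packet.  The flush used by
 * sdma_v5_0_ring_emit_ib() in a later hunk packs them like this (bit
 * positions per the definitions above):
 */
uint32_t gcr_cntl = (SDMA_GCR_GL2_INV |	/* bit 14: invalidate GL2 */
		     SDMA_GCR_GL2_WB  |	/* bit 15: write back GL2 */
		     SDMA_GCR_GLM_INV |	/* bit  5: invalidate GLM */
		     SDMA_GCR_GLM_WB)	/* bit  4: write back GLM */
		    << 16;	/* field lives in the dword's upper half */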
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e6b5b4f672a0..c59f6f6f4c09 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1638,7 +1638,8 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
 static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3e99f31b4bd0..ebd723a0bcfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2458,10 +2458,12 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
 static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
 	ib->ptr[ib->length_dw++] = byte_count - 1;
 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index e206064ffb82..b544baf306f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -410,6 +410,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
 
+	/* Invalidate L2, because if we don't do it, we might get stale cache
+	 * lines from previous IBs.
+	 */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
+				 SDMA_GCR_GL2_WB |
+				 SDMA_GCR_GLM_INV |
+				 SDMA_GCR_GLM_WB) << 16);
+	amdgpu_ring_write(ring, 0xffffff80);
+	amdgpu_ring_write(ring, 0xffff);
+
 	/* An IB packet must end on a 8 DW boundary--the next dword
 	 * must be on a 8-dword boundary. Our IB packet below is 6
 	 * dwords long, thus add x number of NOPs, such that, in
@@ -1634,7 +1646,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
-	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
+	.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
 	.emit_fence = sdma_v5_0_ring_emit_fence,
 	.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
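/*
 * Accounting check for the .emit_ib_size change above: the GCR_REQ sequence
 * added to sdma_v5_0_ring_emit_ib() is exactly five amdgpu_ring_write()
 * calls, hence the new leading "5 +" term.  Isolated here with a
 * hypothetical helper name, payload identical to the hunk:
 */
static void sdma_v5_0_emit_gcr_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); /* 1 */
	amdgpu_ring_write(ring, 0);				      /* 2 */
	amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
				 SDMA_GCR_GLM_INV |
				 SDMA_GCR_GLM_WB) << 16);	      /* 3 */
	amdgpu_ring_write(ring, 0xffffff80);			      /* 4 */
	amdgpu_ring_write(ring, 0xffff);			      /* 5 */
}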
@@ -1694,10 +1706,12 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
 static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
 	ib->ptr[ib->length_dw++] = byte_count - 1;
 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index e4b8283ad11d..7d2bbcbe547b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -776,7 +776,8 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
 static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
 				    uint64_t src_offset,
 				    uint64_t dst_offset,
-				    uint32_t byte_count)
+				    uint32_t byte_count,
+				    bool tmz)
 {
 	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
 					      1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index c893c645a4b2..56d02aa690a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -35,6 +35,9 @@
 #define RREG32_SOC15(ip, inst, reg) \
 	RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
 
+#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
+	RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+
 #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
 	RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index edfe50821cd9..295d68c5811d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -286,6 +286,7 @@
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF	0x88
 #define	PACKET3_SWITCH_BUFFER	0x8B
 #define	PACKET3_FRAME_CONTROL	0x90
+#	define	FRAME_TMZ	(1 << 0)
 #	define	FRAME_CMD(x) ((x) << 28)
 			/*
 			 * x=0: tmz_begin
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 14d346321a5f..418cf097c918 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -56,24 +56,43 @@ const uint32_t
 
 static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
 {
-	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+	uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+	rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
 			RSMU_UMC_INDEX_MODE_EN, 1);
+
+	WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
 }
 
 static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
 {
-	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+	uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+	rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
 			RSMU_UMC_INDEX_MODE_EN, 0);
+
+	WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
 }
 
 static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
 {
-	uint32_t rsmu_umc_index;
+	uint32_t rsmu_umc_addr, rsmu_umc_val;
 
-	rsmu_umc_index = RREG32_SOC15(RSMU, 0,
+	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
 			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
 
-	return REG_GET_FIELD(rsmu_umc_index,
+	return REG_GET_FIELD(rsmu_umc_val,
 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
 			RSMU_UMC_INDEX_MODE_EN);
 }
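/*
 * The three conversions above share one read-modify-write against the RSMU
 * index register, reachable only through the indirect PCIE window.  Note
 * the "* 4": SOC15_REG_OFFSET() yields a dword offset, while RREG32_PCIE()
 * and WREG32_PCIE() take byte addresses.  Condensed into one hypothetical
 * helper:
 */
static void rsmu_umc_index_mode_set(struct amdgpu_device *adev, uint32_t en)
{
	uint32_t addr = SOC15_REG_OFFSET(RSMU, 0,
			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
	uint32_t val = RREG32_PCIE(addr * 4); /* dword offset -> byte address */

	val = REG_SET_FIELD(val, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
			    RSMU_UMC_INDEX_MODE_EN, en);
	WREG32_PCIE(addr * 4, val);
}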
@@ -85,6 +104,81 @@ static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
 	return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
 }
 
+static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
+					uint32_t umc_reg_offset)
+{
+	uint32_t ecc_err_cnt_addr;
+	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0,
+					mmUMCCH0_0_EccErrCntSel_ARCT);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0,
+					mmUMCCH0_0_EccErrCnt_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0,
+					mmUMCCH0_0_EccErrCntSel);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0,
+					mmUMCCH0_0_EccErrCnt);
+	}
+
+	/* select the lower chip */
+	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+					umc_reg_offset) * 4);
+	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+					UMCCH0_0_EccErrCntSel,
+					EccErrCntCsSel, 0);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+			ecc_err_cnt_sel);
+
+	/* clear lower chip error count */
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+			UMC_V6_1_CE_CNT_INIT);
+
+	/* select the higher chip */
+	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+					umc_reg_offset) * 4);
+	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+					UMCCH0_0_EccErrCntSel,
+					EccErrCntCsSel, 1);
+	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+			ecc_err_cnt_sel);
+
+	/* clear higher chip error count */
+	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+			UMC_V6_1_CE_CNT_INIT);
+}
+
+static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
+{
+	uint32_t umc_inst = 0;
+	uint32_t ch_inst = 0;
+	uint32_t umc_reg_offset = 0;
+	uint32_t rsmu_umc_index_state =
+				umc_v6_1_get_umc_index_mode_state(adev);
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_disable_umc_index_mode(adev);
+
+	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+		umc_reg_offset = get_umc_6_reg_offset(adev,
+						umc_inst,
+						ch_inst);
+
+		umc_v6_1_clear_error_count_per_channel(adev,
+						umc_reg_offset);
+	}
+
+	if (rsmu_umc_index_state)
+		umc_v6_1_enable_umc_index_mode(adev);
+}
+
 static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
 						   uint32_t umc_reg_offset,
 						   unsigned long *error_count)
@@ -117,23 +211,21 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
 		ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
 						UMCCH0_0_EccErrCntSel,
 						EccErrCntCsSel, 0);
 		WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
 		ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
 		*error_count +=
 			(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
 			 UMC_V6_1_CE_CNT_INIT);
-		/* clear the lower chip err count */
-		WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 
 		/* select the higher chip and check the err counter */
 		ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
 						UMCCH0_0_EccErrCntSel,
 						EccErrCntCsSel, 1);
 		WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
 		ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
 		*error_count +=
 			(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
 			 UMC_V6_1_CE_CNT_INIT);
-		/* clear the higher chip err count */
-		WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
 
 		/* check for SRAM correctable error
 		  MCUMC_STATUS is a 64 bit register */
@@ -209,6 +301,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
 
 	if (rsmu_umc_index_state)
 		umc_v6_1_enable_umc_index_mode(adev);
+
+	umc_v6_1_clear_error_count(adev);
 }
 
 static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
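/*
 * The arithmetic both query paths above rely on: the hardware counter is
 * re-armed at UMC_V6_1_CE_CNT_INIT rather than zero (presumably so the
 * counter-full interrupt threshold stays reachable), and every raw read
 * therefore subtracts that baseline.  Illustrative numbers only -- if the
 * init value were 0x10 and the register read back 0x13, three new
 * correctable errors would be reported:
 *
 *   *error_count += REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt)
 *                   - UMC_V6_1_CE_CNT_INIT;   // 0x13 - 0x10 = 3
 *
 * Clearing a channel (umc_v6_1_clear_error_count_per_channel above) is then
 * just writing the baseline back, not zero.
 */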
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 0fa1c5cec439..38ca4a712f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1404,7 +1404,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
 {
 	struct amdgpu_ring *ring;
 	uint32_t reg_data = 0;
-	int ret_code;
+	int ret_code = 0;
 
 	/* pause/unpause if state is changed */
 	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
@@ -1414,7 +1414,6 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
 
 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
-			ret_code = 0;
 			SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
 
@@ -1469,9 +1468,10 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
 					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
 					   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
 			}
 		} else {
-			/* unpause dpg, no need to wait */
 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
 			WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+			SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
 		}
 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
 	}
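/*
 * Why "int ret_code = 0" matters in the hunk above: SOC15_WAIT_ON_RREG() is
 * a macro that assigns to ret_code, and the unpause branch now also polls
 * UVD_POWER_STATUS instead of returning immediately, so the branch-local
 * "ret_code = 0" is hoisted to the declaration to keep every path defined.
 * Condensed shape of the now-symmetric unpause path (arguments exactly as
 * in the hunk; surrounding function elided):
 */
	reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
	WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);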