From 37e017311c650ba0502aec6ec531ed9bc84d70da Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 6 Jun 2024 15:06:38 +0200 Subject: drm/xe/guc: Split g2h worker function In the next patch we will want to perform the same steps that the g2h worker function performs, but from a different worker. Suggested-by: Matthew Brost Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240606130639.1504-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c1f258348f5c..08cf355e2e56 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1260,9 +1260,8 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) return 1; } -static void g2h_worker_func(struct work_struct *w) +static void receive_g2h(struct xe_guc_ct *ct) { - struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); struct xe_gt *gt = ct_to_gt(ct); bool ongoing; int ret; @@ -1311,6 +1310,13 @@ static void g2h_worker_func(struct work_struct *w) xe_pm_runtime_put(ct_to_xe(ct)); } +static void g2h_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + + receive_g2h(ct); +} + static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, struct guc_ctb_snapshot *snapshot, bool atomic) -- cgit From 09b286950f2911615694f4a1ff491efe9ed5eeba Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 6 Jun 2024 15:06:39 +0200 Subject: drm/xe/guc: Allow CTB G2H processing without G2H IRQ During early initialization, in the xe_guc_min_load_for_hwconfig() function, we successfully enable CTB communication, but it only allows us to send non-blocking H2G messages: since IRQs, including the G2H IRQ, are not yet enabled, we will not notice any new G2H message sent by the GuC, including replies to our blocking H2G request messages. Those successful replies are mandatory for VF drivers to continue normal operation. As a workaround for this driver initialization ordering issue, introduce a special safe-mode CTB worker that periodically triggers G2H processing, like the original IRQ handler would, for as long as no MSI/MSI-X IRQs are enabled on the driver. Once we detect that IRQs have been enabled, we stop this worker. 
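For illustration, the whole mechanism reduces to a delayed work item that processes G2H and re-queues itself while MSI/MSI-X remains disabled (a simplified sketch of the code added below; the real version splits this across small helpers and logs enable/cancel events):

	static void safe_mode_worker_func(struct work_struct *w)
	{
		struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct,
						    safe_mode_worker.work);

		receive_g2h(ct);	/* same processing as the IRQ-driven worker */

		/* keep polling every ~100 ms until MSI/MSI-X IRQs show up */
		if (!pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev)))
			queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
	}
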
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240606130639.1504-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 43 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++ 2 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 08cf355e2e56..936b63483e96 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -126,7 +126,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) xa_destroy(&ct->fence_lookup); } +static void receive_g2h(struct xe_guc_ct *ct); static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); static void primelockdep(struct xe_guc_ct *ct) { @@ -155,6 +157,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -321,6 +324,42 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, mutex_unlock(&ct->lock); } +static bool ct_needs_safe_mode(struct xe_guc_ct *ct) +{ + return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev)); +} + +static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct) +{ + if (!ct_needs_safe_mode(ct)) + return false; + + queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10); + return true; +} + +static void safe_mode_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work); + + receive_g2h(ct); + + if (!ct_restart_safe_mode_worker(ct)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n"); +} + +static void ct_enter_safe_mode(struct xe_guc_ct *ct) +{ + if (ct_restart_safe_mode_worker(ct)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n"); +} + +static void ct_exit_safe_mode(struct xe_guc_ct *ct) +{ + if (cancel_delayed_work_sync(&ct->safe_mode_worker)) + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n"); +} + int xe_guc_ct_enable(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); @@ -350,6 +389,9 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) wake_up_all(&ct->wq); xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); + if (ct_needs_safe_mode(ct)) + ct_enter_safe_mode(ct); + return 0; err_out: @@ -373,6 +415,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) void xe_guc_ct_disable(struct xe_guc_ct *ct) { xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + ct_exit_safe_mode(ct); stop_g2h_handler(ct); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index fede4c6e93cb..761cb9031298 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -110,6 +110,8 @@ struct xe_guc_ct { u32 g2h_outstanding; /** @g2h_worker: worker to process G2H messages */ struct work_struct g2h_worker; + /** @safe_mode_worker: worker to check G2H messages with IRQ disabled */ + struct delayed_work safe_mode_worker; /** @state: CT state */ enum xe_guc_ct_state state; /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ -- cgit From 78247e48a118948cbb7126fa3ebe0e2cb4215bfd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 7 Jun 2024 11:07:07 +0300 Subject: drm/xe: do not select ACPI_BUTTON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe 
driver has never needed ACPI button. Selecting the kconfig is just copy-paste from i915, which no longer needs it either. Stop selecting ACPI_BUTTON. Cc: Ville Syrjälä Closes: https://lore.kernel.org/r/ZmGsJsXhHcPV48XJ@intel.com Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1872adc6b20ce4c5ef55ba60a7233b31ace776fb.1717747542.git.jani.nikula@intel.com --- drivers/gpu/drm/xe/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 782934be0a77..db0efed49f1d 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -28,7 +28,6 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select ACPI_BUTTON if ACPI select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE -- cgit From 3d420e9fa84866cb3b98b6baa05d682850ef2952 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 6 Jun 2024 18:52:12 -0700 Subject: drm/xe: Rework GPU page fault handling Add helper function to implement VMA (user binding) page faults, remove unnecessary userptr.invalidate_link list del operation, retry on memory pressure, remove unnecessary xe_vma_userptr_check_repin after rebinding, remove unnecessary TLB invalidation, and always use vm->lock in write mode. Changes help facilitate SVM page faults. Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240607015212.2190106-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 142 +++++++++++++++-------------------- 1 file changed, 62 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 040dd142c49c..eaf68f0135c1 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -125,126 +125,108 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, return 0; } -static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf, + struct xe_vma *vma) { - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); + struct xe_vm *vm = xe_vma_vm(vma); struct drm_exec exec; - struct xe_vm *vm; - struct xe_vma *vma = NULL; struct dma_fence *fence; - bool write_locked; - int ret = 0; + ktime_t end = 0; + int err; bool atomic; - /* SW isn't expected to handle TRTT faults */ - if (pf->trva_fault) - return -EFAULT; - - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - mutex_unlock(&xe->usm.lock); - if (!vm) - return -EINVAL; - -retry_userptr: - /* - * TODO: Avoid exclusive lock if VM doesn't have userptrs, or - * start out read-locked? 
- */ - down_write(&vm->lock); - write_locked = true; - vma = lookup_vma(vm, pf->page_addr); - if (!vma) { - ret = -EINVAL; - goto unlock_vm; - } - - if (!xe_vma_is_userptr(vma) || - !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { - downgrade_write(&vm->lock); - write_locked = false; - } - trace_xe_vma_pagefault(vma); - atomic = access_is_atomic(pf->access_type); /* Check if VMA is valid */ if (vma_is_valid(tile, vma) && !atomic) - goto unlock_vm; - - /* TODO: Validate fault */ + return 0; - if (xe_vma_is_userptr(vma) && write_locked) { +retry_userptr: + if (xe_vma_is_userptr(vma) && + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&uvma->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - - ret = xe_vma_userptr_pin_pages(uvma); - if (ret) - goto unlock_vm; - - downgrade_write(&vm->lock); - write_locked = false; + err = xe_vma_userptr_pin_pages(uvma); + if (err) + return err; } /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, atomic, tile->id); + err = xe_pf_begin(&exec, vma, atomic, tile->id); drm_exec_retry_on_contention(&exec); - if (ret) + if (xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; + if (err) goto unlock_dma_resv; /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); fence = xe_vma_rebind(vm, vma, BIT(tile->id)); if (IS_ERR(fence)) { - ret = PTR_ERR(fence); + err = PTR_ERR(fence); + if (xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; goto unlock_dma_resv; } } - /* - * XXX: Should we drop the lock before waiting? This only helps if doing - * GPU binds which is currently only done if we have to wait for more - * than 10ms on a move. - */ dma_fence_wait(fence, false); dma_fence_put(fence); - - if (xe_vma_is_userptr(vma)) - ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); vma->tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: drm_exec_fini(&exec); -unlock_vm: - if (!ret) - vm->usm.last_fault_vma = vma; - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); - if (ret == -EAGAIN) + if (err == -EAGAIN) goto retry_userptr; - if (!ret) { - ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma); - if (ret >= 0) - ret = 0; + return err; +} + +static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + int err; + + /* SW isn't expected to handle TRTT faults */ + if (pf->trva_fault) + return -EFAULT; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = NULL; + mutex_unlock(&xe->usm.lock); + if (!vm) + return -EINVAL; + + /* + * TODO: Change to read lock? Using write lock for simplicity. 
+ */ + down_write(&vm->lock); + vma = lookup_vma(vm, pf->page_addr); + if (!vma) { + err = -EINVAL; + goto unlock_vm; } + + err = handle_vma_pagefault(tile, pf, vma); + +unlock_vm: + if (!err) + vm->usm.last_fault_vma = vma; + up_write(&vm->lock); xe_vm_put(vm); - return ret; + return err; } static int send_pagefault_reply(struct xe_guc *guc, -- cgit From 7ecea18e8cdf9efbe2a1f96573a185b83a5d9e85 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 6 Jun 2024 14:27:20 +0530 Subject: drm/xe: Cleanup force wake registers bit definitions - Remove unused bit definitions. - Driver uses BIT(0) for waking/sleeping the domain and since the registers are masked respective mask bit BIT(16) needs to be set. Use defines for these bits and use them in domain initialization. v3 - Use defines within domain_init Cc: Rodrigo Vivi Cc: Badal Nilawar Suggested-by: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Badal Nilawar Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240606085720.1327152-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 +++++--- drivers/gpu/drm/xe/xe_force_wake.c | 24 +++++++++--------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d09b2473259f..47c26c37608d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -487,9 +487,11 @@ ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) #define FORCEWAKE_ACK_GT XE_REG(0x130044) -#define FORCEWAKE_KERNEL BIT(0) -#define FORCEWAKE_USER BIT(1) -#define FORCEWAKE_KERNEL_FALLBACK BIT(15) + +/* Applicable for all FORCEWAKE_DOMAIN and FORCEWAKE_ACK_DOMAIN regs */ +#define FORCEWAKE_KERNEL 0 +#define FORCEWAKE_MT(bit) BIT(bit) +#define FORCEWAKE_MT_MASK(bit) BIT((bit) + 16) #define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) #define MTL_MEDIA_MC6 XE_REG(0x138048) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 9bbe8a5040da..b2d385daff4b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -28,13 +28,13 @@ fw_to_xe(struct xe_force_wake *fw) static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, - struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask) + struct xe_reg reg, struct xe_reg ack) { domain->id = id; domain->reg_ctl = reg; domain->reg_ack = ack; - domain->val = val; - domain->mask = mask; + domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL); + domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL); } void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) @@ -51,14 +51,12 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, - FORCEWAKE_ACK_GT_MTL, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GT_MTL); } else { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, - FORCEWAKE_ACK_GT, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GT); } } @@ -73,8 +71,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, - FORCEWAKE_ACK_RENDER, - BIT(0), BIT(16)); + FORCEWAKE_ACK_RENDER); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) @@ -83,8 +80,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, 
struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, FORCEWAKE_MEDIA_VDBOX(j), - FORCEWAKE_ACK_MEDIA_VDBOX(j), - BIT(0), BIT(16)); + FORCEWAKE_ACK_MEDIA_VDBOX(j)); } for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { @@ -94,16 +90,14 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, FORCEWAKE_MEDIA_VEBOX(j), - FORCEWAKE_ACK_MEDIA_VEBOX(j), - BIT(0), BIT(16)); + FORCEWAKE_ACK_MEDIA_VEBOX(j)); } if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], XE_FW_DOMAIN_ID_GSC, FORCEWAKE_GSC, - FORCEWAKE_ACK_GSC, - BIT(0), BIT(16)); + FORCEWAKE_ACK_GSC); } static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) -- cgit From 4468d0488ecb91639f12659f8a025139120a431b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 4 Jun 2024 11:47:00 -0700 Subject: drm/xe: Drop EXEC_QUEUE_FLAG_BANNED Clean up a layering violation: the EXEC_QUEUE_FLAG_BANNED bit in q->flags was being set in the GuC backend. Move banned to a GuC-owned bit and report banned status to upper layers via the reset_status vfunc. This is a slight change in behavior, as reset_status now also returns true if the wedged or killed bits are set, but in all of these cases submission to the queue is no longer allowed. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240604184700.1946918-1-matthew.brost@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_exec_queue_types.h | 12 +++++------- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 97eeb973e897..4cf6c6ab4866 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -141,7 +141,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) q->width != args->num_batch_buffer)) return -EINVAL; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { + if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; goto err_exec_queue; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 27215075c799..cf45df0328da 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -677,7 +677,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, switch (args->property) { case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: - args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); + args->value = q->ops->reset_status(q); ret = 0; break; default: diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 18d8b2a60928..f0c5f82ce7e3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -70,18 +70,16 @@ struct xe_exec_queue { */ struct dma_fence *last_fence; -/* queue no longer allowed to submit */ -#define EXEC_QUEUE_FLAG_BANNED BIT(0) /* queue used for kernel submission only */ -#define EXEC_QUEUE_FLAG_KERNEL BIT(1) +#define EXEC_QUEUE_FLAG_KERNEL BIT(0) /* kernel engine only destroyed at driver unload */ -#define EXEC_QUEUE_FLAG_PERMANENT BIT(2) +#define EXEC_QUEUE_FLAG_PERMANENT BIT(1) /* for VM jobs. 
Caller needs to hold rpm ref when creating queue with this flag */ -#define EXEC_QUEUE_FLAG_VM BIT(3) +#define EXEC_QUEUE_FLAG_VM BIT(2) /* child of VM queue for multi-tile VM jobs */ -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(3) /* kernel exec_queue only, set priority to highest level */ -#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5) +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 47aab04cf34f..4464ba337d12 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -61,6 +61,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define EXEC_QUEUE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) +#define EXEC_QUEUE_STATE_BANNED (1 << 9) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -134,12 +135,12 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q) static bool exec_queue_banned(struct xe_exec_queue *q) { - return (q->flags & EXEC_QUEUE_FLAG_BANNED); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; } static void set_exec_queue_banned(struct xe_exec_queue *q) { - q->flags |= EXEC_QUEUE_FLAG_BANNED; + atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); } static bool exec_queue_suspended(struct xe_exec_queue *q) @@ -189,8 +190,9 @@ static void set_exec_queue_wedged(struct xe_exec_queue *q) static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { - return exec_queue_banned(q) || (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); + return (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | + EXEC_QUEUE_STATE_BANNED)); } #ifdef CONFIG_PROVE_LOCKING -- cgit From b321cb83a375bcc18cd0a4b62bdeaf6905cca769 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 7 Jun 2024 17:31:55 +0200 Subject: drm/xe/pf: Assert LMEM provisioning is done only on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Local Memory (aka VRAM) is only available on DGFX platforms. We shouldn't attempt to provision VFs with LMEM or attempt to update the LMTT on non-DGFX platforms. Add missing asserts that would enforce that and fix release code that could crash on iGFX due to uninitialized LMTT. 
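Condensed, the fixed release path now only touches LMEM state on DGFX (a sketch of the hunks below):

	if (!xe_gt_is_media_type(gt)) {
		pf_release_vf_config_ggtt(gt, config);
		if (IS_DGFX(xe)) {	/* LMEM/LMTT exist only on discrete parts */
			pf_release_vf_config_lmem(gt, config);
			pf_update_vf_lmtt(xe, vfid);
		}
	}
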
Fixes: c063cce7df3a ("drm/xe/pf: Update the LMTT when freeing VF GT config") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240607153155.1592-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f49fc2917f93..694671497f6e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1290,6 +1290,9 @@ static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) struct xe_tile *tile; unsigned int tid; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + for_each_tile(tile, xe, tid) { lmtt = &tile->sriov.pf.lmtt; xe_lmtt_drop_pages(lmtt, vfid); @@ -1308,6 +1311,9 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) unsigned int tid; int err; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + total = 0; for_each_tile(tile, xe, tid) total += pf_get_vf_config_lmem(tile->primary_gt, vfid); @@ -1353,6 +1359,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { + xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -1371,6 +1378,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); + xe_gt_assert(gt, IS_DGFX(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1838,11 +1846,14 @@ u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); if (!xe_gt_is_media_type(gt)) { pf_release_vf_config_ggtt(gt, config); - pf_release_vf_config_lmem(gt, config); - pf_update_vf_lmtt(gt_to_xe(gt), vfid); + if (IS_DGFX(xe)) { + pf_release_vf_config_lmem(gt, config); + pf_update_vf_lmtt(xe, vfid); + } } pf_release_config_ctxs(gt, config); pf_release_config_dbs(gt, config); -- cgit From 3a3fc10cce3b6cc6ac252f3b6f5d750f0b1d735e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 9 Jun 2024 20:19:29 +0200 Subject: drm/xe/guc: Move H2G SETUP_PC_GUCRC definition to SLPC ABI We already have a dedicated file for GuC SLPC ABI definitions. Move definition of the SETUP_PC_GUCRC action and related enum to that file, rename them to match format of other new ABI definitions and add simple kernel-doc. 
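For reference, a caller now builds the H2G request from the relocated definitions roughly like this (a sketch based on the pc_action_setup_gucrc() hunk below):

	u32 action[] = {
		GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC,
		GUCRC_FIRMWARE_CONTROL,	/* or GUCRC_HOST_CONTROL */
	};
	int ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
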
Signed-off-by: Michal Wajdeczko Cc: Vinay Belgaumkar Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240609181931.1724-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 6 ------ drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_pc.c | 7 +++---- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 79ba98a169f9..43ad4652c2b2 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -128,7 +128,6 @@ enum xe_guc_action { XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, - XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, XE_GUC_ACTION_GET_HWCONFIG = 0x4100, XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, @@ -153,11 +152,6 @@ enum xe_guc_action { XE_GUC_ACTION_LIMIT }; -enum xe_guc_rc_options { - XE_GUCRC_HOST_CONTROL, - XE_GUCRC_FIRMWARE_CONTROL, -}; - enum xe_guc_preempt_options { XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index c165e26c0976..85abe4f09ae2 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -246,4 +246,26 @@ struct slpc_shared_data { #define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn +/** + * DOC: SETUP_PC_GUCRC + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_FAST_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC` = 0x3004 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **MODE** = GUCRC_HOST_CONTROL(0), GUCRC_FIRMWARE_CONTROL(1) | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC 0x3004u +#define GUCRC_HOST_CONTROL 0u +#define GUCRC_FIRMWARE_CONTROL 1u + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 508f0d39b4ad..b57207bb1f11 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -9,7 +9,6 @@ #include -#include "abi/guc_actions_abi.h" #include "abi/guc_actions_slpc_abi.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" @@ -195,7 +194,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) { struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; u32 action[] = { - XE_GUC_ACTION_SETUP_PC_GUCRC, + GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, mode, }; int ret; @@ -758,7 +757,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (xe->info.skip_guc_pc) return 0; - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); + ret = pc_action_setup_gucrc(pc, GUCRC_HOST_CONTROL); if (ret) 
return ret; @@ -861,7 +860,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); + ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -- cgit From 7eea2580994b4eb266fd9aa60b1c913be4151925 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 9 Jun 2024 20:19:30 +0200 Subject: drm/xe/guc: Add pc_to_ct() helper We are converting xe_guc_pc to xe_guc_ct few times already. Add simple helper function to avoid code duplication. While at it, simplify other helper functions and fix order of local variables to match the guideline. Signed-off-by: Michal Wajdeczko Cc: Vinay Belgaumkar Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240609181931.1724-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index b57207bb1f11..2eb8584566eb 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -19,6 +19,7 @@ #include "xe_gt_idle.h" #include "xe_gt_sysfs.h" #include "xe_gt_types.h" +#include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_map.h" #include "xe_mmio.h" @@ -66,29 +67,27 @@ * */ -static struct xe_guc * -pc_to_guc(struct xe_guc_pc *pc) +static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc) { return container_of(pc, struct xe_guc, pc); } -static struct xe_device * -pc_to_xe(struct xe_guc_pc *pc) +static struct xe_guc_ct *pc_to_ct(struct xe_guc_pc *pc) { - struct xe_guc *guc = pc_to_guc(pc); - struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + return &pc_to_guc(pc)->ct; +} - return gt_to_xe(gt); +static struct xe_gt *pc_to_gt(struct xe_guc_pc *pc) +{ + return guc_to_gt(pc_to_guc(pc)); } -static struct xe_gt * -pc_to_gt(struct xe_guc_pc *pc) +static struct xe_device *pc_to_xe(struct xe_guc_pc *pc) { - return container_of(pc, struct xe_gt, uc.guc.pc); + return guc_to_xe(pc_to_guc(pc)); } -static struct iosys_map * -pc_to_maps(struct xe_guc_pc *pc) +static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) { return &pc->bo->vmap; } @@ -129,14 +128,14 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, static int pc_action_reset(struct xe_guc_pc *pc) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_RESET, 2), xe_bo_ggtt_addr(pc->bo), 0, }; + int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) @@ -147,14 +146,14 @@ static int pc_action_reset(struct xe_guc_pc *pc) static int pc_action_query_task_state(struct xe_guc_pc *pc) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), xe_bo_ggtt_addr(pc->bo), 0, }; + int ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; @@ -170,14 +169,14 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), id, value, }; + int ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; @@ 
-192,7 +191,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) { - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + struct xe_guc_ct *ct = pc_to_ct(pc); u32 action[] = { GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, mode, -- cgit From 3438558284c8a219c1a72c2094a05bc12e9fcb8f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 9 Jun 2024 20:19:31 +0200 Subject: drm/xe/guc: Prefer GT oriented messages in xe_guc_pc If possible, we should prefer xe_gt_err() over drm_err(). While at it, improve and fix some of the error messages. Also drop unnecessary "xe_gt_sysfs.h" include. Signed-off-by: Michal Wajdeczko Cc: Vinay Belgaumkar Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240609181931.1724-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2eb8584566eb..666a37106bc5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -17,7 +17,7 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_idle.h" -#include "xe_gt_sysfs.h" +#include "xe_gt_printk.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -139,7 +139,8 @@ static int pc_action_reset(struct xe_guc_pc *pc) ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", + ERR_PTR(ret)); return ret; } @@ -161,8 +162,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) /* Blocking here to ensure the results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); if (ret) - drm_err(&pc_to_xe(pc)->drm, - "GuC PC query task state failed: %pe", ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", + ERR_PTR(ret)); return ret; } @@ -183,8 +184,8 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", - ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", + id, value, ERR_PTR(ret)); return ret; } @@ -200,8 +201,8 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", - ERR_PTR(ret)); + xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", + mode, ERR_PTR(ret)); return ret; } @@ -844,7 +845,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { - drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); + xe_gt_err(gt, "GuC PC Start failed\n"); ret = -EIO; goto out; } -- cgit From 3541e19d0d3b30ad099c0c26ba87561aedfbd652 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 7 Jun 2024 18:27:40 +0530 Subject: drm/xe: Ensure caller uses sole domain for xe_force_wake_assert_held xe_force_wake_assert_held() is designed to confirm a particular forcewake domain's wakefulness; it doesn't verify the wakefulness of multiple domains. Make sure the caller doesn't input multiple domains(XE_FORCEWAKE_ALL) as a parameter. 
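For example, with this assert in place the helper may only be asked about a single domain (illustrative usage, assuming the existing XE_FW_GT single-domain mask):

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);	/* OK: one domain */
	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);	/* now asserts */
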
v2 - use domain != XE_FORCEWAKE_ALL (Michal) v3 - Add kernel-doc Cc: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Badal Nilawar Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240607125741.1407331-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 83cb157da7cc..8cbb04fe0ed9 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -28,10 +28,21 @@ xe_force_wake_ref(struct xe_force_wake *fw, return fw->domains[ffs(domain) - 1].ref; } +/** + * xe_force_wake_assert_held - asserts domain is awake + * @fw : xe_force_wake structure + * @domain: xe_force_wake_domains apart from XE_FORCEWAKE_ALL + * + * xe_force_wake_assert_held() is designed to confirm a particular + * forcewake domain's wakefulness; it doesn't verify the wakefulness of + * multiple domains. Make sure the caller doesn't input multiple + * domains(XE_FORCEWAKE_ALL) as a parameter. + */ static inline void xe_force_wake_assert_held(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); xe_gt_assert(fw->gt, fw->awake_domains & domain); } -- cgit From 35feb8dbbca627d118ccc1f2111841788c142703 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 7 Jun 2024 18:27:41 +0530 Subject: drm/xe: Check valid domain is passed in xe_force_wake_ref Assert domain is not XE_FORCEWAKE_ALL. v2 - use domain != XE_FORCEWAKE_ALL (Michal) v3 - Fix commit description. Cc: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Badal Nilawar Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240607125741.1407331-2-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 8cbb04fe0ed9..a2577672f4e3 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -24,7 +24,7 @@ static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - xe_gt_assert(fw->gt, domain); + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); return fw->domains[ffs(domain) - 1].ref; } -- cgit From 91524b3a09b4eaa87bd9e073c289d502d6a7c8d0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 10 Jun 2024 14:04:10 +0200 Subject: drm/xe/guc: Drop unused legacy GuC message ABI definitions Those were copy-pasted from i915 code and never used in Xe driver. 
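The HXG definitions earlier in this file are the replacement; a message header is decoded along these lines (a sketch, assuming the standard FIELD_GET() helper; handle_event() is a hypothetical handler):

	u32 origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]);
	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);

	if (origin == GUC_HXG_ORIGIN_GUC && type == GUC_HXG_TYPE_EVENT)
		handle_event(msg);	/* hypothetical handler */
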
Signed-off-by: Michal Wajdeczko Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240610120411.1768-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_messages_abi.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 534a39db7772..e32142e3fe41 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -220,17 +220,4 @@ #define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX #define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD -/* deprecated */ -#define INTEL_GUC_MSG_TYPE_SHIFT 28 -#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) -#define INTEL_GUC_MSG_DATA_SHIFT 16 -#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) -#define INTEL_GUC_MSG_CODE_SHIFT 0 -#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) - -enum intel_guc_msg_type { - INTEL_GUC_MSG_TYPE_REQUEST = 0x0, - INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, -}; - #endif -- cgit From 4ca1a12a1b3520681cc274a38333d4294ac8050f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 10 Jun 2024 14:04:11 +0200 Subject: drm/xe/guc: Add kernel-doc for HXG Fast Request We have kernel-doc for all HXG message types but Fast Request. Signed-off-by: Michal Wajdeczko Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240610120411.1768-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_messages_abi.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index e32142e3fe41..f6ed4dfd215c 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -91,6 +91,34 @@ #define GUC_HXG_REQUEST_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD +/** + * DOC: HXG Fast Request + * + * The `HXG Request`_ message should be used to initiate asynchronous activity + * for which confirmation or return data is not expected. + * + * If confirmation is required then `HXG Request`_ shall be used instead. + * + * The recipient of this message may only use `HXG Failure`_ message if it was + * unable to accept this request (like invalid data). + * + * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN - see `HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 - see `HXG Request`_ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION - see `HXG Request`_ | + * +---+-------+--------------------------------------------------------------+ + * |...| | DATAn - see `HXG Request`_ | + * +---+-------+--------------------------------------------------------------+ + */ + /** * DOC: HXG Event * -- cgit From 24d0d98af1c32ac6452fe04e0b5464a59303b5c9 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 7 Jun 2024 17:55:28 +0530 Subject: drm/xe/xe2lpm: Fixup Wa_14020756599 This WA needs to be applied to graphics GT when the media version is 2000. 
Currently, media version 2000 is always paired with graphics version 2004, which results in writing the same register with the same bits twice. We can't add an optional rule in the RTP framework, and writing the same register with the same bits triggers a warning. Since media version 2000 is always paired with graphics version 2004, just checking the latter is sufficient. V2(Lucas): - Add more detail in commit message - Improve code comment to follow guideline Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2016 Fixes: 131328aa5699 ("drm/xe/xe2lpm: Add permanent Wa_14020756599") Signed-off-by: Tejas Upadhyay Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240607122528.1048610-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_rtp.c | 5 ----- drivers/gpu/drm/xe/xe_rtp.h | 14 -------------- drivers/gpu/drm/xe/xe_wa.c | 14 +++++++------- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 01c32a932780..eff1c9c2f5cc 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -324,8 +324,3 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, return dss >= dss_per_gslice; } -bool xe_rtp_match_when_media2000(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - return (gt_to_xe(gt))->info.media_verx100 == 2000; -} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index a32645f5f80b..337b1ef1959c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -427,18 +427,4 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, const struct xe_hw_engine *hwe); -/* - * xe_rtp_match_when_media2000 - Match when media GT version 2000 - * - * @gt: GT structure - * @hwe: Engine instance - * - * Its one of the case where we need to apply workaround on primary GT - * based on if media GT version 2000 is present. Thus this API will help - * us to match media version 2000. - * - * Returns: true if media GT version 2000, false otherwise. - */ -bool xe_rtp_match_when_media2000(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 26b170a0cdc7..18a4d5dd5a4c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -677,6 +677,13 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, + /* + * This WA is also needed on primary GT when the media version is 2000. + * Currently, media version 2000 is always paired with graphics version + * 2004, so just checking the latter is sufficient. 
In the future, media + * version 2000 can be used with some other graphics version where WA + * still needs to be implemented + */ { XE_RTP_NAME("14020756599"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) @@ -705,13 +712,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, - /* Xe2_LPM */ - - { XE_RTP_NAME("14020756599"), - XE_RTP_RULES(ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_when_media2000)), - XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) - }, - {} }; -- cgit From afe12a055d2de6b8c5ef1d4d8d5ca81220f3c31d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Jun 2024 18:35:33 +0200 Subject: drm/xe: Prefer GT oriented messages in xe_force_wake.c If possible, we should prefer xe_gt_notice() over drm_notice(). Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Cc: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240611163537.1944-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index b2d385daff4b..13ddabd90eed 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -10,6 +10,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_reg_defs.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_mmio.h" #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 @@ -20,12 +21,6 @@ fw_to_gt(struct xe_force_wake *fw) return fw->gt; } -static struct xe_device * -fw_to_xe(struct xe_force_wake *fw) -{ - return gt_to_xe(fw_to_gt(fw)); -} - static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, struct xe_reg reg, struct xe_reg ack) @@ -135,7 +130,6 @@ static int domain_sleep_wait(struct xe_gt *gt, int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_device *xe = fw_to_xe(fw); struct xe_gt *gt = fw_to_gt(fw); struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, woken = 0; @@ -153,8 +147,8 @@ int xe_force_wake_get(struct xe_force_wake *fw, ret = domain_wake_wait(gt, domain); ret2 |= ret; if (ret) - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", - domain->id, ret); + xe_gt_notice(gt, "Force wake domain (%d) failed to ack wake, ret=%d\n", + domain->id, ret); } fw->awake_domains |= woken; spin_unlock_irqrestore(&fw->lock, flags); @@ -165,7 +159,6 @@ int xe_force_wake_get(struct xe_force_wake *fw, int xe_force_wake_put(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_device *xe = fw_to_xe(fw); struct xe_gt *gt = fw_to_gt(fw); struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, sleep = 0; @@ -183,8 +176,8 @@ int xe_force_wake_put(struct xe_force_wake *fw, ret = domain_sleep_wait(gt, domain); ret2 |= ret; if (ret) - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", - domain->id, ret); + xe_gt_notice(gt, "Force wake domain (%d) failed to ack sleep, ret=%d\n", + domain->id, ret); } fw->awake_domains &= ~sleep; spin_unlock_irqrestore(&fw->lock, flags); -- cgit From d960c58010293cc9bddd81a6a2effde02d6a8fa7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Jun 2024 18:35:34 +0200 Subject: drm/xe: Kill fw_to_gt() helper It's too simple to deserve separate helper. 
Suggested-by: Lucas De Marchi Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240611163537.1944-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 13ddabd90eed..8799a2544648 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -15,12 +15,6 @@ #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 -static struct xe_gt * -fw_to_gt(struct xe_force_wake *fw) -{ - return fw->gt; -} - static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, struct xe_reg reg, struct xe_reg ack) @@ -130,7 +124,7 @@ static int domain_sleep_wait(struct xe_gt *gt, int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_gt *gt = fw_to_gt(fw); + struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, woken = 0; unsigned long flags; @@ -159,7 +153,7 @@ int xe_force_wake_get(struct xe_force_wake *fw, int xe_force_wake_put(struct xe_force_wake *fw, enum xe_force_wake_domains domains) { - struct xe_gt *gt = fw_to_gt(fw); + struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, sleep = 0; unsigned long flags; -- cgit From f80437eb25f711a71f3143d3f0ebc597431aebc8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Jun 2024 18:35:35 +0200 Subject: drm/xe: Include additional info on failed force-wake operation For debug purposes it might be useful to look at the values of the force-wake ack registers in case wake/sleep operations failures. Move xe_gt_notice() from the caller to the helper function, where we have the latest value of force-wake ack register available. 
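The idea, condensed from the diff below: let xe_mmio_wait32() hand back the last value it read, so a failed ack can be logged together with the stuck register contents:

	u32 value;
	int ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
				 XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
				 &value, true);
	if (ret)
		xe_gt_notice(gt, "Force wake domain %d failed to ack wake (%pe) reg[%#x] = %#x\n",
			     domain->id, ERR_PTR(ret), domain->reg_ack.addr, value);
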
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240611163537.1944-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 48 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8799a2544648..afbca81c12dd 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -97,9 +97,17 @@ static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) static int domain_wake_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, true); + u32 value; + int ret; + + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + &value, true); + if (ret) + xe_gt_notice(gt, "Force wake domain %d failed to ack wake (%pe) reg[%#x] = %#x\n", + domain->id, ERR_PTR(ret), domain->reg_ack.addr, value); + + return ret; } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) @@ -110,9 +118,17 @@ static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, true); + u32 value; + int ret; + + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + &value, true); + if (ret) + xe_gt_notice(gt, "Force wake domain %d failed to ack sleep (%pe) reg[%#x] = %#x\n", + domain->id, ERR_PTR(ret), domain->reg_ack.addr, value); + + return ret; } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ @@ -128,7 +144,7 @@ int xe_force_wake_get(struct xe_force_wake *fw, struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, woken = 0; unsigned long flags; - int ret, ret2 = 0; + int ret = 0; spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { @@ -138,16 +154,12 @@ int xe_force_wake_get(struct xe_force_wake *fw, } } for_each_fw_domain_masked(domain, woken, fw, tmp) { - ret = domain_wake_wait(gt, domain); - ret2 |= ret; - if (ret) - xe_gt_notice(gt, "Force wake domain (%d) failed to ack wake, ret=%d\n", - domain->id, ret); + ret |= domain_wake_wait(gt, domain); } fw->awake_domains |= woken; spin_unlock_irqrestore(&fw->lock, flags); - return ret2; + return ret; } int xe_force_wake_put(struct xe_force_wake *fw, @@ -157,7 +169,7 @@ int xe_force_wake_put(struct xe_force_wake *fw, struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, sleep = 0; unsigned long flags; - int ret, ret2 = 0; + int ret = 0; spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { @@ -167,14 +179,10 @@ int xe_force_wake_put(struct xe_force_wake *fw, } } for_each_fw_domain_masked(domain, sleep, fw, tmp) { - ret = domain_sleep_wait(gt, domain); - ret2 |= ret; - if (ret) - xe_gt_notice(gt, "Force wake domain (%d) failed to ack sleep, ret=%d\n", - domain->id, ret); + ret |= domain_sleep_wait(gt, domain); } fw->awake_domains &= ~sleep; spin_unlock_irqrestore(&fw->lock, flags); - return ret2; + return ret; } -- cgit From ec8727568299255798da50ffa97c9b479da56543 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: 
Tue, 11 Jun 2024 18:35:36 +0200 Subject: drm/xe: Combine common force-wake code into helpers The code of 'control' and 'wait' force-wake operations are very similar for both 'wake' and 'sleep' cases. Add helpers to maximize code reuse. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240611163537.1944-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 44 ++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index afbca81c12dd..468aabd72d6b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -15,6 +15,11 @@ #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 +static const char *str_wake_sleep(bool wake) +{ + return wake ? "wake" : "sleep"; +} + static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, struct xe_reg reg, struct xe_reg ack) @@ -89,46 +94,47 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) FORCEWAKE_ACK_GSC); } -static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) +static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) { - xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); } -static int domain_wake_wait(struct xe_gt *gt, - struct xe_force_wake_domain *domain) +static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) { u32 value; int ret; - ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, &value, true); if (ret) - xe_gt_notice(gt, "Force wake domain %d failed to ack wake (%pe) reg[%#x] = %#x\n", - domain->id, ERR_PTR(ret), domain->reg_ack.addr, value); + xe_gt_notice(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n", + domain->id, str_wake_sleep(wake), ERR_PTR(ret), + domain->reg_ack.addr, value); return ret; } +static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + __domain_ctl(gt, domain, true); +} + +static int domain_wake_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return __domain_wait(gt, domain, true); +} + static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - xe_mmio_write32(gt, domain->reg_ctl, domain->mask); + __domain_ctl(gt, domain, false); } static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - u32 value; - int ret; - - ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - &value, true); - if (ret) - xe_gt_notice(gt, "Force wake domain %d failed to ack sleep (%pe) reg[%#x] = %#x\n", - domain->id, ERR_PTR(ret), domain->reg_ack.addr, value); - - return ret; + return __domain_wait(gt, domain, false); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ -- cgit From 513ea833c20109d475b4ace9e6a18f6c0c25de4c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Jun 2024 18:35:37 +0200 Subject: drm/xe/vf: Ignore force-wake requests if VF The control and ack force-wake registers are not accessible for the VF drivers. 
To avoid changing existing code logic that tracks woken domains, simply ignore all attempts to access control or ack registers if we are running as a VF driver. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240611163537.1944-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 468aabd72d6b..5db6926120c3 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -12,6 +12,7 @@ #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_mmio.h" +#include "xe_sriov.h" #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 @@ -96,6 +97,9 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) { + if (IS_SRIOV(gt_to_xe(gt))) + return; + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); } @@ -104,6 +108,9 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, u32 value; int ret; + if (IS_SRIOV(gt_to_xe(gt))) + return 0; + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, &value, true); -- cgit From 3b9c181bcde8555ca81b2394c2dc2201cefc2dd4 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 11 Jun 2024 10:47:15 -0700 Subject: devcoredump: Add dev_coredumpm_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function to set a custom coredump timeout. For Xe driver usage, the current 5-minute timeout may be too short for users to figure out what needs to be done to capture the coredump and report bugs. We have plans to automate it (distribute a udev script), but in the end it will be up to distros and users to package it, so having an option to increase the timeout is safer. 
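A hypothetical caller that extends the lifetime to one hour would look like this (ss, xe_devcoredump_read and xe_devcoredump_free are stand-ins for the driver's snapshot data and read/free callbacks):

	dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, ss, 0, GFP_KERNEL,
			      xe_devcoredump_read, xe_devcoredump_free,
			      60 * 60 * HZ);	/* 1 hour instead of DEVCD_TIMEOUT */
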
v2: - replace dev_coredump_timeout_set() by dev_coredumpm_timeout() (Mukesh) v3: - make dev_coredumpm() static inline (Johannes) v5: - rename DEVCOREDUMP_TIMEOUT -> DEVCD_TIMEOUT to avoid redefinition in include/net/bluetooth/coredump.h v6: - fix definition of dev_coredumpm_timeout() when CONFIG_DEV_COREDUMP is disabled Cc: Rodrigo Vivi Cc: Mukesh Ojha Cc: Johannes Berg Cc: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Acked-by: Greg Kroah-Hartman Acked-by: Johannes Berg Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-1-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- drivers/base/devcoredump.c | 23 ++++++++++---------- include/linux/devcoredump.h | 53 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 82aeb09b3d1b..c795edad1b96 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -18,9 +18,6 @@ static struct class devcd_class; /* global disable flag, for security purposes */ static bool devcd_disabled; -/* if data isn't read by userspace after 5 minutes then delete it */ -#define DEVCD_TIMEOUT (HZ * 60 * 5) - struct devcd_entry { struct device devcd_dev; void *data; @@ -328,7 +325,8 @@ void dev_coredump_put(struct device *dev) EXPORT_SYMBOL_GPL(dev_coredump_put); /** - * dev_coredumpm - create device coredump with read/free methods + * dev_coredumpm_timeout - create device coredump with read/free methods with a + * custom timeout. * @dev: the struct device for the crashed device * @owner: the module that contains the read/free functions, use %THIS_MODULE * @data: data cookie for the @read/@free functions @@ -336,17 +334,20 @@ EXPORT_SYMBOL_GPL(dev_coredump_put); * @gfp: allocation flags * @read: function to read from the given buffer * @free: function to free the given buffer + * @timeout: time in jiffies to remove coredump * * Creates a new device coredump for the given device. If a previous one hasn't * been read yet, the new coredump is discarded. The data lifetime is determined * by the device coredump framework and when it is no longer needed the @free * function will be called to free the data. 
*/ -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { static atomic_t devcd_count = ATOMIC_INIT(0); struct devcd_entry *devcd; @@ -403,7 +404,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); + schedule_delayed_work(&devcd->del_wk, timeout); mutex_unlock(&devcd->mutex); return; put_device: @@ -414,7 +415,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, free: free(data); } -EXPORT_SYMBOL_GPL(dev_coredumpm); +EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); /** * dev_coredumpsg - create device coredump that uses scatterlist as data diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c8f7eb6cc191..377892604ff4 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -12,6 +12,9 @@ #include #include +/* if data isn't read by userspace after 5 minutes then delete it */ +#define DEVCD_TIMEOUT (HZ * 60 * 5) + /* * _devcd_free_sgtable - free all the memory of the given scatterlist table * (i.e. both pages and scatterlist instances) @@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) kfree(delete_iter); } - #ifdef CONFIG_DEV_COREDUMP void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)); +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout); void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); @@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data, } static inline void -dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { free(data); } @@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev) } #endif /* CONFIG_DEV_COREDUMP */ +/** + * dev_coredumpm - create device coredump with read/free methods + * @dev: the struct device for the crashed device + * @owner: the module that contains the read/free functions, use %THIS_MODULE + * @data: data cookie for the @read/@free functions + * @datalen: length of the data + * @gfp: allocation flags + * @read: function to read from the given buffer + * @free: function to free the given buffer + * 
+ * Creates a new device coredump for the given device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed the @free + * function will be called to free the data. + */ +static inline void dev_coredumpm(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen), + void (*free)(void *data)) +{ + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, + DEVCD_TIMEOUT); +} + #endif /* __DEVCOREDUMP_H */ -- cgit
From ec3ac2c8d941dad959dcdc760aa43bc45785d346 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 11 Jun 2024 10:47:16 -0700 Subject: drm/xe: Increase devcoredump timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 minutes is too short for a regular user to find out what they need to do to capture the devcoredump and report a bug to us, so increase this timeout to 1 hour. Cc: Rodrigo Vivi Cc: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-2-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d7f2d19a77c1..62c2b10fbf1d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -53,6 +53,9 @@ #ifdef CONFIG_DEV_COREDUMP +/* 1 hour timeout */ +#define XE_COREDUMP_TIMEOUT_JIFFIES (60 * 60 * HZ) + static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) { return container_of(coredump, struct xe_device, devcoredump); @@ -247,8 +250,9 @@ void xe_devcoredump(struct xe_sched_job *job) drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); - dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, - xe_devcoredump_read, xe_devcoredump_free); + dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, + xe_devcoredump_read, xe_devcoredump_free, + XE_COREDUMP_TIMEOUT_JIFFIES); } static void xe_driver_devcoredump_fini(void *arg) -- cgit
From e46d3f813abd2383881c66d21ba04cee9fbdf3a9 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:38 -0700 Subject: drm/xe/trace: Extract bo, vm, vma traces xe_trace.h is starting to get overcrowded. Move the traces related to bo, vm, and vma to their own file.
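The mechanics of such a split follow the standard kernel tracepoint pattern, shown here with a hypothetical xe_trace_foo pair mirroring the real xe_trace_bo files added below:

	/* xe_trace_foo.h -- must tolerate multiple inclusion, because
	 * define_trace.h re-reads it with CREATE_TRACE_POINTS set. */
	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM xe

	#if !defined(_XE_TRACE_FOO_H_) || defined(TRACE_HEADER_MULTI_READ)
	#define _XE_TRACE_FOO_H_

	#include <linux/tracepoint.h>
	#include <linux/types.h>

	TRACE_EVENT(xe_foo,
		    TP_PROTO(u32 id),
		    TP_ARGS(id),
		    TP_STRUCT__entry(__field(u32, id)),
		    TP_fast_assign(__entry->id = id;),
		    TP_printk("id=%u", __entry->id)
	);

	#endif

	/* This part must be outside protection */
	#undef TRACE_INCLUDE_PATH
	#undef TRACE_INCLUDE_FILE
	#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
	#define TRACE_INCLUDE_FILE xe_trace_foo
	#include <trace/define_trace.h>

	/* xe_trace_foo.c -- exactly one translation unit instantiates the
	 * tracepoints by defining CREATE_TRACE_POINTS before the include. */
	#ifndef __CHECKER__
	#define CREATE_TRACE_POINTS
	#include "xe_trace_foo.h"
	#endif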
v2: Update year in License(Gustavo) Reviewed-by: Gustavo Sousa Suggested-by: Jani Nikula Cc: Lucas De Marchi Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-2-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 212 ------------------------------- drivers/gpu/drm/xe/xe_trace_bo.c | 9 ++ drivers/gpu/drm/xe/xe_trace_bo.h | 236 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.c | 2 +- 8 files changed, 250 insertions(+), 216 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_trace_bo.c create mode 100644 drivers/gpu/drm/xe/xe_trace_bo.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 70738d1f85e9..59c8513cb130 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -112,6 +112,7 @@ xe-y += xe_bb.o \ xe_tile.o \ xe_tile_sysfs.o \ xe_trace.o \ + xe_trace_bo.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 2bae01ce4e5b..74294f1b05bc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -25,7 +25,7 @@ #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_res_cursor.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index eaf68f0135c1..9292d5468868 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -19,7 +19,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_vm.h" struct pagefault { diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7e3fb33110d9..ddd50c3f7208 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -32,7 +32,7 @@ #include "xe_res_cursor.h" #include "xe_sched_job.h" #include "xe_sync.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_vm.h" /** diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index e4cba64474e6..ba9cee9e1466 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,8 +12,6 @@ #include #include -#include "xe_bo.h" -#include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" #include "xe_gt_tlb_invalidation_types.h" @@ -76,58 +74,6 @@ DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, TP_ARGS(fence) ); -DECLARE_EVENT_CLASS(xe_bo, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo), - - TP_STRUCT__entry( - __field(size_t, size) - __field(u32, flags) - __field(struct xe_vm *, vm) - ), - - TP_fast_assign( - __entry->size = bo->size; - __entry->flags = bo->flags; - __entry->vm = bo->vm; - ), - - TP_printk("size=%zu, flags=0x%02x, vm=%p", - __entry->size, __entry->flags, __entry->vm) -); - -DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) -); - -TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, - bool move_lacks_source), - TP_ARGS(bo, new_placement, old_placement, move_lacks_source), - TP_STRUCT__entry( - __field(struct xe_bo *, bo) - __field(size_t, size) - __field(u32, new_placement) - __field(u32, 
old_placement) - __array(char, device_id, 12) - __field(bool, move_lacks_source) - ), - - TP_fast_assign( - __entry->bo = bo; - __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; - strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); - __entry->move_lacks_source = move_lacks_source; - ), - TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __entry->device_id) -); - DECLARE_EVENT_CLASS(xe_exec_queue, TP_PROTO(struct xe_exec_queue *q), TP_ARGS(q), @@ -386,164 +332,6 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, TP_ARGS(fence) ); -DECLARE_EVENT_CLASS(xe_vma, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct xe_vma *, vma) - __field(u32, asid) - __field(u64, start) - __field(u64, end) - __field(u64, ptr) - ), - - TP_fast_assign( - __entry->vma = vma; - __entry->asid = xe_vma_vm(vma)->usm.asid; - __entry->start = xe_vma_start(vma); - __entry->end = xe_vma_end(vma) - 1; - __entry->ptr = xe_vma_userptr(vma); - ), - - TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", - __entry->vma, __entry->asid, __entry->start, - __entry->end, __entry->ptr) -) - -DEFINE_EVENT(xe_vma, xe_vma_flush, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_pagefault, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_acc, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_fail, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_bind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_pf_bind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_unbind, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_invalidate, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_evict, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, - TP_PROTO(struct xe_vma *vma), - TP_ARGS(vma) -); - -DECLARE_EVENT_CLASS(xe_vm, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(struct xe_vm *, vm) - __field(u32, asid) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->asid = vm->usm.asid; - ), - - TP_printk("vm=%p, asid=0x%05x", __entry->vm, - __entry->asid) -); - -DEFINE_EVENT(xe_vm, xe_vm_kill, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_create, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_free, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_restart, - TP_PROTO(struct xe_vm *vm), - 
TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - -DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, - TP_PROTO(struct xe_vm *vm), - TP_ARGS(vm) -); - /* GuC */ DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), diff --git a/drivers/gpu/drm/xe/xe_trace_bo.c b/drivers/gpu/drm/xe/xe_trace_bo.c new file mode 100644 index 000000000000..6d5e66ce4c50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_bo.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_bo.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h new file mode 100644 index 000000000000..c8bd746d9d41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -0,0 +1,236 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_BO_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_BO_H_ + +#include +#include + +#include "xe_bo.h" +#include "xe_bo_types.h" +#include "xe_vm.h" + +DECLARE_EVENT_CLASS(xe_bo, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo), + + TP_STRUCT__entry( + __field(size_t, size) + __field(u32, flags) + __field(struct xe_vm *, vm) + ), + + TP_fast_assign( + __entry->size = bo->size; + __entry->flags = bo->flags; + __entry->vm = bo->vm; + ), + + TP_printk("size=%zu, flags=0x%02x, vm=%p", + __entry->size, __entry->flags, __entry->vm) +); + +DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + __field(bool, move_lacks_source) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; + ), + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? 
"yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) +); + +DECLARE_EVENT_CLASS(xe_vma, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma), + + TP_STRUCT__entry( + __field(struct xe_vma *, vma) + __field(u32, asid) + __field(u64, start) + __field(u64, end) + __field(u64, ptr) + ), + + TP_fast_assign( + __entry->vma = vma; + __entry->asid = xe_vma_vm(vma)->usm.asid; + __entry->start = xe_vma_start(vma); + __entry->end = xe_vma_end(vma) - 1; + __entry->ptr = xe_vma_userptr(vma); + ), + + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", + __entry->vma, __entry->asid, __entry->start, + __entry->end, __entry->ptr) +) + +DEFINE_EVENT(xe_vma, xe_vma_flush, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pagefault, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_acc, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_fail, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pf_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_unbind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_evict, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DECLARE_EVENT_CLASS(xe_vm, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(struct xe_vm *, vm) + __field(u32, asid) + ), + + TP_fast_assign( + __entry->vm = vm; + __entry->asid = vm->usm.asid; + ), + + TP_printk("vm=%p, asid=0x%05x", __entry->vm, + __entry->asid) +); + +DEFINE_EVENT(xe_vm, xe_vm_kill, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_create, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_free, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_restart, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_bo +#include diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 99bf7412475c..ffda487653d8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ 
b/drivers/gpu/drm/xe/xe_vm.c @@ -36,7 +36,7 @@ #include "xe_pt.h" #include "xe_res_cursor.h" #include "xe_sync.h" -#include "xe_trace.h" +#include "xe_trace_bo.h" #include "xe_wa.h" #include "xe_hmm.h" -- cgit From 6a04e1fc36ff762312088c2ca819ada3b9f7ab33 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:39 -0700 Subject: drm/xe/trace: Extract guc related traces xe_trace.h is starting to get over crowded. Move the traces related to guc to its own file. v2: Update year in License(Gustavo) Reviewed-by: Gustavo Sousa Suggested-by: Jani Nikula Cc: Lucas De Marchi Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-3-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 80 ----------------------------- drivers/gpu/drm/xe/xe_trace_guc.c | 9 ++++ drivers/gpu/drm/xe/xe_trace_guc.h | 103 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 114 insertions(+), 81 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_trace_guc.c create mode 100644 drivers/gpu/drm/xe/xe_trace_guc.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 59c8513cb130..cbf961b90237 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -113,6 +113,7 @@ xe-y += xe_bb.o \ xe_tile_sysfs.o \ xe_trace.o \ xe_trace_bo.o \ + xe_trace_guc.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 936b63483e96..aef400133720 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -29,7 +29,7 @@ #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_pm.h" -#include "xe_trace.h" +#include "xe_trace_guc.h" /* Used when a CT send wants to block and / or receive data */ struct g2h_fence { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index ba9cee9e1466..81128c0f31e6 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -332,86 +332,6 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, TP_ARGS(fence) ); -/* GuC */ -DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), - - TP_STRUCT__entry( - __field(u32, _head) - __field(u32, _tail) - __field(u32, size) - __field(u32, space) - __field(u32, len) - ), - - TP_fast_assign( - __entry->_head = _head; - __entry->_tail = _tail; - __entry->size = size; - __entry->space = space; - __entry->len = len; - ), - - TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) -); - -DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len) -); - -DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), - - TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) -); - -DECLARE_EVENT_CLASS(xe_guc_ctb, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), - - TP_STRUCT__entry( - __field(u8, gt_id) - __field(u32, action) - 
__field(u32, len) - __field(u32, tail) - __field(u32, _head) - ), - - TP_fast_assign( - __entry->gt_id = gt_id; - __entry->action = action; - __entry->len = len; - __entry->tail = tail; - __entry->_head = _head; - ), - - TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, - __entry->tail, __entry->_head) -); - -DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail) -); - -DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), - - TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, - __entry->tail, __entry->_head) - -); - #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_trace_guc.c b/drivers/gpu/drm/xe/xe_trace_guc.c new file mode 100644 index 000000000000..fcdf6888ff2f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_guc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_guc.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_guc.h b/drivers/gpu/drm/xe/xe_trace_guc.h new file mode 100644 index 000000000000..d6830ff21822 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_guc.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_GUC_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_GUC_H_ + +#include +#include + +#include "xe_guc_exec_queue_types.h" + +DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len) +); + +DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DECLARE_EVENT_CLASS(xe_guc_ctb, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail), + + TP_STRUCT__entry( + __field(u8, gt_id) + __field(u32, action) + __field(u32, len) + __field(u32, tail) + __field(u32, _head) + ), + + TP_fast_assign( + __entry->gt_id = gt_id; + __entry->action = action; + __entry->len = len; + __entry->tail = tail; + __entry->_head = _head; + ), + + TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) +); + +DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, + 
TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail) +); + +DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail), + + TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) + +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_guc +#include -- cgit From 3432f26efa45bed7bc61f534ca9d2c0afdaf5632 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:40 -0700 Subject: drm/xe/trace: Print device_id in xe_trace_bo events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In multi-gpu environments it is important to know the device bo/vm belongs to. The tracing information includes the device_id to indicate the device the event is associated with. v2: Use variable sized variant to display dev name(Gustavo) v3: Pass single argument to __assign_str to fix kunit error v4: Minor cleanups(Gustavo) Suggested-by: Ville Syrjälä Cc: Lucas De Marchi Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-4-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/xe_trace_bo.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index c8bd746d9d41..f39f09ed3495 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -16,24 +16,31 @@ #include "xe_bo_types.h" #include "xe_vm.h" +#define __dev_name_bo(bo) dev_name(xe_bo_device(bo)->drm.dev) +#define __dev_name_vm(vm) dev_name((vm)->xe->drm.dev) +#define __dev_name_vma(vma) __dev_name_vm(xe_vma_vm(vma)) + DECLARE_EVENT_CLASS(xe_bo, TP_PROTO(struct xe_bo *bo), TP_ARGS(bo), TP_STRUCT__entry( + __string(dev, __dev_name_bo(bo)) __field(size_t, size) __field(u32, flags) __field(struct xe_vm *, vm) ), TP_fast_assign( + __assign_str(dev); __entry->size = bo->size; __entry->flags = bo->flags; __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=%p", - __entry->size, __entry->flags, __entry->vm) + TP_printk("dev=%s, size=%zu, flags=0x%02x, vm=%p", + __get_str(dev), __entry->size, + __entry->flags, __entry->vm) ); DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, @@ -50,7 +57,7 @@ TRACE_EVENT(xe_bo_move, __field(size_t, size) __field(u32, new_placement) __field(u32, old_placement) - __array(char, device_id, 12) + __string(device_id, __dev_name_bo(bo)) __field(bool, move_lacks_source) ), @@ -59,13 +66,13 @@ TRACE_EVENT(xe_bo_move, __entry->size = bo->size; __entry->new_placement = new_placement; __entry->old_placement = old_placement; - strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __assign_str(device_id); __entry->move_lacks_source = move_lacks_source; ), TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? 
"yes" : "no", __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __entry->device_id) + xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) ); DECLARE_EVENT_CLASS(xe_vma, @@ -73,6 +80,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( + __string(dev, __dev_name_vma(vma)) __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) @@ -81,6 +89,7 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( + __assign_str(dev); __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); @@ -88,8 +97,8 @@ DECLARE_EVENT_CLASS(xe_vma, __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", - __entry->vma, __entry->asid, __entry->start, + TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", + __get_str(dev), __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -173,17 +182,19 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( + __string(dev, __dev_name_vm(vm)) __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( + __assign_str(dev); __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=%p, asid=0x%05x", __entry->vm, - __entry->asid) + TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), + __entry->vm, __entry->asid) ); DEFINE_EVENT(xe_vm, xe_vm_kill, -- cgit From 3cba2f1d3f18939362e082ea8b9bd8abfa735b84 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:41 -0700 Subject: drm/xe/trace: Print device_id in xe_trace_guc events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In multi-gpu environments it is important to know the device guc txn belongs to. The tracing information includes the device_id to indicate the device the event is associated with. 
v2: Use variable sized variant to display dev name(Gustavo) v3: Pass single argument to __assign_str to fix kunit error v4: Minor formatting tweaks Suggested-by: Ville Syrjälä Cc: Lucas De Marchi Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-5-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 11 ++++----- drivers/gpu/drm/xe/xe_trace_guc.h | 47 ++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index aef400133720..b4137fe195a4 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -571,7 +571,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len, + trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, desc_read(xe, h2g, head), h2g->info.tail); return 0; @@ -684,6 +684,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { + struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); struct drm_printer p = xe_gt_info_printer(gt); unsigned int sleep_period_ms = 1; @@ -711,7 +712,7 @@ try_again: if (sleep_period_ms == 1024) goto broken; - trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail, + trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); @@ -723,7 +724,7 @@ try_again: struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; - trace_xe_guc_ct_g2h_flow_control(g2h->info.head, + trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head, desc_read(xe, g2h, tail), g2h->info.size, g2h->info.space, @@ -1213,8 +1214,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) g2h->info.head = (head + avail) % g2h->info.size; desc_write(xe, g2h, head, g2h->info.head); - trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len, - g2h->info.head, tail); + trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id, + action, len, g2h->info.head, tail); return len; } diff --git a/drivers/gpu/drm/xe/xe_trace_guc.h b/drivers/gpu/drm/xe/xe_trace_guc.h index d6830ff21822..23abdd55dc62 100644 --- a/drivers/gpu/drm/xe/xe_trace_guc.h +++ b/drivers/gpu/drm/xe/xe_trace_guc.h @@ -12,13 +12,17 @@ #include #include +#include "xe_device_types.h" #include "xe_guc_exec_queue_types.h" +#define __dev_name_xe(xe) dev_name((xe)->drm.dev) + DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len), TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) __field(u32, _head) __field(u32, _tail) __field(u32, size) @@ -27,6 +31,7 @@ DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, ), TP_fast_assign( + __assign_str(dev); __entry->_head = _head; __entry->_tail = _tail; __entry->size = size; @@ -34,30 +39,31 @@ DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, __entry->len = len; ), - TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, + TP_printk("h2g flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", + 
__get_str(dev), __entry->_head, __entry->_tail, __entry->size, __entry->space, __entry->len) ); DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len) + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len) ); DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(xe, _head, _tail, size, space, len), - TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, + TP_printk("g2h flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", + __get_str(dev), __entry->_head, __entry->_tail, __entry->size, __entry->space, __entry->len) ); DECLARE_EVENT_CLASS(xe_guc_ctb, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail), TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) __field(u8, gt_id) __field(u32, action) __field(u32, len) @@ -66,6 +72,7 @@ DECLARE_EVENT_CLASS(xe_guc_ctb, ), TP_fast_assign( + __assign_str(dev); __entry->gt_id = gt_id; __entry->action = action; __entry->len = len; @@ -73,22 +80,22 @@ DECLARE_EVENT_CLASS(xe_guc_ctb, __entry->_head = _head; ), - TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, + TP_printk("H2G CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", + __get_str(dev), __entry->gt_id, __entry->action, __entry->len, __entry->tail, __entry->_head) ); DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail) + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail) ); DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), - TP_ARGS(gt_id, action, len, _head, tail), + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(xe, gt_id, action, len, _head, tail), - TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", - __entry->gt_id, __entry->action, __entry->len, + TP_printk("G2H CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", + __get_str(dev), __entry->gt_id, __entry->action, __entry->len, __entry->tail, __entry->_head) ); -- cgit
From 501c4255c40935280a10844cf7550bf1e4f8939b Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:42 -0700 Subject: drm/xe: Print device_id in xe_trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In multi-gpu environments it is important to know the device GT events belong to. The tracing information includes the device_id to indicate the device the event is associated with.
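One detail worth spelling out for all of these conversions: TP_printk() runs when the trace buffer is read, potentially long after the event and after the underlying objects are gone, so the device name cannot be derived from a recorded pointer at print time. It has to be snapshotted into the ring buffer during TP_fast_assign(), which is what __string()/__assign_str() provide; only the captured copy, accessed via __get_str(), is safe in the format string. Schematically:

	TP_fast_assign(
		/* runs at event time: snapshot the name into the entry */
		__assign_str(dev);
	),
	/* runs at read time: may only touch __entry / __get_str() */
	TP_printk("dev=%s, ...", __get_str(dev), ...)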
v2: Use variable sized variant to display dev name(Gustavo) v3: Pass single argument to __assign_str to fix kunit error v4: Remove unused sting_helper library include Suggested-by: Ville Syrjälä Cc: Lucas De Marchi Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-6-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 27 ++++++------ drivers/gpu/drm/xe/xe_pt.c | 8 ++-- drivers/gpu/drm/xe/xe_trace.h | 64 ++++++++++++++++++----------- 3 files changed, 59 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 105797776a6c..23d397a246a8 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -22,6 +22,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, tlb_invalidation.fence_tdr.work); + struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; spin_lock_irq(>->tlb_invalidation.pending_lock); @@ -33,7 +34,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT) break; - trace_xe_gt_tlb_invalidation_fence_timeout(fence); + trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); @@ -71,18 +72,18 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) } static void -__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { - trace_xe_gt_tlb_invalidation_fence_signal(fence); + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); dma_fence_signal(&fence->base); dma_fence_put(&fence->base); } static void -invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { list_del(&fence->link); - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } /** @@ -121,7 +122,7 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(fence); + invalidation_fence_signal(gt_to_xe(gt), fence); spin_unlock_irq(>->tlb_invalidation.pending_lock); mutex_unlock(>->uc.guc.ct.lock); } @@ -144,6 +145,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, u32 *action, int len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); int seqno; int ret; @@ -157,7 +159,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, seqno = gt->tlb_invalidation.seqno; if (fence) { fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(fence); + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); } action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, @@ -171,7 +173,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, * we can just go ahead and signal the fence here. 
*/ if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } else { fence->invalidation_time = ktime_get(); list_add_tail(&fence->link, @@ -184,7 +186,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, } spin_unlock_irq(>->tlb_invalidation.pending_lock); } else if (ret < 0 && fence) { - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); } if (!ret) { gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % @@ -294,7 +296,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { if (fence) - __invalidation_fence_signal(fence); + __invalidation_fence_signal(xe, fence); return 0; } @@ -432,6 +434,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; unsigned long flags; @@ -468,12 +471,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(fence); + trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); if (!tlb_invalidation_seqno_past(gt, fence->seqno)) break; - invalidation_fence_signal(fence); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index cd60c009b679..ade9e7a3a0ad 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1137,8 +1137,9 @@ static void invalidation_fence_cb(struct dma_fence *fence, { struct invalidation_fence *ifence = container_of(cb, struct invalidation_fence, cb); + struct xe_device *xe = gt_to_xe(ifence->gt); - trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); + trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); if (!ifence->fence->error) { queue_work(system_wq, &ifence->work); } else { @@ -1153,8 +1154,9 @@ static void invalidation_fence_work_func(struct work_struct *w) { struct invalidation_fence *ifence = container_of(w, struct invalidation_fence, work); + struct xe_device *xe = gt_to_xe(ifence->gt); - trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); + trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, ifence->end, ifence->asid); } @@ -1166,7 +1168,7 @@ static int invalidation_fence_init(struct xe_gt *gt, { int ret; - trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); + trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 81128c0f31e6..27ba7c416405 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -20,58 +20,64 @@ #include "xe_sched_job.h" #include "xe_vm.h" +#define __dev_name_xe(xe) dev_name((xe)->drm.dev) +#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) +#define __dev_name_eq(q) __dev_name_gt((q)->gt) + DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence), + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence), 
TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( + __assign_str(dev); __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=%p, seqno=%d", - __entry->fence, __entry->seqno) + TP_printk("dev=%s, fence=%p, seqno=%d", + __get_str(dev), __entry->fence, __entry->seqno) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_work_func, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(fence) + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(xe, fence) ); DECLARE_EVENT_CLASS(xe_exec_queue, @@ -79,6 +85,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue, TP_ARGS(q), TP_STRUCT__entry( + __string(dev, __dev_name_eq(q)) __field(enum xe_engine_class, class) __field(u32, logical_mask) __field(u8, gt_id) @@ -89,6 +96,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue, ), TP_fast_assign( + __assign_str(dev); __entry->class = q->class; __entry->logical_mask = q->logical_mask; __entry->gt_id = q->gt->info.id; @@ -98,8 +106,8 @@ DECLARE_EVENT_CLASS(xe_exec_queue, __entry->flags = q->flags; ), - TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", - __entry->class, __entry->logical_mask, + TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", + __get_str(dev), __entry->class, __entry->logical_mask, __entry->gt_id, __entry->width, __entry->guc_id, __entry->guc_state, __entry->flags) ); @@ -199,6 +207,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, TP_ARGS(job), TP_STRUCT__entry( + __string(dev, __dev_name_eq(job->q)) __field(u32, seqno) __field(u32, lrc_seqno) __field(u16, guc_id) @@ -210,6 +219,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, ), TP_fast_assign( + __assign_str(dev); __entry->seqno = xe_sched_job_seqno(job); __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); __entry->guc_id = job->q->guc->id; @@ -221,8 +231,8 @@ DECLARE_EVENT_CLASS(xe_sched_job, __entry->batch_addr = (u64)job->ptrs[0].batch_addr; ), 
- TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", - __entry->fence, __entry->seqno, + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + __get_str(dev), __entry->fence, __entry->seqno, __entry->lrc_seqno, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) @@ -268,17 +278,19 @@ DECLARE_EVENT_CLASS(xe_sched_msg, TP_ARGS(msg), TP_STRUCT__entry( + __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) __field(u32, opcode) __field(u16, guc_id) ), TP_fast_assign( + __assign_str(dev); __entry->opcode = msg->opcode; __entry->guc_id = ((struct xe_exec_queue *)msg->private_data)->guc->id; ), - TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, + TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, __entry->opcode) ); @@ -297,19 +309,21 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( + __string(dev, __dev_name_gt(fence->ctx->gt)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) ), TP_fast_assign( + __assign_str(dev); __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", - __entry->ctx, __entry->fence, __entry->seqno) + TP_printk("dev=%s, ctx=0x%016llx, fence=%p, seqno=%u", + __get_str(dev), __entry->ctx, __entry->fence, __entry->seqno) ); DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, -- cgit From e81568a0cbb288770f8d2e4b7d41752a9f367bd1 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 7 Jun 2024 11:29:43 -0700 Subject: drm/xe: Add reg read/write trace This will help debug register read/writes and provides a way to trace all the mmio transactions. v2: Fix kunit error v3: Print devid to help in multi-gpu setup v3: rebase and use variable sized variant to display dev name(Gustavo) v4: Pass single argument to __asign_str to fix kunit error v5: Remove unrelated include xe_tile.h and remove cast in trace Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240607182943.3572524-7-radhakrishna.sripada@intel.com --- drivers/gpu/drm/xe/xe_mmio.c | 23 +++++++++++++++++++---- drivers/gpu/drm/xe/xe_trace.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7962eeb9adb7..f92faad4b96d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -21,6 +21,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_macros.h" #include "xe_sriov.h" +#include "xe_trace.h" static void tiles_fini(void *arg) { @@ -124,16 +125,24 @@ u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u8 val; - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); + + return val; } u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u16 val; + + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); - return readw((reg.ext ? 
tile->mmio_ext.regs : tile->mmio.regs) + addr); + return val; } void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) @@ -141,6 +150,7 @@ void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + trace_xe_reg_rw(gt, true, addr, val, sizeof(val)); writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } @@ -148,11 +158,16 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 val; if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) - return xe_gt_sriov_vf_read32(gt, reg); + val = xe_gt_sriov_vf_read32(gt, reg); + else + val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + return val; } u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 27ba7c416405..09ca1ad057b0 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -346,6 +346,34 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, TP_ARGS(fence) ); +TRACE_EVENT(xe_reg_rw, + TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len), + + TP_ARGS(gt, write, reg, val, len), + + TP_STRUCT__entry( + __string(dev, __dev_name_gt(gt)) + __field(u64, val) + __field(u32, reg) + __field(u16, write) + __field(u16, len) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->val = val; + __entry->reg = reg; + __entry->write = write; + __entry->len = len; + ), + + TP_printk("dev=%s, %s reg=0x%x, len=%d, val=(0x%x, 0x%x)", + __get_str(dev), __entry->write ? "write" : "read", + __entry->reg, __entry->len, + (u32)(__entry->val & 0xffffffff), + (u32)(__entry->val >> 32)) +); + #endif /* This part must be outside protection */ -- cgit From e54700f7d6aa2ae0d0a0aeeebedcecd7ce1123fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 3 Jun 2024 20:24:30 +0530 Subject: drm/xe/bmg: Add PCI IDs Add the initial set of device IDs for Battlemage. Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240603145430.1260817-1-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- include/drm/xe_pciids.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e84da0cbb8e9..08583fdd7643 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -340,7 +340,7 @@ static const struct xe_device_desc lnl_desc = { .require_force_probe = true, }; -static const struct xe_device_desc bmg_desc __maybe_unused = { +static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .require_force_probe = true, @@ -389,6 +389,7 @@ static const struct pci_device_id pciidlist[] = { XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), + XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index adb37bc541e4..644872a35c35 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -192,4 +192,11 @@ MACRO__(0x64A0, ## __VA_ARGS__), \ MACRO__(0x64B0, ## __VA_ARGS__) +#define XE_BMG_IDS(MACRO__, ...) 
\ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + #endif -- cgit From 8b9544e07d802bf5376921500c4d19c3405d3ad6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:43 -0700 Subject: drm/xe: Add LRC ctx timestamp support functions LRC ctx timestamp support functions are used to determine how long a job has run on the hardware. v2: - Don't use static inlines (Jani) - Kernel doc - s/ctx_timestamp_job/ctx_job_timestamp v6: - Add kernel doc for xe_lrc_update_timestamp (Lucas) - Call xe_lrc_ctx_timestamp() in xe_lrc_update_timestamp (Lucas) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 78 ++++++++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_lrc.h | 5 +++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c1bb85d2e243..2ad645f49ec9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -652,6 +652,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 #define LRC_PPHWSP_SIZE SZ_4K @@ -680,6 +681,12 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; } +static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) +{ + /* The start seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; +} + static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) { /* The parallel is stored in the driver-defined portion of PPHWSP */ @@ -691,6 +698,11 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } +static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ -716,11 +728,65 @@ DECL_MAP_ADDR_HELPERS(pphwsp) DECL_MAP_ADDR_HELPERS(seqno) DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) +DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) #undef DECL_MAP_ADDR_HELPERS +/** + * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp GGTT address + */ +u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); +} + +/** + * xe_lrc_ctx_timestamp() - Read ctx timestamp value + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp value + */ +u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_ctx_timestamp_map(lrc); + return xe_map_read32(xe, &map); +} + +/** + * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address + * @lrc: Pointer to the lrc. 
+ * + * Returns: ctx timestamp job GGTT address + */ +u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); +} + +/** + * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp job value + */ +u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_ctx_job_timestamp_map(lrc); + return xe_map_read32(xe, &map); +} + u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) { return __xe_lrc_pphwsp_ggtt_addr(lrc); @@ -1659,11 +1725,21 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +/** + * xe_lrc_update_timestamp() - Update ctx timestamp + * @lrc: Pointer to the lrc. + * @old_ts: Old timestamp value + * + * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and + * update saved value. + * + * Returns: New ctx timestamp value + */ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) { *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP); + lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); return lrc->ctx_timestamp; } diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 882c3437ba5c..001af6c79454 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -94,6 +94,11 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); +u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); +u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); + /** * xe_lrc_update_timestamp - readout LRC timestamp and update cached value * @lrc: logical ring context for this exec queue -- cgit From 9f46ecbb3f1d5111c28e8205ad1526663c28aa9d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:44 -0700 Subject: drm/xe: Add MI_COPY_MEM_MEM GPU instruction definitions MI_COPY_MEM_MEM GPU instructions are used to copy ctx timestamp from a LRC registers to another location at the beginning of every jobs execution. Add MI_COPY_MEM_MEM GPU instruction definitions. 
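For reference, a minimal sketch of how these definitions compose into
the full five-dword instruction (this mirrors the emit_copy_timestamp()
helper added later in this series; the dst/src address variables below
are placeholders):

	/* Copy one dword between two GGTT addresses */
	dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
		  MI_COPY_MEM_MEM_DST_GGTT;
	dw[i++] = dst_ggtt_addr;	/* DW1: destination address (lo) */
	dw[i++] = 0;			/* DW2: destination address (hi) */
	dw[i++] = src_ggtt_addr;	/* DW3: source address (lo) */
	dw[i++] = 0;			/* DW4: source address (hi) */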
v2: - Include MI_COPY_MEM_MEM based on instruction order (Michal) - Fix tabs/spaces issue (Michal) - Use macro for DW definition (Michal) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index c74ceb550dce..b7bf99dd4848 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -59,6 +59,10 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5)) +#define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22) +#define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21) + #define MI_BATCH_BUFFER_START __MI_INSTR(0x31) #endif -- cgit From 65921374c48f7fd8305d3b3b500857967a563c4f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:45 -0700 Subject: drm/xe: Emit ctx timestamp copy in ring ops Copy ctx timestamp at beginning of every GPU job to a saved location. Used to determine how long a job has been running on the hardware. v2: - - s/ctx_timestamp_job/ctx_job_timestamp Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ring_ops.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index db630d27beba..0be4f489d3e1 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -224,6 +224,19 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) return job->q->vm ? 
BIT(8) : 0; } +static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) +{ + dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | + MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = MI_NOOP; + + return i; +} + /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 seqno) @@ -232,6 +245,8 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + i = emit_copy_timestamp(lrc, dw, i); + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -283,6 +298,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ @@ -332,6 +349,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; @@ -375,6 +394,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, { u32 dw[MAX_JOB_SIZE_DW], i = 0; + i = emit_copy_timestamp(lrc, dw, i); + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); -- cgit From 1bf1ca4e326c7d0bdf4aa1946d4ba0ccdae8d09e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:46 -0700 Subject: drm/xe: Add ctx timestamp to LRC snapshot The ctx timestamp is useful information, add to LRC snapshot. 
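For illustration, the snapshot printout grows two lines of the
following form (values are made up):

	Timestamp: 0x0000abcd
	Job Timestamp: 0x000089ab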
v2:
 - s/ctx_timestamp_job/ctx_job_timestamp

Signed-off-by: Matthew Brost
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-5-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_lrc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 2ad645f49ec9..43bc5e33a6c7 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -49,6 +49,8 @@ struct xe_lrc_snapshot {
 	} tail;
 	u32 start_seqno;
 	u32 seqno;
+	u32 ctx_timestamp;
+	u32 ctx_job_timestamp;
 };
 
 static struct xe_device *
@@ -1642,6 +1644,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
 	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
 	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
 	snapshot->lrc_snapshot = NULL;
+	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
 
 	return snapshot;
 }
@@ -1690,6 +1694,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
 		   snapshot->tail.internal, snapshot->tail.memory);
 	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
 	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
+	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
+	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
 
 	if (!snapshot->lrc_snapshot)
 		return;
-- 
cgit 

From 49cc215aad7f5b953d4fc85c75b044f0abdc10cd Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 11 Jun 2024 07:40:47 -0700
Subject: drm/xe: Add xe_gt_clock_interval_to_ms helper

Add a helper to convert GT clock ticks to msec. Useful for determining
whether timeouts occurred by examining GT clock ticks.

v6:
 - s/nom/n , s/dom/d (Jonathan)
 - include math64 (CI)

Signed-off-by: Matthew Brost
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-6-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_gt_clock.c | 20 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_clock.h | 1 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 9ff2061133df..86c2d62b4bdc 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include <linux/math64.h>
+
 #include "xe_gt_clock.h"
 #include "regs/xe_gt_regs.h"
@@ -79,3 +81,21 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	gt->info.reference_clock = freq;
 	return 0;
 }
+
+static u64 div_u64_roundup(u64 n, u32 d)
+{
+	return div_u64(n + d - 1, d);
+}
+
+/**
+ * xe_gt_clock_interval_to_ms - Convert sampled GT clock ticks to msec
+ *
+ * @gt: the &xe_gt
+ * @count: count of GT clock ticks
+ *
+ * Returns: time in msec
+ */
+u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count)
+{
+	return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
index 44fa0371b973..3adeb7baaca4 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.h
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -11,5 +11,6 @@ struct xe_gt;
 
 int xe_gt_clock_init(struct xe_gt *gt);
+u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count);
 
 #endif
-- 
cgit 

From 41e1fa93a2fb3cf5a50e556cff7bb51231781806 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 11 Jun 2024 07:40:48 -0700
Subject: drm/xe: Improve unexpected state error messages

Include the G2H handler name when printing an unexpected engine state
error message.
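As an illustration (the guc_id and state values below are made up), a
message that previously read

	Unexpected engine state 0x0004

now identifies both the handler and the queue:

	SCHED_DONE: Unexpected engine state 0x0004, guc_id=3, runnable_state=1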
v6:
 - Use xe_gt_err (Michal)
 - Print runnable state (John H)

Signed-off-by: Matthew Brost
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-7-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 4464ba337d12..afd22a8d815d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1620,6 +1620,7 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
+	u32 runnable_state = msg[1];
 
 	if (unlikely(len < 2)) {
 		drm_err(&xe->drm, "Invalid length %u", len);
@@ -1632,8 +1633,10 @@
 	if (unlikely(!exec_queue_pending_enable(q) &&
 		     !exec_queue_pending_disable(q))) {
-		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&q->guc->state));
+		xe_gt_err(guc_to_gt(guc),
+			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
+			  atomic_read(&q->guc->state), q->guc->id,
+			  runnable_state);
 		return -EPROTO;
 	}
 
@@ -1671,8 +1674,9 @@
 	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
 	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
-		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&q->guc->state));
+		xe_gt_err(guc_to_gt(guc),
+			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
+			  atomic_read(&q->guc->state), q->guc->id);
 		return -EPROTO;
 	}
 
-- 
cgit 

From 8b9544e07d802bf5376921500c4d19c3405d3ad6 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 11 Jun 2024 07:40:49 -0700
Subject: drm/xe: Assert runnable state in handle_sched_done

Ensure the G2H-reported runnable state and the KMD's GuC state machine
match.
Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index afd22a8d815d..ab0dc93d7740 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1592,16 +1592,21 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); } -static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q) +static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, + u32 runnable_state) { trace_xe_exec_queue_scheduling_done(q); if (exec_queue_pending_enable(q)) { + xe_gt_assert(guc_to_gt(guc), runnable_state == 1); + q->guc->resume_time = ktime_get(); clear_exec_queue_pending_enable(q); smp_wmb(); wake_up_all(&guc->ct.wq); } else { + xe_gt_assert(guc_to_gt(guc), runnable_state == 0); + clear_exec_queue_pending_disable(q); if (q->guc->suspend_pending) { suspend_fence_signal(q); @@ -1640,7 +1645,7 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; } - handle_sched_done(guc, q); + handle_sched_done(guc, q, runnable_state); return 0; } -- cgit From 716ce587a81e6165a4133ea32f63f3d69f80e1e7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:50 -0700 Subject: drm/xe: Add GuC state asserts to deregister_exec_queue Will help catch bugs in GuC state machine. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-9-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ab0dc93d7740..26c27ca51b2e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1587,6 +1587,11 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id, }; + xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + trace_xe_exec_queue_deregister(q); xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); -- cgit From fc592a81ff9f0e5a46d7fb652a74db97fed5ef1b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Jun 2024 07:40:51 -0700 Subject: drm/xe: Add pending disable assert to handle_sched_done Will help catch bugs in GuC state machine. 
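Put differently: a SCHED_DONE G2H with runnable_state == 0 should only
arrive while a schedule-disable request is in flight, so hitting this
assert would mean the KMD's and the GuC's views of the queue state have
diverged.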
Signed-off-by: Matthew Brost
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-10-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 26c27ca51b2e..25240b50a59c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1611,6 +1611,7 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
 		wake_up_all(&guc->ct.wq);
 	} else {
 		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
+		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
 
 		clear_exec_queue_pending_disable(q);
 		if (q->guc->suspend_pending) {
-- 
cgit 

From b47b83ef1657f8030d8e4485b5948a190c554cc5 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 11 Jun 2024 07:40:52 -0700
Subject: drm/xe: Add killed, banned, or wedged as sticky bits during GuC reset

These bits should be persistent across reset, so treat them as such.

Signed-off-by: Matthew Brost
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-11-matthew.brost@intel.com
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 25240b50a59c..671c72caf0ff 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1444,7 +1444,9 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 		set_exec_queue_suspended(q);
 		suspend_fence_signal(q);
 	}
-	atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED,
+	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
+		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
+		   EXEC_QUEUE_STATE_SUSPENDED,
 		   &q->guc->state);
 	q->guc->resume_time = 0;
 	trace_xe_exec_queue_stop(q);
-- 
cgit 

From 7ddb9403dd7497f7cd922243ee4b3c9cee4a30a4 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 11 Jun 2024 07:40:53 -0700
Subject: drm/xe: Sample ctx timestamp to determine if jobs have timed out

In the GuC TDR, sample the ctx timestamp to determine whether a job has
actually timed out. Scheduling has to be toggled (disabled and
re-enabled) to properly sample the timestamp. If the job has not been
running for longer than the timeout period, re-enable scheduling and
restart the TDR.
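As a worked example (numbers purely illustrative): at the usual
19.2 MHz reference clock, a sampled delta of 38,400,000 ticks between
ctx_timestamp and ctx_job_timestamp converts to 2000 ms via
xe_gt_clock_interval_to_ms(); after the 5% allowance for GuC scheduling
latency this becomes 2100 ms, which is then compared against
job_timeout_ms. The delta is computed with an explicit wrap check since
the 32-bit counter rolls over roughly every 223 s at that clock rate
(2^32 / 19.2e6 ~= 223.7 s).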
v2: - Use GT clock to msec helper (Umesh, off list) - s/ctx_timestamp_job/ctx_job_timestamp v3: - Fix state machine for TDR, mainly decouple sched disable and deregister (testing) - Rebase (CI) v4: - Fix checkpatch && newline issue (CI) - Do not deregister on wedged or unregistered (CI) - Fix refcounting bugs (CI) - Move devcoredump above VM / kernel job check (John H) - Add comment for check_timeout state usage (John H) - Assert pending disable not inflight when enabling scheduling (John H) - Use enable_scheduling in other scheduling enable code (John H) - Add comments on a few steps in TDR (John H) - Add assert for timestamp overflow protection (John H) v6: - Use mul_u64_u32_div (CI, checkpath) - Change check time to dbg level (Paulo) - Add immediate mode to sched disable (inspection) - Use xe_gt_* messages (John H) - Fix typo in comment (John H) - Check timeout before clearing pending disable (Paulo) v7: - Fix ADJUST_FIVE_PERCENT macro (checkpatch) - Don't print sched disable failure message on GT reset (John H) - Move kernel / VM jobs WARNs near comment (John H) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240611144053.2805091-12-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 303 +++++++++++++++++++++++++++++-------- 1 file changed, 242 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 671c72caf0ff..74552391dc5a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -23,6 +24,7 @@ #include "xe_force_wake.h" #include "xe_gpu_scheduler.h" #include "xe_gt.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -62,6 +64,8 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) #define EXEC_QUEUE_STATE_BANNED (1 << 9) +#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) +#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -188,6 +192,31 @@ static void set_exec_queue_wedged(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); } +static bool exec_queue_check_timeout(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; +} + +static void set_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} + +static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} + +static bool exec_queue_extra_ref(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; +} + +static void set_exec_queue_extra_ref(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} + static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return (atomic_read(&q->guc->state) & @@ -920,6 +949,109 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) xe_sched_submission_start(sched); } +#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) + +static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) +{ + struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); + u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); + u32 
timeout_ms = q->sched_props.job_timeout_ms; + u32 diff; + u64 running_time_ms; + + /* + * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch + * possible overflows with a high timeout. + */ + xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); + + if (ctx_timestamp < ctx_job_timestamp) + diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; + else + diff = ctx_timestamp - ctx_job_timestamp; + + /* + * Ensure timeout is within 5% to account for an GuC scheduling latency + */ + running_time_ms = + ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); + + xe_gt_dbg(gt, + "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, running_time_ms, timeout_ms, diff); + + return running_time_ms >= timeout_ms; +} + +static void enable_scheduling(struct xe_exec_queue *q) +{ + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + int ret; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + + ret = wait_event_timeout(guc->ct.wq, + !exec_queue_pending_enable(q) || + guc_read_stopped(guc), HZ * 5); + if (!ret || guc_read_stopped(guc)) { + xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); + set_exec_queue_banned(q); + xe_gt_reset_async(q->gt); + xe_sched_tdr_queue_imm(&q->guc->sched); + } +} + +static void disable_scheduling(struct xe_exec_queue *q, bool immediate) +{ + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + if (immediate) + set_min_preemption_timeout(guc, q); + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + trace_xe_exec_queue_scheduling_disable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); +} + +static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + q->guc->id, + }; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + set_exec_queue_destroyed(q); + trace_xe_exec_queue_deregister(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, 1); +} + static enum drm_gpu_sched_stat guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) { @@ -927,10 +1059,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_sched_job *tmp_job; struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); + struct xe_guc *guc = exec_queue_to_guc(q); int err = -ETIME; int i = 0; - bool wedged; + bool wedged, skip_timeout_check; /* * TDR has fired before free job worker. 
Common if exec queue @@ -942,49 +1074,53 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) return DRM_GPU_SCHED_STAT_NOMINAL; } - drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - - if (!exec_queue_killed(q)) - xe_devcoredump(job); - - trace_xe_sched_job_timedout(job); - - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - /* Kill the run_job entry point */ xe_sched_submission_stop(sched); + /* Must check all state after stopping scheduler */ + skip_timeout_check = exec_queue_reset(q) || + exec_queue_killed_or_banned_or_wedged(q) || + exec_queue_destroyed(q); + + /* Job hasn't started, can't be timed out */ + if (!skip_timeout_check && !xe_sched_job_started(job)) + goto rearm; + /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset + * XXX: Sampling timeout doesn't work in wedged mode as we have to + * modify scheduling state to read timestamp. We could read the + * timestamp from a register to accumulate current running time but this + * doesn't work for SRIOV. For now assuming timeouts in wedged mode are + * genuine timeouts. */ - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { - if (!xe_sched_invalidate_job(job, 2)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - goto out; - } - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - /* Engine state now stable, disable scheduling if needed */ + /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); int ret; if (exec_queue_reset(q)) err = -EIO; - set_exec_queue_banned(q); + if (!exec_queue_destroyed(q)) { - xe_exec_queue_get(q); - disable_scheduling_deregister(guc, q); + /* + * Wait for any pending G2H to flush out before + * modifying state + */ + ret = wait_event_timeout(guc->ct.wq, + !exec_queue_pending_enable(q) || + guc_read_stopped(guc), HZ * 5); + if (!ret || guc_read_stopped(guc)) + goto trigger_reset; + + /* + * Flag communicates to G2H handler that schedule + * disable originated from a timeout check. The G2H then + * avoid triggering cleanup or deregistering the exec + * queue. 
+ */ + set_exec_queue_check_timeout(q); + disable_scheduling(q, skip_timeout_check); } /* @@ -1000,15 +1136,60 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret || guc_read_stopped(guc)) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); +trigger_reset: + if (!ret) + xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); /* GT reset owns this */ + set_exec_queue_banned(q); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); - goto out; + goto rearm; + } + } + + /* + * Check if job is actually timed out, if so restart job execution and TDR + */ + if (!wedged && !skip_timeout_check && !check_timeout(q, job) && + !exec_queue_reset(q) && exec_queue_registered(q)) { + clear_exec_queue_check_timeout(q); + goto sched_enable; + } + + xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags); + trace_xe_sched_job_timedout(job); + + if (!exec_queue_killed(q)) + xe_devcoredump(job); + + /* + * Kernel jobs should never fail, nor should VM jobs if they do + * somethings has gone wrong and the GT needs a reset + */ + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { + if (!xe_sched_invalidate_job(job, 2)) { + clear_exec_queue_check_timeout(q); + xe_gt_reset_async(q->gt); + goto rearm; } } + /* Finish cleaning up exec queue via deregister */ + set_exec_queue_banned(q); + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); + __deregister_exec_queue(guc, q); + } + /* Stop fence signaling */ xe_hw_fence_irq_stop(q->fence_irq); @@ -1030,7 +1211,19 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); -out: + return DRM_GPU_SCHED_STAT_NOMINAL; + +sched_enable: + enable_scheduling(q); +rearm: + /* + * XXX: Ideally want to adjust timeout based on current exection time + * but there is not currently an easy way to do in DRM scheduler. With + * some thought, do this in a follow up. 
+ */ + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -1133,7 +1326,6 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) guc_read_stopped(guc)); if (!guc_read_stopped(guc)) { - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); s64 since_resume_ms = ktime_ms_delta(ktime_get(), q->guc->resume_time); @@ -1144,12 +1336,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) msleep(wait_ms); set_exec_queue_suspended(q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + disable_scheduling(q, false); } } else if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1160,19 +1347,11 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); if (guc_exec_queue_allowed_to_change_state(q)) { - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + enable_scheduling(q); } else { clear_exec_queue_suspended(q); } @@ -1434,8 +1613,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { - if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || - xe_exec_queue_is_lr(q)) + if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) __guc_exec_queue_fini(guc, q); @@ -1612,6 +1790,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, smp_wmb(); wake_up_all(&guc->ct.wq); } else { + bool check_timeout = exec_queue_check_timeout(q); + xe_gt_assert(guc_to_gt(guc), runnable_state == 0); xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); @@ -1619,11 +1799,12 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, if (q->guc->suspend_pending) { suspend_fence_signal(q); } else { - if (exec_queue_banned(q)) { + if (exec_queue_banned(q) || check_timeout) { smp_wmb(); wake_up_all(&guc->ct.wq); } - deregister_exec_queue(guc, q); + if (!check_timeout) + deregister_exec_queue(guc, q); } } } @@ -1664,7 +1845,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) clear_exec_queue_registered(q); - if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) + if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else __guc_exec_queue_fini(guc, q); @@ -1728,7 +1909,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) * guc_exec_queue_timedout_job. 
*/ set_exec_queue_reset(q); - if (!exec_queue_banned(q)) + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) xe_guc_exec_queue_trigger_cleanup(q); return 0; @@ -1758,7 +1939,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, /* Treat the same as engine reset */ set_exec_queue_reset(q); - if (!exec_queue_banned(q)) + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) xe_guc_exec_queue_trigger_cleanup(q); return 0; -- cgit From 6aaf3fd08d08e1bb9dc72bc2bd189464972d27c4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 13 Jun 2024 14:07:49 +0200 Subject: drm/xe/vf: Use correct check for being a VF driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IS_SRIOV macro returns true also when we are running as a PF driver. Use correct IS_SRIOV_VF macro to skip force-wake management. Fixes: 513ea833c201 ("drm/xe/vf: Ignore force-wake requests if VF") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Cc: Rodrigo Vivi Cc: Nirmoy Das Reviewed-by: Piotr Piórkowski Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240613120749.2032-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 5db6926120c3..b263fff15273 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -97,7 +97,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) { - if (IS_SRIOV(gt_to_xe(gt))) + if (IS_SRIOV_VF(gt_to_xe(gt))) return; xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); @@ -108,7 +108,7 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, u32 value; int ret; - if (IS_SRIOV(gt_to_xe(gt))) + if (IS_SRIOV_VF(gt_to_xe(gt))) return 0; ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0, -- cgit From 8c57c4dc2a9172e543025673c0c6c98953af2b13 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 13 Jun 2024 21:57:02 +0200 Subject: drm/xe: Drop duplicated declaration The declaration of xe_reg_whitelist_process_engine() function does not fit into "xe_wa.h" and is already a duplicate. 
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240613195702.2164-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 1 + drivers/gpu/drm/xe/xe_wa.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 0a83506e1ad8..52f12009678e 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -26,6 +26,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_reg_sr.h" +#include "xe_reg_whitelist.h" #include "xe_rtp.h" #include "xe_sched_job.h" #include "xe_sriov.h" diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 1b24d66f9d80..db9ddeaf69bf 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -17,8 +17,6 @@ void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); - -void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** -- cgit From de8390b101b2b37cdbab5bc91a47f69c2b1df6b0 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 14 Jun 2024 11:44:33 +0200 Subject: drm/xe/sched_job: Promote xe_sched_job_add_deps() Move it out of the xe_migrate compilation unit so it can be re-used in other places. Cc: Matthew Brost Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240614094433.775866-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 6 +++--- drivers/gpu/drm/xe/xe_migrate.c | 26 ++++++++++---------------- drivers/gpu/drm/xe/xe_sched_job.c | 6 ++++++ drivers/gpu/drm/xe/xe_sched_job.h | 3 +++ 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 4cf6c6ab4866..2d72cdec3a0b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -259,9 +259,9 @@ retry: /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { - err = drm_sched_job_add_resv_dependencies(&job->drm, - xe_vm_resv(vm), - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, + xe_vm_resv(vm), + DMA_RESV_USAGE_KERNEL); if (err) goto err_put_job; } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ddd50c3f7208..05f933787860 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -647,12 +647,6 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, bb->cs[bb->len++] = upper_32_bits(src_ofs); } -static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, - enum dma_resv_usage usage) -{ - return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); -} - static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) { return usm ? 
m->usm_batch_base_ofs : m->batch_base_ofs; @@ -849,11 +843,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, xe_sched_job_add_migrate_flush(job, flush_flags); if (!fence) { - err = job_add_deps(job, src_bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); if (!err && src_bo != dst_bo) - err = job_add_deps(job, dst_bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; } @@ -1091,8 +1085,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, * fences, which are always tracked as * DMA_RESV_USAGE_KERNEL. */ - err = job_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); if (err) goto err_job; } @@ -1417,8 +1411,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, /* Wait on BO move */ if (bo) { - err = job_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); if (err) goto err_job; } @@ -1428,8 +1422,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, * trigger preempts before moving forward */ if (first_munmap_rebind) { - err = job_add_deps(job, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); + err = xe_sched_job_add_deps(job, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; } diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 5c013904877a..44d534e362cd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -363,3 +363,9 @@ xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, for (i = 0; i < snapshot->batch_addr_len; i++) drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]); } + +int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage) +{ + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index f362e28455db..3dc72c5c1f13 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -90,4 +90,7 @@ struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot); void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p); +int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage); + #endif -- cgit From 97fedf015cd908bf4d200b7e510bb6dd93150206 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 14 Jun 2024 12:14:15 +0200 Subject: drm/xe: Use fixed CCS mode when running in SR-IOV mode We don't support changing CCS mode when running in SR-IOV mode yet. When running as a PF driver just apply default ccs_mode=1 that VF drivers will assume as already applied and fixed. 
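In practice this means that, under SR-IOV, writes to the gt's ccs_mode
attribute in sysfs are rejected with -EOPNOTSUPP (with a debug message
naming the current SR-IOV mode), while the PF keeps the default
single-CCS configuration that the VFs expect.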
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240614101415.2208-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 5d4cdbd69bc3..d2e4dc3aaf61 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_mmio.h" +#include "xe_sriov.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) { @@ -75,7 +76,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) void xe_gt_apply_ccs_mode(struct xe_gt *gt) { - if (!gt->ccs_mode) + if (!gt->ccs_mode || IS_SRIOV_VF(gt_to_xe(gt))) return; __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); @@ -110,6 +111,12 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, u32 num_engines, num_slices; int ret; + if (IS_SRIOV(xe)) { + xe_gt_dbg(gt, "Can't change compute mode when running as %s\n", + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); + return -EOPNOTSUPP; + } + ret = kstrtou32(buff, 0, &num_engines); if (ret) return ret; -- cgit From 0d2ca8fd2878582e01e933c8d6110860b921c1e6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 13 Jun 2024 17:34:24 +0200 Subject: drm/xe/uc: Fix and start using xe_uc_fw_sanitize() Helper xe_uc_fw_sanitize() was defined but never used. First fix it by properly exiting also from the LOAD_FAIL state, then use it in GuC and HuC sanitize code. Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240613153424.2120-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_huc.c | 4 +--- drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0bf6e01b8910..0e1a5674ef13 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1094,7 +1094,7 @@ void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) void xe_guc_sanitize(struct xe_guc *guc) { - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); guc->submission_state.enabled = false; } diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index b039ff49341b..6238fb354914 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -295,9 +295,7 @@ fail: void xe_huc_sanitize(struct xe_huc *huc) { - if (!xe_uc_fw_is_loadable(&huc->fw)) - return; - xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_uc_fw_sanitize(&huc->fw); } void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 35078038797e..c108e9d08e70 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -158,7 +158,7 @@ static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) { - if (xe_uc_fw_is_loaded(uc_fw)) + if (xe_uc_fw_is_loadable(uc_fw)) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); } -- cgit From 5d7612ae201ec199b46bbf81a36cb4667e29d973 Mon Sep 17 00:00:00 2001 From: Akshata Jahagirdar Date: Fri, 14 Jun 2024 11:24:56 
-0700 Subject: drm/xe/xe2lpg: Add Wa_14021490052 Add Wa_14021490052 for Xe2LPG 20.04. Signed-off-by: Akshata Jahagirdar Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240614182455.2370059-2-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 18a4d5dd5a4c..a2725a150441 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -688,6 +688,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14021490052"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), -- cgit From 731e46c032281601756f08cfa7d8505fe41166a9 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 13 Jun 2024 19:03:48 +0200 Subject: drm/xe/exec_queue: Rename xe_exec_queue::compute to xe_exec_queue::lr The properties of this struct are used in long running context so make that clear by renaming it to lr, in alignment with the rest of the code. Cc: Matthew Brost Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240613170348.723245-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 6 ++-- drivers/gpu/drm/xe/xe_exec_queue_types.h | 14 ++++---- drivers/gpu/drm/xe/xe_preempt_fence.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 58 ++++++++++++++++---------------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index cf45df0328da..0ba37835849b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -67,7 +67,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->compute.link); + INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; @@ -633,8 +633,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return PTR_ERR(q); if (xe_vm_in_preempt_fence_mode(vm)) { - q->compute.context = dma_fence_context_alloc(1); - spin_lock_init(&q->compute.lock); + q->lr.context = dma_fence_context_alloc(1); + spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index f0c5f82ce7e3..201588ec33c3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -113,19 +113,19 @@ struct xe_exec_queue { enum xe_exec_queue_priority priority; } sched_props; - /** @compute: compute exec queue state */ + /** @lr: long-running exec queue state */ struct { - /** @compute.pfence: preemption fence */ + /** @lr.pfence: preemption fence */ struct dma_fence *pfence; - /** @compute.context: preemption fence context */ + /** @lr.context: preemption fence context */ u64 context; - /** @compute.seqno: preemption fence seqno */ + /** @lr.seqno: preemption fence seqno */ u32 seqno; - /** 
@compute.link: link into VM's list of exec queues */ + /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @compute.lock: preemption fences lock */ + /** @lr.lock: preemption fences lock */ spinlock_t lock; - } compute; + } lr; /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 5b243b7feb59..e8b8ae5c6485 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -129,7 +129,7 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->compute.lock, context, seqno); + &q->lr.lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ffda487653d8..61d4d95a5377 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -83,10 +83,10 @@ static bool preempt_fences_waiting(struct xe_vm *vm) lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { - if (!q->compute.pfence || + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { + if (!q->lr.pfence || test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &q->compute.pfence->flags)) { + &q->lr.pfence->flags)) { return true; } } @@ -129,14 +129,14 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm) xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { - if (q->compute.pfence) { - long timeout = dma_fence_wait(q->compute.pfence, false); + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { + if (q->lr.pfence) { + long timeout = dma_fence_wait(q->lr.pfence, false); if (timeout < 0) return -ETIME; - dma_fence_put(q->compute.pfence); - q->compute.pfence = NULL; + dma_fence_put(q->lr.pfence); + q->lr.pfence = NULL; } } @@ -148,7 +148,7 @@ static bool xe_vm_is_idle(struct xe_vm *vm) struct xe_exec_queue *q; xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { if (!xe_exec_queue_is_idle(q)) return false; } @@ -161,17 +161,17 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) struct list_head *link; struct xe_exec_queue *q; - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { struct dma_fence *fence; link = list->next; xe_assert(vm->xe, link != list); fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), - q, q->compute.context, - ++q->compute.seqno); - dma_fence_put(q->compute.pfence); - q->compute.pfence = fence; + q, q->lr.context, + ++q->lr.seqno); + dma_fence_put(q->lr.pfence); + q->lr.pfence = fence; } } @@ -191,10 +191,10 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) if (err) goto out_unlock; - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) - if (q->compute.pfence) { + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) + if (q->lr.pfence) { dma_resv_add_fence(bo->ttm.base.resv, - q->compute.pfence, + q->lr.pfence, DMA_RESV_USAGE_BOOKKEEP); } @@ -211,10 +211,10 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + 
list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { q->ops->resume(q); - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); } } @@ -238,16 +238,16 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) if (err) goto out_up_write; - pfence = xe_preempt_fence_create(q, q->compute.context, - ++q->compute.seqno); + pfence = xe_preempt_fence_create(q, q->lr.context, + ++q->lr.seqno); if (!pfence) { err = -ENOMEM; goto out_fini; } - list_add(&q->compute.link, &vm->preempt.exec_queues); + list_add(&q->lr.link, &vm->preempt.exec_queues); ++vm->preempt.num_exec_queues; - q->compute.pfence = pfence; + q->lr.pfence = pfence; down_read(&vm->userptr.notifier_lock); @@ -284,12 +284,12 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->compute.link); + list_del(&q->lr.link); --vm->preempt.num_exec_queues; - if (q->compute.pfence) { - dma_fence_enable_sw_signaling(q->compute.pfence); - dma_fence_put(q->compute.pfence); - q->compute.pfence = NULL; + if (q->lr.pfence) { + dma_fence_enable_sw_signaling(q->lr.pfence); + dma_fence_put(q->lr.pfence); + q->lr.pfence = NULL; } up_write(&vm->lock); } @@ -327,7 +327,7 @@ static void xe_vm_kill(struct xe_vm *vm, bool unlocked) vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) q->ops->kill(q); if (unlocked) -- cgit From 0e6745de6dfc1395ce1bbde1e3f9b0a12fba5a1b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 17 Jun 2024 14:26:13 +0200 Subject: drm/xe: Allow const pointer when checking SR-IOV mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that more and more parts of the driver code rely on having only a const pointer to the xe_device. Allow to check SR-IOV mode in that code as well. 
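A minimal sketch of the kind of read-only caller this unblocks (the
function below is hypothetical, for illustration only):

	static bool vf_needs_quirk(const struct xe_device *xe)
	{
		return xe_device_is_sriov_vf(xe);
	}

Before this change, passing the const pointer here would discard the
qualifier and trigger a compiler warning (an outright error in
CONFIG_WERROR builds).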
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240617122613.553-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 486bb21c3256..688fbabf08f1 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -19,18 +19,18 @@ void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); -static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) +static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { xe_assert(xe, xe->sriov.__mode); return xe->sriov.__mode; } -static inline bool xe_device_is_sriov_pf(struct xe_device *xe) +static inline bool xe_device_is_sriov_pf(const struct xe_device *xe) { return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; } -static inline bool xe_device_is_sriov_vf(struct xe_device *xe) +static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) { return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; } -- cgit From f0ccd2d805e55e12b430d5d6b9acd9f891af455e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 17 Jun 2024 17:47:36 +0200 Subject: drm/xe/vf: Don't touch GuC irq registers if using memory irqs On platforms where VFs are using memory based interrupts, we missed invalid access to no longer existing interrupt registers, as we keep them marked with XE_REG_OPTION_VF. To fix that just either setup memirq vectors in GuC or enable legacy interrupts. Fixes: aef4eb7c7dec ("drm/xe/vf: Setup memory based interrupts in GuC") Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240617154736.685-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0e1a5674ef13..7ecb509c87d7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -854,8 +854,6 @@ int xe_guc_enable_communication(struct xe_guc *guc) struct xe_device *xe = guc_to_xe(guc); int err; - guc_enable_irq(guc); - if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); @@ -863,6 +861,8 @@ int xe_guc_enable_communication(struct xe_guc *guc) err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); if (err) return err; + } else { + guc_enable_irq(guc); } xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, -- cgit From 6045473650c40848122f5042b112606ce5d14406 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 17 Jun 2024 22:00:40 -0700 Subject: drm/xe/rtp: Allow to match 0 sr entries If none of the rules match, there should be 0 entries in the sr xarray, so none of them should have a register matching. 
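(When no rule matches, nothing is added to the reg_sr xarray and the
test's sr_entry pointer stays NULL, which is why the expectations below
are made conditional on count.)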
Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618050044.324454-2-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 06759d754783..474a0b222ce1 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -255,9 +255,14 @@ static void xe_rtp_process_tests(struct kunit *test) } KUNIT_EXPECT_EQ(test, count, param->expected_count); - KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); - KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); - KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); + if (count) { + KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); + KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); + KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); + } else { + KUNIT_EXPECT_NULL(test, sr_entry); + } + KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); } -- cgit From 512660cd1f1ab60d4ab8a0ae25b507d10be40fb3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 17 Jun 2024 22:00:41 -0700 Subject: drm/xe/rtp: Expand max rules/actions per entry Some entries are already at the current maximum of 4 rules per entry, and with the addition of an OR condition for rules, 4 will very soon not be sufficient. Expand the limit to 6 to allow more room. Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618050044.324454-3-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/xe_rtp.h | 4 ++-- drivers/gpu/drm/xe/xe_rtp_helpers.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 337b1ef1959c..904c5156a7b1 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -325,7 +325,7 @@ struct xe_reg_sr; * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry * @...: Rules * - * At least one rule is needed and up to 4 are supported. Multiple rules are + * At least one rule is needed and up to 6 are supported. Multiple rules are * AND'ed together, i.e. all the rules must evaluate to true for the entry to * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: * @@ -350,7 +350,7 @@ struct xe_reg_sr; * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr * @...: Actions to be taken * - * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_* + * At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_* * for the possible actions. Example: * * ..
code-block:: c diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 7735f217ba71..c59e40fd7fff 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -58,6 +58,8 @@ #define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer -- cgit From dc72c52a42e0255441bed7444ab16f2b6c98b681 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 17 Jun 2024 22:00:42 -0700 Subject: drm/xe/rtp: Allow to OR rules Some workarounds started to depend on different set of conditions where the action should be applied if any of them match. See e.g. commit 24d0d98af1c3 ("drm/xe/xe2lpm: Fixup Wa_14020756599"). Add XE_RTP_MATCH_OR that allows to implement a logical OR for the rules. Normal precedence applies: r1, r2, OR, r3 means (r1 AND r2) OR r3 The check is shortcut as soon as a set of conditions match. v2: Do not match on empty number of rules-other-than-OR evaluated Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618050044.324454-4-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 53 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_rtp.c | 30 +++++++++++++++++-- drivers/gpu/drm/xe/xe_rtp.h | 21 ++++++++++++++ drivers/gpu/drm/xe/xe_rtp_types.h | 1 + 4 files changed, 102 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 474a0b222ce1..f217445c246a 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -90,6 +90,59 @@ static const struct rtp_test_case cases[] = { {} }, }, + { + .name = "match-or", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), + .expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), + .expected_count = 1, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("first"), + XE_RTP_RULES(FUNC(match_yes), OR, FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("middle"), + XE_RTP_RULES(FUNC(match_no), FUNC(match_no), OR, + FUNC(match_yes), OR, + FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + { XE_RTP_NAME("last"), + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) + }, + { XE_RTP_NAME("no-match"), + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(3))) + }, + {} + }, + }, + { + .name = "match-or-xfail", + .expected_reg = REGULAR_REG1, + .expected_count = 0, + .entries = (const struct xe_rtp_entry_sr[]) { + { XE_RTP_NAME("leading-or"), + 
XE_RTP_RULES(OR, FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("trailing-or"), + /* + * First condition is match_no, otherwise the failure + * wouldn't really trigger as RTP stops processing as + * soon as it has a matching set of rules + */ + XE_RTP_RULES(FUNC(match_no), OR), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + { XE_RTP_NAME("no-or-or-yes"), + XE_RTP_RULES(FUNC(match_no), OR, OR, FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) + }, + {} + }, + }, { .name = "no-match-no-add-multiple-rules", .expected_reg = REGULAR_REG1, diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index eff1c9c2f5cc..dc315b8aae07 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -35,11 +35,18 @@ static bool rule_matches(const struct xe_device *xe, unsigned int n_rules) { const struct xe_rtp_rule *r; - unsigned int i; + unsigned int i, rcount = 0; bool match; for (r = rules, i = 0; i < n_rules; r = &rules[++i]) { switch (r->match_type) { + case XE_RTP_MATCH_OR: + /* + * This is only reached if a complete set of + * rules passed or none were evaluated. For both cases, + * shortcut the other rules and return the proper value. + */ + goto done; case XE_RTP_MATCH_PLATFORM: match = xe->info.platform == r->platform; break; @@ -102,10 +109,27 @@ static bool rule_matches(const struct xe_device *xe, match = false; } - if (!match) - return false; + if (!match) { + /* + * Advance rules until we find XE_RTP_MATCH_OR to check + * if there's another set of conditions to check + */ + while (i < n_rules && rules[++i].match_type != XE_RTP_MATCH_OR) + ; + + if (i >= n_rules) + return false; + + rcount = 0; + } else { + rcount++; + } } +done: + if (drm_WARN_ON(&xe->drm, !rcount)) + return false; + return true; } diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 904c5156a7b1..bd5b5ba0fb31 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -179,6 +179,27 @@ struct xe_reg_sr; #define XE_RTP_RULE_IS_DISCRETE \ { .match_type = XE_RTP_MATCH_DISCRETE } +/** + * XE_RTP_RULE_OR - Create an OR condition for rtp rules + * + * RTP rules are AND'ed when evaluated and all of them need to match. + * XE_RTP_RULE_OR allows to create set of rules where any of them matching is + * sufficient for the action to trigger. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry_sr entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_RULES(PLATFORM(DG2), OR, PLATFORM(TIGERLAKE)), + * ... + * }, + * ... 
+ * }; + */ +#define XE_RTP_RULE_OR \ + { .match_type = XE_RTP_MATCH_OR } + /** * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all * the bits diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 637acc7626a4..10150bc22ccd 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -51,6 +51,7 @@ enum { XE_RTP_MATCH_ENGINE_CLASS, XE_RTP_MATCH_NOT_ENGINE_CLASS, XE_RTP_MATCH_FUNC, + XE_RTP_MATCH_OR, }; /** struct xe_rtp_rule - match rule for processing entry */ -- cgit From c8c00286f5c4bb3cafa4a6562711dff26fca6c8e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 17 Jun 2024 22:00:43 -0700 Subject: drm/xe/rtp: Add match on any GT One surprising factor of GRAPHICS_VERSION()/MEDIA_VERSION() matching for people adding new WAs is that it implicitly checks that the graphics/media IP under check is of that specific type and not that the device contains a media/graphics IP of that version. Add a new *_ANY_GT() variant that can be used in that case. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618050044.324454-5-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/xe_rtp.c | 6 ++++++ drivers/gpu/drm/xe/xe_rtp.h | 30 +++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_rtp_types.h | 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index dc315b8aae07..ac31cba1dbea 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -63,6 +63,9 @@ static bool rule_matches(const struct xe_device *xe, xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; + case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + match = xe->info.graphics_verx100 == r->ver_start; + break; case XE_RTP_MATCH_GRAPHICS_STEP: match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && @@ -82,6 +85,9 @@ static bool rule_matches(const struct xe_device *xe, xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; + case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + match = xe->info.media_verx100 == r->ver_start; + break; case XE_RTP_MATCH_INTEGRATED: match = !xe->info.is_dgfx; break; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index bd5b5ba0fb31..ad446731192c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -140,9 +140,23 @@ struct xe_reg_sr; .ver_start = ver_start__, .ver_end = ver_end__, } /** - * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT - Create rule matching graphics version on any GT * @ver__: Graphics IP version to match * + * Like XE_RTP_RULE_GRAPHICS_VERSION, but it matches even if the current GT + * being checked is not of the graphics type. It allows to add RTP entries to + * another GT when the device contains a Graphics IP with that version. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT(ver__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * @ver__: Media IP version to match + * * Refer to XE_RTP_RULES() for expected usage. 
*/ #define XE_RTP_RULE_MEDIA_VERSION(ver__) \ @@ -163,6 +177,20 @@ struct xe_reg_sr; { .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \ .ver_start = ver_start__, .ver_end = ver_end__, } +/** + * XE_RTP_RULE_MEDIA_VERSION_ANY_GT - Create rule matching media version on any GT + * @ver__: Media IP version to match + * + * Like XE_RTP_RULE_MEDIA_VERSION, but it matches even if the current GT being + * checked is not of the media type. It allows to add RTP entries to another + * GT when the device contains a Media IP with that version. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION_ANY_GT(ver__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, \ + .ver_start = ver__, } + /** * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices * diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 10150bc22ccd..1b76b947c706 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -42,9 +42,11 @@ enum { XE_RTP_MATCH_SUBPLATFORM, XE_RTP_MATCH_GRAPHICS_VERSION, XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, XE_RTP_MATCH_GRAPHICS_STEP, XE_RTP_MATCH_MEDIA_VERSION, XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, XE_RTP_MATCH_MEDIA_STEP, XE_RTP_MATCH_INTEGRATED, XE_RTP_MATCH_DISCRETE, -- cgit From 62712be3a4e03c8375f490bcab131441ab3a7af0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 17 Jun 2024 22:00:44 -0700 Subject: drm/xe/xe2: Add proper check for media in Wa_14020756599 A temporary fixup was made in commit 24d0d98af1c3 ("drm/xe/xe2lpm: Fixup Wa_14020756599") due to limitations in the RTP infra. Now that RTP has support for OR condition that change can be removed. RTP now also supports checking any GT, so use that instead of the more specific xe_rtp_match_when_media2000() used in that commit. Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618050044.324454-6-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a2725a150441..0b6fbbebc41e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -677,15 +677,9 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, - /* - * This WA is also needed on primary GT when the media version is 2000. - * Currently, media version 2000 is always paired with graphics version - * 2004, so just checking the latter is sufficient. 
In the future, media - * version 2000 can be used with some other graphics version where WA - * still needs to be implemented - */ { XE_RTP_NAME("14020756599"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER), OR, + MEDIA_VERSION_ANY_GT(2000), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, { XE_RTP_NAME("14021490052"), -- cgit From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:53 -0700 Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the plan is to support multiple types of perf counter streams (OA is only one such stream type). Rather than introduce NxM ioctls (N perf stream types with M ioctls per stream type), we decided to multiplex these (the N different stream types and the M ops for each of them) through a single PERF ioctl. This multiplexing is the purpose of the PERF layer. In addition to the PERF DRM ioctl, another set of ioctls on the PERF fd is defined. These are expected to be common to the different PERF stream types and are therefore defined at the PERF layer itself. v2: Add param_size to 'struct drm_xe_perf_param' (Umesh) v3: Rename 'enum drm_xe_perf_ops' to 'enum drm_xe_perf_ioctls' (Guy Zadicario) Add DRM_ prefix to ioctl names to indicate uapi names v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario) v5: Squash the ops and PERF layer patches into a single patch (Umesh) Remove param_size from struct 'drm_xe_perf_param' (Umesh) v6: Add DRM_XE_PERF_IOCTL_STATUS v7: Add DRM_XE_PERF_IOCTL_INFO v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include "xe_perf.h"' to xe_perf.c, add kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: Guy Zadicario Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_perf.c | 34 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_perf.h | 14 +++++++++ include/uapi/drm/xe_drm.h | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_perf.c create mode 100644 drivers/gpu/drm/xe/xe_perf.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index cbf961b90237..f99492449e5d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ + xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 64691a56d59c..a44093cbbb71 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" +#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -141,6 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_perf.c
b/drivers/gpu/drm/xe/xe_perf.c new file mode 100644 index 000000000000..2963174ecd0e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_perf.h" + +/** + * xe_perf_ioctl - The top level perf layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + * + * The function is called for different perf streams types and allows execution + * of different operations supported by those perf stream types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_perf_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->perf_type) { + default: + return -EINVAL; + } +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h new file mode 100644 index 000000000000..e7e258eaf0a9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PERF_H_ +#define _XE_PERF_H_ + +struct drm_device; +struct drm_file; + +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..c1626027dc69 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). 
+ */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + #if defined(__cplusplus) } #endif -- cgit From fe8929bdf83512ed7e413a28e543c725bf536354 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:54 -0700 Subject: drm/xe/perf/uapi: Add perf_stream_paranoid sysctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally only superuser/root can access perf counter data. However, superuser can set perf_stream_paranoid sysctl to 0 to allow non-privileged users to also access perf data. perf_stream_paranoid is introduced at the perf layer to allow different perf stream types to share this access mechanism. v2: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-3-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_module.c | 5 +++++ drivers/gpu/drm/xe/xe_perf.c | 40 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_perf.h | 6 ++++++ 3 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 3edeb30d5ccb..893858a2eea0 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,6 +11,7 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_perf.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { @@ -78,6 +79,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_register_pci_driver, .exit = xe_unregister_pci_driver, }, + { + .init = xe_perf_sysctl_register, + .exit = xe_perf_sysctl_unregister, + }, }; static int __init xe_init(void) diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index 2963174ecd0e..f619cf50b453 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -4,11 +4,15 @@ */ #include +#include #include #include "xe_perf.h" +u32 xe_perf_stream_paranoid = true; +static struct ctl_table_header *sysctl_header; + /** * xe_perf_ioctl - The top level perf layer ioctl * @dev: @drm_device @@ -32,3 +36,39 @@ int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -EINVAL; } } + +static struct ctl_table perf_ctl_table[] = { + { + .procname = "perf_stream_paranoid", + .data = &xe_perf_stream_paranoid, + .maxlen = sizeof(xe_perf_stream_paranoid), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = 
SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +/** + * xe_perf_sysctl_register - Register "perf_stream_paranoid" sysctl + * + * Normally only superuser/root can access perf counter data. However, + * superuser can set perf_stream_paranoid sysctl to 0 to allow non-privileged + * users to also access perf data. + * + * Return: always returns 0 + */ +int xe_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl("dev/xe", perf_ctl_table); + return 0; +} + +/** + * xe_perf_sysctl_unregister - Unregister "perf_stream_paranoid" sysctl + */ +void xe_perf_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h index e7e258eaf0a9..53a8377a1bb1 100644 --- a/drivers/gpu/drm/xe/xe_perf.h +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -6,9 +6,15 @@ #ifndef _XE_PERF_H_ #define _XE_PERF_H_ +#include + struct drm_device; struct drm_file; +extern u32 xe_perf_stream_paranoid; + int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_perf_sysctl_register(void); +void xe_perf_sysctl_unregister(void); #endif -- cgit From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:55 -0700 Subject: drm/xe/oa/uapi: Add OA data formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add and initialize supported OA data formats for various platforms (including Xe2). User can request OA data in any supported format. Bspec: 52198, 60942, 61101 v2: Start 'xe_oa_format_name' enum from 0 (Umesh) Fix error rewind with OA (Umesh) v3: Use graphics versions rather than absolute platform names v4: Add missing kernel doc for struct memebers and enum and other minor changes (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 11 +++- drivers/gpu/drm/xe/xe_device_types.h | 4 ++ drivers/gpu/drm/xe/xe_oa.c | 111 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 16 +++++ drivers/gpu/drm/xe/xe_oa_types.h | 83 ++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 19 ++++++ 7 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_oa.c create mode 100644 drivers/gpu/drm/xe/xe_oa.h create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f99492449e5d..7039008be234 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -92,6 +92,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_oa.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a44093cbbb71..1195c64a715a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -656,10 +656,14 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); - err = xe_display_init(xe); + err = xe_oa_init(xe); if (err) goto err_fini_gt; + err = xe_display_init(xe); + if (err) + goto err_fini_oa; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_fini_display; @@ -675,6 +679,9 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); +err_fini_oa: + xe_oa_fini(xe); + err_fini_gt: for_each_gt(gt, xe, id) { if (id < last_gt) @@ -707,6 +714,8 @@ 
void xe_device_remove(struct xe_device *xe) xe_display_fini(xe); + xe_oa_fini(xe); + xe_heci_gsc_fini(xe); for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 52bc461171d5..185986e1d586 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_oa.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -462,6 +463,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @oa: oa perf counter subsystem */ + struct xe_oa oa; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c new file mode 100644 index 000000000000..5c0179ff4f60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_oa.h" + +#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x + +static const struct xe_oa_format oa_formats[] = { + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, +}; + +static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) +{ + __set_bit(format, oa->format_mask); +} + +static void xe_oa_init_supported_formats(struct xe_oa *oa) +{ + if (GRAPHICS_VER(oa->xe) >= 20) { + /* Xe2+ */ + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { + /* XE_METEORLAKE */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, 
XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { + /* XE_DG2, XE_PVC */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + } else { + /* Gen12+ */ + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); + oa_format_add(oa, XE_OA_FORMAT_A12); + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_C4_B8); + } +} + +/** + * xe_oa_init - OA initialization during device probe + * @xe: @xe_device + * + * Return: 0 on success or a negative error code on failure + */ +int xe_oa_init(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + /* Support OA only with GuC submission and Gen12+ */ + if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) + return 0; + + oa->xe = xe; + oa->oa_formats = oa_formats; + + xe_oa_init_supported_formats(oa); + return 0; +} + +/** + * xe_oa_fini - OA de-initialization during device remove + * @xe: @xe_device + */ +void xe_oa_fini(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + oa->xe = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h new file mode 100644 index 000000000000..2647c1947746 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_H_ +#define _XE_OA_H_ + +#include "xe_oa_types.h" + +struct xe_device; + +int xe_oa_init(struct xe_device *xe); +void xe_oa_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h new file mode 100644 index 000000000000..99940e25b1c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_TYPES_H_ +#define _XE_OA_TYPES_H_ + +#include +#include + +enum xe_oa_report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + +enum xe_oa_format_name { + XE_OA_FORMAT_C4_B8, + + /* Gen8+ */ + XE_OA_FORMAT_A12, + XE_OA_FORMAT_A12_B8_C8, + XE_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, + XE_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* DG2/MTL OAC */ + XE_OAC_FORMAT_A24u64_B8_C8, + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, + + /* MTL OAM */ + XE_OAM_FORMAT_MPEC8u64_B8_C8, + XE_OAM_FORMAT_MPEC8u32_B8_C8, + + /* Xe2+ */ + XE_OA_FORMAT_PEC64u64, + XE_OA_FORMAT_PEC64u64_B8_C8, + XE_OA_FORMAT_PEC64u32, + XE_OA_FORMAT_PEC32u64_G1, + XE_OA_FORMAT_PEC32u32_G1, + XE_OA_FORMAT_PEC32u64_G2, + XE_OA_FORMAT_PEC32u32_G2, + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, + + __XE_OA_FORMAT_MAX, +}; + +/** + * struct xe_oa_format - Format fields for supported OA formats. 
OA format + * properties are specified in PRM/Bspec 52198 and 60942 + */ +struct xe_oa_format { + /** @counter_select: counter select value (see Bspec 52198/60942) */ + u32 counter_select; + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ + int size; + /** @type: of enum @drm_xe_oa_format_type */ + int type; + /** @header: 32 or 64 bit report headers */ + enum xe_oa_report_header header; + /** @counter_size: counter size value (see Bspec 60942) */ + u16 counter_size; + /** @bc_report: BC report value (see Bspec 60942) */ + u16 bc_report; +}; + +/** + * struct xe_oa - OA device level information + */ +struct xe_oa { + /** @xe: back pointer to xe device */ + struct xe_device *xe; + + /** @oa_formats: tracks all OA formats across platforms */ + const struct xe_oa_format *oa_formats; + + /** @format_mask: tracks valid OA formats for a platform */ + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; +}; +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c1626027dc69..7e10874bfb33 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + #if defined(__cplusplus) } #endif -- cgit From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:56 -0700 Subject: drm/xe/oa/uapi: Initialize OA units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize OA unit data struct's for each gt during device probe. Also assign OA units for hardware engines. 
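As an illustration of the resulting layout, a hypothetical debug helper could walk the engines of a gt and print the unit each one was assigned; xe_oa_print_units() is made up for this sketch, while the fields and iterators are the ones added or used by this patch:

	/* hypothetical helper: dump the OA unit behind each engine of a gt */
	static void xe_oa_print_units(struct xe_gt *gt)
	{
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;

		for_each_hw_engine(hwe, gt, id) {
			if (!hwe->oa_unit)
				continue;	/* engine class without an OA unit */

			xe_gt_dbg(gt, "engine %s -> OA unit %u (type %d, %u engines)\n",
				  hwe->name, hwe->oa_unit->oa_unit_id,
				  hwe->oa_unit->type, hwe->oa_unit->num_engines);
		}
	}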
v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh) Change mtl_oa_base to 0x13000 (Umesh) v3: Switch to drmm_ functions and other cleanups (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 92 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 4 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + drivers/gpu/drm/xe/xe_oa.c | 156 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 54 +++++++++++ include/uapi/drm/xe_drm.h | 14 +++ 6 files changed, 322 insertions(+) create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h new file mode 100644 index 000000000000..99bad563d51d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_OA_REGS__ +#define __XE_OA_REGS__ + +#define RPM_CONFIG1 XE_REG(0xd04) +#define GT_NOA_ENABLE REG_BIT(9) + +#define EU_PERF_CNTL0 XE_REG(0xe458) +#define EU_PERF_CNTL4 XE_REG(0xe45c) +#define EU_PERF_CNTL1 XE_REG(0xe558) +#define EU_PERF_CNTL5 XE_REG(0xe55c) +#define EU_PERF_CNTL2 XE_REG(0xe658) +#define EU_PERF_CNTL6 XE_REG(0xe65c) +#define EU_PERF_CNTL3 XE_REG(0xe758) + +#define OA_TLB_INV_CR XE_REG(0xceec) + +/* OAR unit */ +#define OAR_OACONTROL XE_REG(0x2960) +#define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) + +#define OACTXCONTROL(base) XE_REG((base) + 0x360) +#define OAR_OASTATUS XE_REG(0x2968) +#define OA_COUNTER_RESUME REG_BIT(0) + +/* OAG unit */ +#define OAG_OAGLBCTXCTRL XE_REG(0x2b28) +#define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) +#define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) +#define OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) + +#define OAG_OAHEADPTR XE_REG(0xdb00) +#define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) +#define OAG_OATAILPTR XE_REG(0xdb04) +#define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) + +#define OAG_OABUFFER XE_REG(0xdb08) +#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) +#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) +#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) +#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) +#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) +#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) +#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) +#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) +#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) +#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) +#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) +#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) +/* Common to all OA units */ +#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) +#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) + +#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define OAG_OASTATUS XE_REG(0xdafc) +#define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) +#define 
OASTATUS_COUNTER_OVERFLOW REG_BIT(2) +#define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) +#define OASTATUS_REPORT_LOST REG_BIT(0) +/* OAM unit */ +#define OAM_HEAD_POINTER_OFFSET (0x1a0) +#define OAM_TAIL_POINTER_OFFSET (0x1a4) +#define OAM_BUFFER_OFFSET (0x1a8) +#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define OAM_CONTROL_OFFSET (0x194) +#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_DEBUG_OFFSET (0x198) +#define OAM_STATUS_OFFSET (0x19c) +#define OAM_MMIO_TRG_OFFSET (0x1d0) + +#define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) +#define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) +#define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) +#define OAM_CONTEXT_CONTROL(base) XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) +#define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) +#define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) +#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) +#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 10a9a9529377..24bb95de920f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -12,6 +12,7 @@ #include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" +#include "xe_oa.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" @@ -387,6 +388,9 @@ struct xe_gt { */ u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; + + /** @oa: oa perf counter subsystem per gt info */ + struct xe_oa_gt oa; }; #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 580bbd7e83b2..70e6434f150d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -148,6 +148,8 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; /** @eclass: pointer to per hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @oa_unit: oa unit for this hw engine */ + struct xe_oa_unit *oa_unit; }; /** diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5c0179ff4f60..e836fafa9fb3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,13 +3,20 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include +#include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_oa.h" +#define XE_OA_UNIT_INVALID U32_MAX + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -34,6 +41,142 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static u32 num_oa_units_per_gt(struct xe_gt *gt) +{ + return 1; +} + +static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. 
All MTL/LNL SKUs list 1 SA MEDIA + */ + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + + return 0; + } + + return XE_OA_UNIT_INVALID; +} + +static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) +{ + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return 0; + + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return __hwe_oam_unit(hwe); + + default: + return XE_OA_UNIT_INVALID; + } +} + +static struct xe_oa_regs __oam_regs(u32 base) +{ + return (struct xe_oa_regs) { + base, + OAM_HEAD_POINTER(base), + OAM_TAIL_POINTER(base), + OAM_BUFFER(base), + OAM_CONTEXT_CONTROL(base), + OAM_CONTROL(base), + OAM_DEBUG(base), + OAM_STATUS(base), + OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oag_regs(void) +{ + return (struct xe_oa_regs) { + 0, + OAG_OAHEADPTR, + OAG_OATAILPTR, + OAG_OABUFFER, + OAG_OAGLBCTXCTRL, + OAG_OACONTROL, + OAG_OA_DEBUG, + OAG_OASTATUS, + OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static void __xe_oa_init_oa_units(struct xe_gt *gt) +{ + const u32 mtl_oa_base[] = { 0x13000 }; + int i, num_units = gt->oa.num_oa_units; + + for (i = 0; i < num_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (gt->info.type != XE_GT_TYPE_MEDIA) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u->regs = __oam_regs(mtl_oa_base[i]); + u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } + + /* Set oa_unit_ids now to ensure ids remain contiguous */ + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; + } +} + +static int xe_oa_init_gt(struct xe_gt *gt) +{ + u32 num_oa_units = num_oa_units_per_gt(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_oa_unit *u; + + u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); + if (!u) + return -ENOMEM; + + for_each_hw_engine(hwe, gt, id) { + u32 index = __hwe_oa_unit(hwe); + + hwe->oa_unit = NULL; + if (index < num_oa_units) { + u[index].num_engines++; + hwe->oa_unit = &u[index]; + } + } + + /* + * Fused off engines can result in oa_unit's with num_engines == 0. These units + * will appear in OA unit query, but no perf streams can be opened on them. 
+ */ + gt->oa.num_oa_units = num_oa_units; + gt->oa.oa_unit = u; + + __xe_oa_init_oa_units(gt); + + drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); + + return 0; +} + +static int xe_oa_init_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int i, ret; + + for_each_gt(gt, oa->xe, i) { + ret = xe_oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) { __set_bit(format, oa->format_mask); @@ -87,6 +230,7 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) int xe_oa_init(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + int ret; /* Support OA only with GuC submission and Gen12+ */ if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) @@ -95,8 +239,17 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + ret = xe_oa_init_oa_units(oa); + if (ret) { + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); + goto exit; + } + xe_oa_init_supported_formats(oa); return 0; +exit: + oa->xe = NULL; + return ret; } /** @@ -107,5 +260,8 @@ void xe_oa_fini(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + if (!oa->xe) + return; + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 99940e25b1c6..e7b91e31f0e8 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,8 +7,12 @@ #define _XE_OA_TYPES_H_ #include +#include #include +#include +#include "regs/xe_reg_defs.h" + enum xe_oa_report_header { HDR_32_BIT = 0, HDR_64_BIT, @@ -67,6 +71,53 @@ struct xe_oa_format { u16 bc_report; }; +/** struct xe_oa_regs - Registers for each OA unit */ +struct xe_oa_regs { + u32 base; + struct xe_reg oa_head_ptr; + struct xe_reg oa_tail_ptr; + struct xe_reg oa_buffer; + struct xe_reg oa_ctx_ctrl; + struct xe_reg oa_ctrl; + struct xe_reg oa_debug; + struct xe_reg oa_status; + u32 oa_ctrl_counter_select_mask; +}; + +/** + * struct xe_oa_unit - Hardware OA unit + */ +struct xe_oa_unit { + /** @oa_unit_id: identifier for the OA unit */ + u16 oa_unit_id; + + /** @type: Type of OA unit - OAM, OAG etc. */ + enum drm_xe_oa_unit_type type; + + /** @regs: OA registers for programming the OA unit */ + struct xe_oa_regs regs; + + /** @num_engines: number of engines attached to this OA unit */ + u32 num_engines; + + /** @exclusive_stream: The stream currently using the OA unit */ + struct xe_oa_stream *exclusive_stream; +}; + +/** + * struct xe_oa_gt - OA per-gt information + */ +struct xe_oa_gt { + /** @gt_lock: lock protecting create/destroy OA streams */ + struct mutex gt_lock; + + /** @num_oa_units: number of oa units for each gt */ + u32 num_oa_units; + + /** @oa_unit: array of oa_units */ + struct xe_oa_unit *oa_unit; +}; + /** * struct xe_oa - OA device level information */ @@ -79,5 +130,8 @@ struct xe_oa { /** @format_mask: tracks valid OA formats for a platform */ unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; + + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ + u16 oa_unit_ids; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7e10874bfb33..323d899a276b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. 
+ */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 -- cgit From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:57 -0700 Subject: drm/xe/oa/uapi: Add/remove OA config perf ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce add/remove config perf ops for OA. OA configurations consist of a set of event/counter select register address/value pairs. The add_config perf op validates and stores such configurations and also exposes them in the metrics sysfs. These configurations will be programmed to OA unit HW when an OA stream using a configuration is opened. The OA stream can also switch to other stored configurations. v2: Start config id's from 1 and other minor review comments (Umesh) v3: Add 32 bit build v4: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_oa.c | 434 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 6 + drivers/gpu/drm/xe/xe_oa_types.h | 10 + drivers/gpu/drm/xe/xe_perf.c | 16 ++ include/uapi/drm/xe_drm.h | 25 +++ 6 files changed, 495 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1195c64a715a..31b549f5f03a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -670,6 +670,8 @@ int xe_device_probe(struct xe_device *xe) xe_display_register(xe); + xe_oa_register(xe); + xe_debugfs_register(xe); xe_hwmon_register(xe); @@ -710,6 +712,8 @@ void xe_device_remove(struct xe_device *xe) struct xe_gt *gt; u8 id; + xe_oa_unregister(xe); + xe_device_remove_display(xe); xe_display_fini(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index e836fafa9fb3..4122785735d4 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -14,9 +14,32 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" +#include "xe_perf.h" #define XE_OA_UNIT_INVALID U32_MAX +struct xe_oa_reg { + struct xe_reg addr; + u32 value; +}; + +struct xe_oa_config { + struct xe_oa *oa; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct xe_oa_reg *regs; + u32 regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct kobj_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -41,6 +64,405 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static void xe_oa_config_release(struct kref *ref) +{ + struct xe_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->regs); + + kfree_rcu(oa_config, rcu); +} + +static void xe_oa_config_put(struct xe_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, xe_oa_config_release); +} + +static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) +{ + static const struct xe_reg flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + 
EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].addr == addr) + return true; + } + return false; +} + +static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) +{ + while (table->start && table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} + +static const struct xe_mmio_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ + {} +}; + +static const struct xe_mmio_range gen12_oa_b_counters[] = { + { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ + { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ + { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ + { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ + { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ + { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ + { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ + {} +}; + +static const struct xe_mmio_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + +static const struct xe_mmio_range xe2_oa_b_counters[] = { + { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ + { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ + { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ + {}, +}; + +static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || + xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || + xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || + (GRAPHICS_VER(oa->xe) >= 20 && + xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); +} + +static const struct xe_mmio_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} +}; + +static const struct xe_mmio_range gen12_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ + {} +}; + +static const struct xe_mmio_range xe2_oa_mux_regs[] = { + { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + {}, +}; + +static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) +{ + if (GRAPHICS_VER(oa->xe) >= 20) + return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); + else if (GRAPHICS_VERx100(oa->xe) >= 1270) + 
return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs);
+	else
+		return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs);
+}
+
+static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr)
+{
+	return xe_oa_is_valid_flex_addr(oa, addr) ||
+		xe_oa_is_valid_b_counter_addr(oa, addr) ||
+		xe_oa_is_valid_mux_addr(oa, addr);
+}
+
+static struct xe_oa_reg *
+xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr),
+		 u32 __user *regs, u32 n_regs)
+{
+	struct xe_oa_reg *oa_regs;
+	int err;
+	u32 i;
+
+	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
+	if (!oa_regs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < n_regs; i++) {
+		u32 addr, value;
+
+		err = get_user(addr, regs);
+		if (err)
+			goto addr_err;
+
+		if (!is_valid(oa, addr)) {
+			drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr);
+			err = -EINVAL;
+			goto addr_err;
+		}
+
+		err = get_user(value, regs + 1);
+		if (err)
+			goto addr_err;
+
+		oa_regs[i].addr = XE_REG(addr);
+		oa_regs[i].value = value;
+
+		regs += 2;
+	}
+
+	return oa_regs;
+
+addr_err:
+	kfree(oa_regs);
+	return ERR_PTR(err);
+}
+
+static ssize_t show_dynamic_id(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       char *buf)
+{
+	struct xe_oa_config *oa_config =
+		container_of(attr, typeof(*oa_config), sysfs_metric_id);
+
+	return sysfs_emit(buf, "%d\n", oa_config->id);
+}
+
+static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa,
+					 struct xe_oa_config *oa_config)
+{
+	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
+	oa_config->sysfs_metric_id.attr.name = "id";
+	oa_config->sysfs_metric_id.attr.mode = 0444;
+	oa_config->sysfs_metric_id.show = show_dynamic_id;
+	oa_config->sysfs_metric_id.store = NULL;
+
+	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
+	oa_config->attrs[1] = NULL;
+
+	oa_config->sysfs_metric.name = oa_config->uuid;
+	oa_config->sysfs_metric.attrs = oa_config->attrs;
+
+	return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric);
+}
+
+/**
+ * xe_oa_add_config_ioctl - Adds one OA config
+ * @dev: @drm_device
+ * @data: pointer to struct @drm_xe_oa_config
+ * @file: @drm_file
+ *
+ * The function adds an OA config to the set of OA configs maintained in
+ * the kernel. The config determines which OA metrics are collected for an
+ * OA stream.
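+ *
+ * A minimal userspace sketch (illustrative only; assumes the
+ * DRM_IOCTL_XE_PERF wrapper and struct drm_xe_perf_param defined earlier
+ * in this series; register/value and uuid are hypothetical, error
+ * handling omitted):
+ *
+ *	__u32 regs[] = { 0x9840, 0x0 };	/* e.g. GDT_CHICKEN_BITS mux reg */
+ *	struct drm_xe_oa_config cfg = {
+ *		.uuid = "01234567-0123-0123-0123-0123456789ab",
+ *		.n_regs = 1,
+ *		.regs_ptr = (uintptr_t)regs,
+ *	};
+ *	struct drm_xe_perf_param p = {
+ *		.perf_type = DRM_XE_PERF_TYPE_OA,
+ *		.perf_op = DRM_XE_PERF_OP_ADD_CONFIG,
+ *		.param = (uintptr_t)&cfg,
+ *	};
+ *	int config_id = ioctl(drm_fd, DRM_IOCTL_XE_PERF, &p);
+ *
+ * On success the ioctl returns the new config id, which is also exposed
+ * under the metrics sysfs directory.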
+ */ +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct drm_xe_oa_config param; + struct drm_xe_oa_config *arg = ¶m; + struct xe_oa_config *oa_config, *tmp; + struct xe_oa_reg *regs; + int err, id; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); + return -EACCES; + } + + err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) + return -EINVAL; + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) + return -ENOMEM; + + oa_config->oa = oa; + kref_init(&oa_config->ref); + + if (!uuid_is_valid(arg->uuid)) { + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); + + oa_config->regs_len = arg->n_regs; + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + u64_to_user_ptr(arg->regs_ptr), + arg->n_regs); + if (IS_ERR(regs)) { + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); + err = PTR_ERR(regs); + goto reg_err; + } + oa_config->regs = regs; + + err = mutex_lock_interruptible(&oa->metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ + idr_for_each_entry(&oa->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(oa, oa_config); + if (err) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); + if (oa_config->id < 0) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&oa->metrics_lock); +reg_err: + xe_oa_config_put(oa_config); + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); + return err; +} + +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg); + if (!oa_config) { + drm_dbg(&oa->xe->drm, 
"Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto err_unlock; + } + + WARN_ON(arg != oa_config->id); + + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); + idr_remove(&oa->metrics_idr, arg); + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + + xe_oa_config_put(oa_config); + + return 0; + +err_unlock: + mutex_unlock(&oa->metrics_lock); + return ret; +} + +/** + * xe_oa_register - Xe OA registration + * @xe: @xe_device + * + * Exposes the metrics sysfs directory upon completion of module initialization + */ +void xe_oa_register(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + oa->metrics_kobj = kobject_create_and_add("metrics", + &xe->drm.primary->kdev->kobj); +} + +/** + * xe_oa_unregister - Xe OA de-registration + * @xe: @xe_device + */ +void xe_oa_unregister(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + static u32 num_oa_units_per_gt(struct xe_gt *gt) { return 1; @@ -239,6 +661,9 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); + idr_init_base(&oa->metrics_idr, 1); + ret = xe_oa_init_oa_units(oa); if (ret) { drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); @@ -252,6 +677,12 @@ exit: return ret; } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + return 0; +} + /** * xe_oa_fini - OA de-initialization during device remove * @xe: @xe_device @@ -263,5 +694,8 @@ void xe_oa_fini(struct xe_device *xe) if (!oa->xe) return; + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 2647c1947746..5ccc772e047a 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -8,9 +8,15 @@ #include "xe_oa_types.h" +struct drm_device; +struct drm_file; struct xe_device; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); +void xe_oa_register(struct xe_device *xe); +void xe_oa_unregister(struct xe_device *xe); +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); #endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index e7b91e31f0e8..f8a45015cf49 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,6 +7,7 @@ #define _XE_OA_TYPES_H_ #include +#include #include #include @@ -125,6 +126,15 @@ struct xe_oa { /** @xe: back pointer to xe device */ struct xe_device *xe; + /** @metrics_kobj: kobj for metrics sysfs */ + struct kobject *metrics_kobj; + + /** @metrics_lock: lock protecting add/remove configs */ + struct mutex metrics_lock; + + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ + struct idr metrics_idr; + /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index f619cf50b453..ca01042d75b1 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -8,11 +8,25 @@ #include +#include "xe_oa.h" #include "xe_perf.h" u32 xe_perf_stream_paranoid = true; static struct ctl_table_header *sysctl_header; +static int 
xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, + struct drm_file *file) +{ + switch (arg->perf_op) { + case DRM_XE_PERF_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} + /** * xe_perf_ioctl - The top level perf layer ioctl * @dev: @drm_device @@ -32,6 +46,8 @@ int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -EINVAL; switch (arg->perf_type) { + case DRM_XE_PERF_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); default: return -EINVAL; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 323d899a276b..fd9a4bd9e3d4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + DRM_XE_PERF_TYPE_OA, __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; @@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:58 -0700 Subject: drm/xe/oa/uapi: Define and parse OA stream properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properties for OA streams are specified by user space, when the stream is opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate these stream properties. 
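For illustration, a hypothetical userspace chain setting two properties
could be built like this (names from the uapi header added below; error
handling omitted):

	struct drm_xe_ext_set_property exts[2] = {
		[0] = {
			.base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY,
			.base.next_extension = (uintptr_t)&exts[1],
			.property = DRM_XE_OA_PROPERTY_OA_UNIT_ID,
			.value = 0,
		},
		[1] = {
			.base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY,
			.property = DRM_XE_OA_PROPERTY_SAMPLE_OA,
			.value = 1,
		},
	};

The param field of struct drm_xe_perf_param then points at &exts[0] and
the kernel walks the chain via the next_extension links.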
v2: Remove struct drm_xe_oa_open_param (Harish Chegondi) Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh) Eliminate comparison with xe_oa_max_sample_rate (Umesh) Drop 'struct drm_xe_oa_record_header' (Umesh) v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \ DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose) v4: Fix 32 bit build v5: Add non-static function kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 364 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 5 + drivers/gpu/drm/xe/xe_perf.c | 2 + include/uapi/drm/xe_drm.h | 72 +++++++++ 4 files changed, 443 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4122785735d4..9b23eadf56cd 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,18 +3,23 @@ * Copyright © 2023-2024 Intel Corporation */ +#include + #include #include +#include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_perf.h" +#include "xe_pm.h" #define XE_OA_UNIT_INVALID U32_MAX @@ -40,6 +45,19 @@ struct xe_oa_config { struct rcu_head rcu; }; +struct xe_oa_open_param { + u32 oa_unit_id; + bool sample; + u32 metric_set; + enum xe_oa_format_name oa_format; + int period_exponent; + bool disabled; + int exec_queue_id; + int engine_instance; + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -82,6 +100,352 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config) kref_put(&oa_config->ref, xe_oa_config_release); } +/** + * xe_oa_timestamp_frequency - Return OA timestamp frequency + * @gt: @xe_gt + * + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user. 
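+ *
+ * As a worked example (hypothetical numbers): with a 19.2 MHz reference
+ * clock and a CTC_SHIFT of 0, the value returned on the affected
+ * platforms would be 19.2 MHz << (3 - 0) = 153.6 MHz.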
+ */ +u32 xe_oa_timestamp_frequency(struct xe_gt *gt) +{ + u32 reg, shift; + + /* + * Wa_18013179988:dg2 + * Wa_14015568240:pvc + * Wa_14015846243:mtl + */ + switch (gt_to_xe(gt)->info.platform) { + case XE_DG2: + case XE_PVC: + case XE_METEORLAKE: + xe_pm_runtime_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, RPM_CONFIG0); + xe_pm_runtime_put(gt_to_xe(gt)); + + shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); + return gt->info.reference_clock << (3 - shift); + + default: + return gt->info.reference_clock; + } +} + +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) +{ + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = xe_oa_timestamp_frequency(gt); + + return div_u64(nom + den - 1, den); +} + +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +{ + switch (hwe->oa_unit->type) { + case DRM_XE_OA_UNIT_TYPE_OAG: + return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || + type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_OAM: + return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + default: + return false; + } +} + +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +/** + * xe_oa_unit_id - Return OA unit ID for a hardware engine + * @hwe: @xe_hw_engine + * + * Return OA unit ID for a hardware engine when available + */ +u16 xe_oa_unit_id(struct xe_hw_engine *hwe) +{ + return hwe->oa_unit && hwe->oa_unit->num_engines ? + hwe->oa_unit->oa_unit_id : U16_MAX; +} + +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + struct xe_gt *gt; + int i, ret = 0; + + if (param->exec_q) { + /* When we have an exec_q, get hwe from the exec_q */ + param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, + param->engine_instance, true); + } else { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* Else just get the first hwe attached to the oa unit */ + for_each_gt(gt, oa->xe, i) { + for_each_hw_engine(hwe, gt, id) { + if (xe_oa_unit_id(hwe) == param->oa_unit_id) { + param->hwe = hwe; + goto out; + } + } + } + } +out: + if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? 
param->exec_q->class : -1, + param->engine_instance, param->oa_unit_id); + ret = -EINVAL; + } + + return ret; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, ¶m->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); + return xe_oa_set_property_funcs[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + 
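+	/*
+	 * Each extension starts with a struct drm_xe_user_extension whose
+	 * next_extension member chains to the next one; recurse over the
+	 * chain below, bounding the depth so that a malformed or malicious
+	 * chain cannot recurse indefinitely.
+	 */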
if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); + + return 0; +} + +/** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions opens an OA stream. An OA stream, opened with specified + * properties, enables perf counter samples to be collected, either + * periodically (time based sampling), or on request (using perf queries) + */ +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_file *xef = to_xe_file(file); + struct xe_oa_open_param param = {}; + const struct xe_oa_format *f; + bool privileged_op = true; + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + ret = xe_oa_user_extensions(oa, data, 0, ¶m); + if (ret) + return ret; + + if (param.exec_queue_id > 0) { + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + } + + /* + * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, + * without global stream access, can be an unprivileged operation + */ + if (param.exec_q && !param.sample) + privileged_op = false; + + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + ret = -EACCES; + goto err_exec_q; + } + + if (!param.exec_q && !param.sample) { + drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_assign_hwe(oa, ¶m); + if (ret) + goto err_exec_q; + + f = &oa->oa_formats[param.oa_format]; + if (!param.oa_format || !f->size || + !engine_supports_oa_format(param.hwe, f->type)) { + drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", + param.oa_format, f->type, f->size, param.hwe->class); + ret = -EINVAL; + goto err_exec_q; + } + + if (param.period_exponent > 0) { + u64 oa_period, oa_freq_hz; + + /* Requesting samples from OAG buffer is a privileged operation */ + if (!param.sample) { + drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); + ret = -EINVAL; + goto err_exec_q; + } + oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); + oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); + drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); + } +err_exec_q: + if (ret < 0 && param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; +} + static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) { static const struct xe_reg flex_eu_regs[] = { diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 5ccc772e047a..87a38820c317 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -11,12 +11,17 @@ struct drm_device; struct drm_file; struct xe_device; +struct xe_gt; +struct xe_hw_engine; int 
xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); void xe_oa_register(struct xe_device *xe); void xe_oa_unregister(struct xe_device *xe); +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +u32 xe_oa_timestamp_frequency(struct xe_gt *gt); +u16 xe_oa_unit_id(struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index ca01042d75b1..d6cd74cadf34 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -18,6 +18,8 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, struct drm_file *file) { switch (arg->perf_op) { + case DRM_XE_PERF_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_ADD_CONFIG: return xe_oa_add_config_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_REMOVE_CONFIG: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd9a4bd9e3d4..307409f968e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. 
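+	 * When an exec queue is specified and @DRM_XE_OA_PROPERTY_SAMPLE_OA
+	 * is not set, opening the stream can be an unprivileged operation.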
+ */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ + DRM_XE_OA_PROPERTY_MAX +}; + /** * struct drm_xe_oa_config - OA metric configuration * -- cgit From 1db9a9dc90aece0803a26a711b52a9492faefab7 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:59 -0700 Subject: drm/xe/oa: OA stream initialization (OAG) Implement majority of OA stream initialization (as part of OA stream open) ioctl). OAG buffer is allocated for receiving perf counter samples from HW. OAG unit is initialized and the selected OA metric configuration is programmed into OAG unit HW using a command/batch buffer. Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-8-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 + drivers/gpu/drm/xe/xe_oa.c | 393 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 79 +++++++ 3 files changed, 475 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 47c26c37608d..6a7bbb410613 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -170,6 +170,8 @@ #define SQCNT1 XE_REG_MCR(0x8718) #define XELPMP_SQCNT1 XE_REG(0x8718) +#define SQCNT1_PMON_ENABLE REG_BIT(30) +#define SQCNT1_OABPC REG_BIT(29) #define ENFORCE_RAR REG_BIT(23) #define XEHP_SQCM XE_REG_MCR(0x8724) @@ -429,6 +431,7 @@ #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) +#define STALL_DOP_GATING_DISABLE REG_BIT(5) #define EARLY_EOT_DIS REG_BIT(1) #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 9b23eadf56cd..c2fd2d22677f 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,24 +3,34 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include +#include #include #include +#include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" +#include "xe_bb.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_perf.h" #include "xe_pm.h" +#include "xe_sched_job.h" +#define DEFAULT_POLL_FREQUENCY_HZ 200 +#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX struct xe_oa_reg { @@ -58,6 +68,13 @@ struct xe_oa_open_param { struct xe_hw_engine *hwe; }; +struct xe_oa_config_bo { + struct llist_node node; + + struct xe_oa_config *oa_config; + struct xe_bb *bb; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -100,6 +117,378 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config) kref_put(&oa_config->ref, xe_oa_config_release); } +static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) +{ + return kref_get_unless_zero(&oa_config->ref) ? 
oa_config : NULL; +} + +static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) +{ + struct xe_oa_config *oa_config; + + rcu_read_lock(); + oa_config = idr_find(&oa->metrics_idr, metrics_set); + if (oa_config) + oa_config = xe_oa_config_get(oa_config); + rcu_read_unlock(); + + return oa_config; +} + +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) +{ + xe_oa_config_put(oa_bo->oa_config); + xe_bb_free(oa_bo->bb, NULL); + kfree(oa_bo); +} + +static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) +{ + return &stream->hwe->oa_unit->regs; +} + +static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) +{ + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + int err = 0; + + /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ + job = xe_bb_create_job(stream->k_exec_q, bb); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto exit; + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + if (timeout < 0) + err = timeout; + else if (!timeout) + err = -ETIME; +exit: + return err; +} + +static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) +{ + u32 i; + +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) + + for (i = 0; i < n_regs; i++) { + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); + + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); + } + bb->cs[bb->len++] = reg_data[i].addr.addr; + bb->cs[bb->len++] = reg_data[i].value; + } +} + +static int num_lri_dwords(int num_regs) +{ + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) +{ + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); +} + +static void xe_oa_free_configs(struct xe_oa_stream *stream) +{ + struct xe_oa_config_bo *oa_bo, *tmp; + + xe_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + +#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) + +static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) +{ + u32 sqcnt1; + + /* + * Wa_1508761755:xehpsdv, dg2 + * Enable thread stall DOP gating and EU DOP gating. + */ + if (stream->oa->xe->info.platform == XE_DG2) { + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, + _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, + _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); + } + + /* Make sure we disable noa to save power. */ + xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); + + sqcnt1 = SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); + + /* Reset PMON Enable to save power. 
*/ + xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); +} + +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +{ + struct xe_bo *bo; + + BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); + BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); + + bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, + XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + stream->oa_buffer.bo = bo; + stream->oa_buffer.vaddr = bo->vmap.vaddr; + return 0; +} + +static struct xe_oa_config_bo * +__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) +{ + struct xe_oa_config_bo *oa_bo; + size_t config_length; + struct xe_bb *bb; + + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); + + config_length = num_lri_dwords(oa_config->regs_len); + config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32); + + bb = xe_bb_new(stream->gt, config_length, false); + if (IS_ERR(bb)) + goto err_free; + + write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len); + + oa_bo->bb = bb; + oa_bo->oa_config = xe_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; +err_free: + kfree(oa_bo); + return ERR_CAST(bb); +} + +static struct xe_oa_config_bo *xe_oa_alloc_config_buffer(struct xe_oa_stream *stream) +{ + struct xe_oa_config *oa_config = stream->oa_config; + struct xe_oa_config_bo *oa_bo; + + /* Look for the buffer in the already allocated BOs attached to the stream */ + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); +out: + return oa_bo; +} + +static int xe_oa_emit_oa_config(struct xe_oa_stream *stream) +{ +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + struct xe_oa_config_bo *oa_bo; + int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; + + oa_bo = xe_oa_alloc_config_buffer(stream); + if (IS_ERR(oa_bo)) { + err = PTR_ERR(oa_bo); + goto exit; + } + + err = xe_oa_submit_bb(stream, oa_bo->bb); + + /* Additional empirical delay needed for NOA programming after registers are written */ + usleep_range(us, 2 * us); +exit: + return err; +} + +static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) +{ + /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ + return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, + stream->sample ? + 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); +} + +static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) +{ + u32 oa_debug, sqcnt1; + + /* + * Wa_1508761755:xehpsdv, dg2 + * EU NOA signals behave incorrectly if EU clock gating is enabled. + * Disable thread stall DOP gating and EU DOP gating. + */ + if (stream->oa->xe->info.platform == XE_DG2) { + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); + } + + /* Disable clk ratio reports */ + oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + OAG_OA_DEBUG_INCLUDE_CLK_RATIO; + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + _MASKED_BIT_ENABLE(oa_debug) | + oag_report_ctx_switches(stream)); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? 
+ (OAG_OAGLBCTXCTRL_COUNTER_RESUME | + OAG_OAGLBCTXCTRL_TIMER_ENABLE | + REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, + stream->period_exponent)) : 0); + + /* + * Initialize Super Queue Internal Cnt Register + * Set PMON Enable in order to collect valid metrics + * Enable bytes per clock reporting + */ + sqcnt1 = SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); + + xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + + return xe_oa_emit_oa_config(stream); +} + +static int xe_oa_stream_init(struct xe_oa_stream *stream, + struct xe_oa_open_param *param) +{ + struct xe_oa_unit *u = param->hwe->oa_unit; + struct xe_gt *gt = param->hwe->gt; + int ret; + + stream->exec_q = param->exec_q; + stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->hwe = param->hwe; + stream->gt = stream->hwe->gt; + stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; + + stream->sample = param->sample; + stream->periodic = param->period_exponent > 0; + stream->period_exponent = param->period_exponent; + + stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); + if (!stream->oa_config) { + drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); + ret = -EINVAL; + goto exit; + } + + /* Take runtime pm ref and forcewake to disable RC6 */ + xe_pm_runtime_get(stream->oa->xe); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + ret = xe_oa_alloc_oa_buffer(stream); + if (ret) + goto err_fw_put; + + stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL, + BIT(stream->hwe->logical_instance), 1, + stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0); + if (IS_ERR(stream->k_exec_q)) { + ret = PTR_ERR(stream->k_exec_q); + drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d", + stream->gt->info.id, stream->hwe->name, ret); + goto err_free_oa_buf; + } + + ret = xe_oa_enable_metric_set(stream); + if (ret) { + drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n"); + goto err_put_k_exec_q; + } + + drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", + stream->oa_config->uuid); + + WRITE_ONCE(u->exclusive_stream, stream); + + spin_lock_init(&stream->oa_buffer.ptr_lock); + mutex_init(&stream->stream_lock); + + return 0; + +err_put_k_exec_q: + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); +err_free_oa_buf: + xe_oa_free_oa_buffer(stream); +err_fw_put: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + xe_oa_free_configs(stream); +exit: + return ret; +} + +static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, + struct xe_oa_open_param *param) +{ + struct xe_oa_stream *stream; + int stream_fd; + int ret; + + /* We currently only allow exclusive access */ + if (param->hwe->oa_unit->exclusive_stream) { + drm_dbg(&oa->xe->drm, "OA unit already in use\n"); + ret = -EBUSY; + goto exit; + } + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) { + ret = -ENOMEM; + goto exit; + } + + stream->oa = oa; + ret = xe_oa_stream_init(stream, param); + if (ret) + goto err_free; + + /* Hold a reference on the drm device till stream_fd is released */ + drm_dev_get(&stream->oa->xe->drm); + + return stream_fd; +err_free: + kfree(stream); +exit: + return ret; +} + /** * xe_oa_timestamp_frequency - Return OA timestamp frequency * @gt: @xe_gt @@ -440,6 +829,10 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); 
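+		/*
+		 * oa_period is 2^(period_exponent + 1) OA timestamp ticks
+		 * converted to ns: e.g. with a hypothetical 19.2 MHz OA
+		 * clock, exponent 12 gives 2^13 / 19.2e6 s ~= 426.7 us,
+		 * i.e. a sampling frequency of ~2.3 kHz.
+		 */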
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + + mutex_lock(¶m.hwe->gt->oa.gt_lock); + ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); + mutex_unlock(¶m.hwe->gt->oa.gt_lock); err_exec_q: if (ret < 0 && param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index f8a45015cf49..6700383b1a52 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -14,6 +14,8 @@ #include #include "regs/xe_reg_defs.h" +#define XE_OA_BUFFER_SIZE SZ_16M + enum xe_oa_report_header { HDR_32_BIT = 0, HDR_64_BIT, @@ -144,4 +146,81 @@ struct xe_oa { /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ u16 oa_unit_ids; }; + +/** @xe_oa_buffer: State of the stream OA buffer */ +struct xe_oa_buffer { + /** @format: data format */ + const struct xe_oa_format *format; + + /** @format: xe_bo backing the OA buffer */ + struct xe_bo *bo; + + /** @vaddr: mapped vaddr of the OA buffer */ + u8 *vaddr; + + /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ + spinlock_t ptr_lock; + + /** @head: Cached head to read from */ + u32 head; + + /** @tail: The last verified cached tail where HW has completed writing */ + u32 tail; +}; + +/** + * struct xe_oa_stream - state for a single open stream FD + */ +struct xe_oa_stream { + /** @oa: xe_oa backpointer */ + struct xe_oa *oa; + + /** @gt: gt associated with the oa stream */ + struct xe_gt *gt; + + /** @hwe: hardware engine associated with this oa stream */ + struct xe_hw_engine *hwe; + + /** @stream_lock: Lock serializing stream operations */ + struct mutex stream_lock; + + /** @sample: true if DRM_XE_OA_PROP_SAMPLE_OA is provided */ + bool sample; + + /** @exec_q: Exec queue corresponding to DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID */ + struct xe_exec_queue *exec_q; + + /** @k_exec_q: kernel exec_q used for OA programming batch submissions */ + struct xe_exec_queue *k_exec_q; + + /** @enabled: Whether the stream is currently enabled */ + bool enabled; + + /** @oa_config: OA configuration used by the stream */ + struct xe_oa_config *oa_config; + + /** @oa_config_bos: List of struct @xe_oa_config_bo's */ + struct llist_head oa_config_bos; + + /** @poll_check_timer: Timer to periodically check for data in the OA buffer */ + struct hrtimer poll_check_timer; + + /** @poll_wq: Wait queue for waiting for OA data to be available */ + wait_queue_head_t poll_wq; + + /** @pollin: Whether there is data available to read */ + bool pollin; + + /** @periodic: Whether periodic sampling is currently enabled */ + bool periodic; + + /** @period_exponent: OA unit sampling frequency is derived from this */ + int period_exponent; + + /** @oa_buffer: OA buffer for the stream */ + struct xe_oa_buffer oa_buffer; + + /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ + u64 poll_period_ns; +}; #endif -- cgit From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:00 -0700 Subject: drm/xe/oa/uapi: Expose OA stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OA stream open perf op returns an fd with its own file_operations for the newly initialized OA stream. These file_operations allow userspace to enable or disable the stream, as well as apply a different metric configuration for the OA stream. Userspace can also poll for data availability. 
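As a rough sketch of the intended userspace flow (the stream fd is
obtained via the perf ioctl from earlier in this series; error handling
omitted):

	int fd = ioctl(drm_fd, DRM_IOCTL_XE_PERF, &p);	/* DRM_XE_PERF_OP_STREAM_OPEN */
	ioctl(fd, DRM_XE_PERF_IOCTL_ENABLE, 0);

	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	poll(&pfd, 1, -1);	/* woken by the hrtimer when data is available */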
OA stream initialization is completed in this commit by enabling the OA stream. When sampling is enabled this starts a hrtimer which periodically checks for data availablility. v2: Use stream properties for stream reconfiguration with DRM_XE_PERF_IOCTL_CONFIG v3: Hold runtime_pm reference across oa buffer alloc/free v4: Fix 32 bit build Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 380 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 4 + 2 files changed, 384 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index c2fd2d22677f..a71111859190 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,8 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include "xe_pm.h" #include "xe_sched_job.h" +#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1)) #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX @@ -147,6 +150,205 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) return &stream->hwe->oa_unit->regs; } +static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) +{ + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + OAG_OATAILPTR_MASK; +} + +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + int report_size = stream->oa_buffer.format->size; + u32 tail, hw_tail; + unsigned long flags; + bool pollin; + u32 partial_report_size; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + hw_tail = xe_oa_hw_tail_read(stream); + hw_tail -= gtt_offset; + + /* + * The tail pointer increases in 64 byte (cacheline size), not in report_size + * increments. Also report size may not be a power of 2. Compute potential + * partially landed report in OA buffer. + */ + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = OA_TAKEN(hw_tail, partial_report_size); + + tail = hw_tail; + + /* + * Walk the stream backward until we find a report with report id and timestamp + * not 0. We can't tell whether a report has fully landed in memory before the + * report id and timestamp of the following report have landed. + * + * This is assuming that the writes of the OA unit land in memory in the order + * they were written. 
If not : (╯°□°)╯︵ ┻━┻ + */ + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { + void *report = stream->oa_buffer.vaddr + tail; + + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + break; + + tail = OA_TAKEN(tail, report_size); + } + + if (OA_TAKEN(hw_tail, tail) > report_size) + drm_dbg(&stream->oa->xe->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + stream->oa_buffer.head, tail, hw_tail); + + stream->oa_buffer.tail = tail; + + pollin = OA_TAKEN(stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + return pollin; +} + +static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct xe_oa_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); + + if (xe_oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); + + return HRTIMER_RESTART; +} + +static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + gtt_offset & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = 0; + + /* + * PRM says: "This MMIO must be set before the OATAILPTR register and after the + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". + */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + gtt_offset & OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointer to read from */ + stream->oa_buffer.tail = 0; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); +} + +static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) +{ + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); +} + +static void xe_oa_enable(struct xe_oa_stream *stream) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + const struct xe_oa_regs *regs; + u32 val; + + /* + * BSpec: 46822: Bit 0. 
Even if stream->sample is 0, for OAR to function, the OA + * buffer must be correctly initialized + */ + xe_oa_init_oa_buffer(stream); + + regs = __oa_regs(stream); + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | + OAG_OACONTROL_OA_COUNTER_ENABLE; + + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); +} + +static void xe_oa_disable(struct xe_oa_stream *stream) +{ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA to be disabled timed out\n"); + + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { + /* <= XE_METEORLAKE except XE_PVC */ + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA tlb invalidate timed out\n"); + } +} + +static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* + * We don't explicitly check whether there's something to read here since this + * path may be hot depending on what else userspace is polling, or on the timeout + * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there + * are samples to read + */ + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_oa_poll(struct file *file, poll_table *wait) +{ + struct xe_oa_stream *stream = file->private_data; + __poll_t ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_poll_locked(stream, file, wait); + mutex_unlock(&stream->stream_lock); + + return ret; +} + static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) { struct xe_sched_job *job; @@ -246,6 +448,27 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); } +static void xe_oa_stream_destroy(struct xe_oa_stream *stream) +{ + struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_gt *gt = stream->hwe->gt; + + if (WARN_ON(stream != u->exclusive_stream)) + return; + + WRITE_ONCE(u->exclusive_stream, NULL); + + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); + + xe_oa_free_oa_buffer(stream); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + + xe_oa_free_configs(stream); +} + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) { struct xe_bo *bo; @@ -383,6 +606,148 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream); } +static void xe_oa_stream_enable(struct xe_oa_stream *stream) +{ + stream->pollin = false; + + xe_oa_enable(stream); + + if (stream->sample) + hrtimer_start(&stream->poll_check_timer, + ns_to_ktime(stream->poll_period_ns), + HRTIMER_MODE_REL_PINNED); +} + +static void xe_oa_stream_disable(struct xe_oa_stream *stream) +{ + xe_oa_disable(stream); + + if (stream->sample) + hrtimer_cancel(&stream->poll_check_timer); +} + +static void xe_oa_enable_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + return; + + stream->enabled = true; + + xe_oa_stream_enable(stream); +} + +static void xe_oa_disable_locked(struct xe_oa_stream *stream) +{ + if (!stream->enabled) + return; + + stream->enabled = false; + + xe_oa_stream_disable(stream); +} + +static long xe_oa_config_locked(struct xe_oa_stream 
*stream, u64 arg) +{ + struct drm_xe_ext_set_property ext; + long ret = stream->oa_config->id; + struct xe_oa_config *config; + int err; + + err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); + if (XE_IOCTL_DBG(stream->oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || + XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) + return -EINVAL; + + config = xe_oa_get_oa_config(stream->oa, ext.value); + if (!config) + return -ENODEV; + + if (config != stream->oa_config) { + err = xe_oa_emit_oa_config(stream); + if (!err) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + xe_oa_config_put(config); + + return ret; +} + +static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case DRM_XE_PERF_IOCTL_ENABLE: + xe_oa_enable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_DISABLE: + xe_oa_disable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_CONFIG: + return xe_oa_config_locked(stream, arg); + } + + return -EINVAL; +} + +static long xe_oa_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct xe_oa_stream *stream = file->private_data; + long ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_ioctl_locked(stream, cmd, arg); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static void xe_oa_destroy_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + xe_oa_disable_locked(stream); + + xe_oa_stream_destroy(stream); + + if (stream->exec_q) + xe_exec_queue_put(stream->exec_q); + + kfree(stream); +} + +static int xe_oa_release(struct inode *inode, struct file *file) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + mutex_lock(>->oa.gt_lock); + xe_oa_destroy_locked(stream); + mutex_unlock(>->oa.gt_lock); + + /* Release the reference the perf stream kept on the driver */ + drm_dev_put(>_to_xe(gt)->drm); + + return 0; +} + +static const struct file_operations xe_oa_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release = xe_oa_release, + .poll = xe_oa_poll, + .unlocked_ioctl = xe_oa_ioctl, +}; + static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -436,6 +801,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, WRITE_ONCE(u->exclusive_stream, stream); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); mutex_init(&stream->stream_lock); @@ -479,10 +848,21 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, if (ret) goto err_free; + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); + if (stream_fd < 0) { + ret = stream_fd; + goto err_destroy; + } + + if (!param->disabled) + xe_oa_enable_locked(stream); + /* Hold a reference on the drm device till stream_fd is released */ drm_dev_get(&stream->oa->xe->drm); return stream_fd; +err_destroy: + xe_oa_stream_destroy(stream); err_free: kfree(stream); exit: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 307409f968e2..1e09f786b3e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type { * @drm_xe_user_extension 
base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. * @param field in struct @drm_xe_perf_param points to the first * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 -- cgit From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:01 -0700 Subject: drm/xe/oa/uapi: Read file_operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the OA stream read file_operation. Both blocking and non-blocking reads are supported. As part of read system call, the read copies OA perf data from the OA buffer to the user buffer, after appending packet headers for status and data packets. v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh) v3: Introduce 'struct drm_xe_oa_stream_status' v4: Define oa_status register bitfields (Umesh) v5: Add extensions to 'struct drm_xe_oa_stream_status' v6: Minor cleanup, eliminate report32 variable v7: Use -EIO to signal to userspace to read OASTATUS using DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to return -EINVAL Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh) rmw oa_status bits (Umesh) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 201 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 3 + include/uapi/drm/xe_drm.h | 20 ++++ 3 files changed, 224 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a71111859190..86d56b080eff 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -164,6 +164,14 @@ static u64 oa_report_id(struct xe_oa_stream *stream, void *report) return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; } +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) { return oa_report_header_64bit(stream) ? 
@@ -171,6 +179,14 @@ static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) *((u32 *)report + 1); } +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -245,6 +261,95 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset, const u8 *report) +{ + int report_size = stream->oa_buffer.format->size; + int report_size_partial; + u8 *oa_buf_end; + + if ((count - *offset) < report_size) + return -ENOSPC; + + buf += *offset; + + oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { + return -EFAULT; + } + + *offset += report_size; + + return 0; +} + +static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + int report_size = stream->oa_buffer.format->size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 mask = (XE_OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + u32 head, tail; + int ret = 0; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + head = stream->oa_buffer.head; + tail = stream->oa_buffer.tail; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + + for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + + ret = xe_oa_append_report(stream, buf, count, offset, report); + if (ret) + break; + + if (is_power_of_2(report_size)) { + /* Clear out report id and timestamp to detect unlanded reports */ + oa_report_id_clear(stream, (void *)report); + oa_timestamp_clear(stream, (void *)report); + } else { + u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u32 part = oa_buf_end - report; + + /* Zero out the entire report */ + if (report_size <= part) { + memset(report, 0, report_size); + } else { + memset(report, 0, part); + memset(oa_buf_base, 0, report_size - part); + } + } + } + + if (start_offset != *offset) { + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + xe_mmio_write32(stream->gt, oaheadptr, + (head + gtt_offset) & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + } + + return ret; +} + static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -318,6 +423,78 @@ static void xe_oa_disable(struct xe_oa_stream *stream) } } +static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) +{ + /* We might wait indefinitely if periodic sampling is not enabled */ + if (!stream->periodic) + return -EINVAL; + + return wait_event_interruptible(stream->poll_wq, + xe_oa_buffer_check_unlocked(stream)); +} + +#define 
OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \
+				OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST)
+
+static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
+			size_t count, size_t *offset)
+{
+	/* Only clear our bits to avoid side-effects */
+	stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+					  OASTATUS_RELEVANT_BITS, 0);
+	/*
+	 * Signal to userspace that there is non-zero OA status to read via
+	 * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl
+	 */
+	if (stream->oa_status & OASTATUS_RELEVANT_BITS)
+		return -EIO;
+
+	return xe_oa_append_reports(stream, buf, count, offset);
+}
+
+static ssize_t xe_oa_read(struct file *file, char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	struct xe_oa_stream *stream = file->private_data;
+	size_t offset = 0;
+	int ret;
+
+	/* Can't read from disabled streams */
+	if (!stream->enabled || !stream->sample)
+		return -EINVAL;
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		do {
+			ret = xe_oa_wait_unlocked(stream);
+			if (ret)
+				return ret;
+
+			mutex_lock(&stream->stream_lock);
+			ret = __xe_oa_read(stream, buf, count, &offset);
+			mutex_unlock(&stream->stream_lock);
+		} while (!offset && !ret);
+	} else {
+		mutex_lock(&stream->stream_lock);
+		ret = __xe_oa_read(stream, buf, count, &offset);
+		mutex_unlock(&stream->stream_lock);
+	}
+
+	/*
+	 * Typically we clear pollin here in order to wait for the new hrtimer callback
+	 * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC,
+	 * which means that more OA data is available than could fit in the user provided
+	 * buffer. In this case we want the next poll() call to not block.
+	 *
+	 * Also in case of -EIO, we have already waited for data before returning
+	 * -EIO, so there is no need to wait again
+	 */
+	if (ret != -ENOSPC && ret != -EIO)
+		stream->pollin = false;
+
+	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */
+	return offset ?: (ret ?: -EAGAIN);
+}
+
 static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream,
				   struct file *file, poll_table *wait)
 {
@@ -680,6 +857,27 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
	return ret;
 }

+static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
+{
+	struct drm_xe_oa_stream_status status = {};
+	void __user *uaddr = (void __user *)arg;
+
+	/* Map from register to uapi bits */
+	if (stream->oa_status & OASTATUS_REPORT_LOST)
+		status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST;
+	if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
+		status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW;
+	if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
+		status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW;
+	if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
+		status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL;
+
+	if (copy_to_user(uaddr, &status, sizeof(status)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
			       unsigned int cmd,
			       unsigned long arg)
@@ -693,6 +891,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
		return 0;
	case DRM_XE_PERF_IOCTL_CONFIG:
		return xe_oa_config_locked(stream, arg);
+	case DRM_XE_PERF_IOCTL_STATUS:
+		return xe_oa_status_locked(stream, arg);
	}

	return -EINVAL;
@@ -745,6 +945,7 @@ static const struct file_operations xe_oa_fops = {
	.llseek = no_llseek,
	.release = xe_oa_release,
	.poll = xe_oa_poll,
+	.read = xe_oa_read,
	.unlocked_ioctl = xe_oa_ioctl,
 };

diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6700383b1a52..5bb8ce0d71c9 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -222,5 +222,8 @@ struct xe_oa_stream {
	/** @poll_period_ns: hrtimer period for checking OA buffer for available data */
	u64 poll_period_ns;
+
+	/** @oa_status: temporary storage for oa_status register value */
+	u32 oa_status;
 };
 #endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1e09f786b3e6..03a6e479227a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1570,6 +1570,26 @@ struct drm_xe_oa_config {
	__u64 regs_ptr;
 };

+/**
+ * struct drm_xe_oa_stream_status - OA stream status returned from
+ * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to
+ * query stream status in response to EIO errno from perf fd read().
+ */
+struct drm_xe_oa_stream_status {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @oa_status: OA stream status (see Bspec 46717/61226) */
+	__u64 oa_status;
+#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL		(1 << 3)
+#define DRM_XE_OASTATUS_COUNTER_OVERFLOW	(1 << 2)
+#define DRM_XE_OASTATUS_BUFFER_OVERFLOW		(1 << 1)
+#define DRM_XE_OASTATUS_REPORT_LOST		(1 << 0)
+
+	/** @reserved: reserved for future use */
+	__u64 reserved[3];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- cgit
From 2f4a730fcd2d6ae7378a67fe78797b0a3f7ca1b3 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit
Date: Mon, 17 Jun 2024 18:46:02 -0700
Subject: drm/xe/oa: Add OAR support

Add OAR support to allow userspace to execute MI_REPORT_PERF_COUNT on
render engines. Configuration batches are used to program the OAR unit,
as well as to modify the render engine context image of a specified exec
queue (to have correct register values when that context switches in).
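As a rough usage sketch (not taken from this patch): userspace binds the OA
stream to an exec queue at open time via chained set-property extensions and
then submits MI_REPORT_PERF_COUNT batches on that queue. The exec-queue
property name and the perf-ioctl plumbing shown here are assumptions from
elsewhere in this series, not definitions made by this patch:

	/*
	 * Hedged userspace sketch; DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID,
	 * drm_xe_perf_param and DRM_IOCTL_XE_PERF are assumed names.
	 */
	struct drm_xe_ext_set_property props[2] = {};

	props[0].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	props[0].property = DRM_XE_OA_PROPERTY_OA_METRIC_SET;
	props[0].value = config_id;			/* id from OA config add ioctl */
	props[0].base.next_extension = (__u64)(uintptr_t)&props[1];

	props[1].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	props[1].property = DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID;	/* assumed name */
	props[1].value = exec_queue_id;			/* queue issuing MI_REPORT_PERF_COUNT */

	struct drm_xe_perf_param param = {
		.perf_type = DRM_XE_PERF_TYPE_OA,
		.perf_op = DRM_XE_PERF_OP_STREAM_OPEN,
		.param = (__u64)(uintptr_t)&props[0],
	};

	int stream_fd = ioctl(drm_fd, DRM_IOCTL_XE_PERF, &param);	/* < 0 on error */

With the exec queue bound, the modified context image keeps the OAR counter
configuration across context switches, so reports taken from that queue stay
consistent.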
v2: Rename/refactor xe_oa_modify_self (Umesh) v3: Move IS_MI_LRI_CMD() into xe_oa.c (Michal) Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-11-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 1 + drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/xe_lrc.c | 11 +- drivers/gpu/drm/xe/xe_lrc.h | 1 + drivers/gpu/drm/xe/xe_oa.c | 193 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 4 + 6 files changed, 206 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index b7bf99dd4848..10ec2920d31b 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -45,6 +45,7 @@ #define MI_LRI_MMIO_REMAP_EN REG_BIT(17) #define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1) #define MI_LRI_FORCE_POSTED REG_BIT(12) +#define MI_LRI_LEN(x) (((x) & 0xff) + 1) #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 263ffc7bc2ef..cdc68d373165 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -129,6 +129,7 @@ #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) #define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 43bc5e33a6c7..94ff62e1d95e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -651,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) /* Make the magic macros work */ #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset +#define __xe_lrc_regs_offset xe_lrc_regs_offset #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) @@ -658,6 +659,11 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 #define LRC_PPHWSP_SIZE SZ_4K +u32 xe_lrc_regs_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; +} + static size_t lrc_reg_size(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1250) @@ -695,11 +701,6 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } -static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) -{ - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; -} - static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 001af6c79454..c24542e89318 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -52,6 +52,7 @@ static inline void xe_lrc_put(struct xe_lrc *lrc) size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); +u32 xe_lrc_regs_offset(struct xe_lrc *lrc); void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); u32 xe_lrc_ring_tail(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 
86d56b080eff..d9285c976dbb 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -13,7 +13,9 @@
 #include
 #include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
 #include "regs/xe_oa_regs.h"
 #include "xe_assert.h"
 #include "xe_bb.h"
@@ -24,6 +26,7 @@
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_printk.h"
+#include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 #include "xe_oa.h"
@@ -58,6 +61,12 @@ struct xe_oa_config {
	struct rcu_head rcu;
 };

+struct flex {
+	struct xe_reg reg;
+	u32 offset;
+	u32 value;
+};
+
 struct xe_oa_open_param {
	u32 oa_unit_id;
	bool sample;
@@ -598,6 +607,93 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
		free_oa_config_bo(oa_bo);
 }

+static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
+			     struct xe_bb *bb, const struct flex *flex, u32 count)
+{
+	u32 offset = xe_bo_ggtt_addr(lrc->bo);
+
+	do {
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+		bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
+		bb->cs[bb->len++] = 0;
+		bb->cs[bb->len++] = flex->value;
+
+	} while (flex++, --count);
+}
+
+static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
+				  const struct flex *flex, u32 count)
+{
+	struct xe_bb *bb;
+	int err;
+
+	bb = xe_bb_new(stream->gt, 4 * count, false);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		goto exit;
+	}
+
+	xe_oa_store_flex(stream, lrc, bb, flex, count);
+
+	err = xe_oa_submit_bb(stream, bb);
+	xe_bb_free(bb, NULL);
+exit:
+	return err;
+}
+
+static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
+{
+	struct xe_bb *bb;
+	int err;
+
+	bb = xe_bb_new(stream->gt, 3, false);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		goto exit;
+	}
+
+	write_cs_mi_lri(bb, reg_lri, 1);
+
+	err = xe_oa_submit_bb(stream, bb);
+	xe_bb_free(bb, NULL);
+exit:
+	return err;
+}
+
+static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
+{
+	const struct xe_oa_format *format = stream->oa_buffer.format;
+	struct xe_lrc *lrc = stream->exec_q->lrc[0];
+	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
+	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
+		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
+
+	struct flex regs_context[] = {
+		{
+			OACTXCONTROL(stream->hwe->mmio_base),
+			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
+			enable ? OA_COUNTER_RESUME : 0,
+		},
+		{
+			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
+			regs_offset + CTX_CONTEXT_CONTROL,
+			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
+				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
+		},
+	};
+	struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
+	int err;
+
+	/* Modify stream hwe context image with regs_context */
+	err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
+				     regs_context, ARRAY_SIZE(regs_context));
+	if (err)
+		return err;
+
+	/* Apply reg_lri using LRI */
+	return xe_oa_load_with_lri(stream, &reg_lri);
+}
+
 #define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)

 static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
@@ -615,6 +711,10 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
			_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
	}

+	/* disable the context save/restore or OAR counters */
+	if (stream->exec_q)
+		xe_oa_configure_oar_context(stream, false);
+
	/* Make sure we disable noa to save power.
*/ xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); @@ -743,6 +843,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { u32 oa_debug, sqcnt1; + int ret; /* * Wa_1508761755:xehpsdv, dg2 @@ -780,6 +881,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + if (stream->exec_q) { + ret = xe_oa_configure_oar_context(stream, true); + if (ret) + return ret; + } + return xe_oa_emit_oa_config(stream); } @@ -949,6 +1056,81 @@ static const struct file_operations xe_oa_fops = { .unlocked_ioctl = xe_oa_ioctl, }; +static bool engine_supports_mi_query(struct xe_hw_engine *hwe) +{ + return hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE; +} + +static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) +{ + u32 idx = *offset; + u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); + bool found = false; + + idx++; + for (; idx < len; idx += 2) { + if (state[idx] == reg) { + found = true; + break; + } + } + + *offset = idx; + return found; +} + +#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ + REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) + +static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) +{ + struct xe_lrc *lrc = stream->exec_q->lrc[0]; + u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + + lrc->ring.size) / sizeof(u32); + u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 *state = (u32 *)lrc->bo->vmap.vaddr; + + if (drm_WARN_ON(&stream->oa->xe->drm, !state)) + return U32_MAX; + + for (; offset < len; ) { + if (IS_MI_LRI_CMD(state[offset])) { + /* + * We expect reg-value pairs in MI_LRI command, so + * MI_LRI_LEN() should be even + */ + drm_WARN_ON(&stream->oa->xe->drm, + MI_LRI_LEN(state[offset]) & 0x1); + + if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) + break; + } else { + offset++; + } + } + + return offset < len ? offset : U32_MAX; +} + +static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) +{ + struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); + u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; + + /* Do this only once. Failure is stored as offset of U32_MAX */ + if (offset) + goto exit; + + offset = xe_oa_context_image_offset(stream, reg.addr); + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; + + drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", + stream->hwe->name, offset); +exit: + return offset && offset != U32_MAX ? 
0 : -ENODEV; +} + static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -966,6 +1148,17 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { + /* If we don't find the context offset, just return error */ + ret = xe_oa_set_ctx_ctrl_offset(stream); + if (ret) { + drm_err(&stream->oa->xe->drm, + "xe_oa_set_ctx_ctrl_offset failed for %s\n", + stream->hwe->name); + goto exit; + } + } + stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) { drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 5bb8ce0d71c9..d28ee566c51c 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -13,6 +13,7 @@ #include #include "regs/xe_reg_defs.h" +#include "xe_hw_engine_types.h" #define XE_OA_BUFFER_SIZE SZ_16M @@ -137,6 +138,9 @@ struct xe_oa { /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ struct idr metrics_idr; + /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ + u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; + /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; -- cgit From 14e077f8006df9d2d7adf380f0c80e16d6a0a548 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:03 -0700 Subject: drm/xe/oa: Add OAC support Similar to OAR, allow userspace to execute MI_REPORT_PERF_COUNT on compute engines of a specified exec queue. Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-12-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_oa_regs.h | 3 ++ drivers/gpu/drm/xe/xe_oa.c | 74 ++++++++++++++++++++++++++++++-- 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index cdc68d373165..c38db2a74614 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -130,6 +130,7 @@ #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) +#define CTX_CTRL_RUN_ALONE REG_BIT(7) #define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 99bad563d51d..2c9e1214e2af 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -69,6 +69,9 @@ #define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) #define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) #define OASTATUS_REPORT_LOST REG_BIT(0) +/* OAC unit */ +#define OAC_OACONTROL XE_REG(0x15114) + /* OAM unit */ #define OAM_HEAD_POINTER_OFFSET (0x1a0) #define OAM_TAIL_POINTER_OFFSET (0x1a4) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index d9285c976dbb..42b0ba014e35 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -396,6 +396,19 @@ static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel 
REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size);
 }

+static u32 __oa_ccs_select(struct xe_oa_stream *stream)
+{
+	u32 val;
+
+	if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE)
+		return 0;
+
+	val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance);
+	xe_assert(stream->oa->xe,
+		  REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance);
+	return val;
+}
+
 static void xe_oa_enable(struct xe_oa_stream *stream)
 {
	const struct xe_oa_format *format = stream->oa_buffer.format;
@@ -410,7 +423,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream)

	regs = __oa_regs(stream);
	val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
-		OAG_OACONTROL_OA_COUNTER_ENABLE;
+		__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;

	xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
 }
@@ -694,6 +707,57 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
	return xe_oa_load_with_lri(stream, &reg_lri);
 }

+static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
+{
+	const struct xe_oa_format *format = stream->oa_buffer.format;
+	struct xe_lrc *lrc = stream->exec_q->lrc[0];
+	u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
+	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
+		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
+	struct flex regs_context[] = {
+		{
+			OACTXCONTROL(stream->hwe->mmio_base),
+			stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
+			enable ? OA_COUNTER_RESUME : 0,
+		},
+		{
+			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
+			regs_offset + CTX_CONTEXT_CONTROL,
+			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
+				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
+			_MASKED_FIELD(CTX_CTRL_RUN_ALONE,
+				      enable ? CTX_CTRL_RUN_ALONE : 0),
+		},
+	};
+	struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
+	int err;
+
+	/* Set ccs select to enable programming of OAC_OACONTROL */
+	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
+
+	/* Modify stream hwe context image with regs_context */
+	err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
+				     regs_context, ARRAY_SIZE(regs_context));
+	if (err)
+		return err;
+
+	/* Apply reg_lri using LRI */
+	return xe_oa_load_with_lri(stream, &reg_lri);
+}
+
+static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
+{
+	switch (stream->hwe->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return xe_oa_configure_oar_context(stream, enable);
+	case XE_ENGINE_CLASS_COMPUTE:
+		return xe_oa_configure_oac_context(stream, enable);
+	default:
+		/* Video engines do not support MI_REPORT_PERF_COUNT */
+		return 0;
+	}
+}
+
 #define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)

 static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
@@ -713,7 +777,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)

	/* disable the context save/restore or OAR counters */
	if (stream->exec_q)
-		xe_oa_configure_oar_context(stream, false);
+		xe_oa_configure_oa_context(stream, false);

	/* Make sure we disable noa to save power.
*/ xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); @@ -881,8 +945,9 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + /* Configure OAR/OAC */ if (stream->exec_q) { - ret = xe_oa_configure_oar_context(stream, true); + ret = xe_oa_configure_oa_context(stream, true); if (ret) return ret; } @@ -1556,6 +1621,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; + + if (param.exec_q->width > 1) + drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); } /* -- cgit From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:04 -0700 Subject: drm/xe/oa/uapi: Query OA unit properties Implement query for properties of OA units present on a device. v2: Clean up reserved/pad fields (Umesh) Follow the same scheme as other query structs v3: Skip reporting reserved engines attached to OA units v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh) v5: Don't expose capabilities as OR of properties (Umesh) v6: Add extensions to query output structs: drm_xe_oa_unit, drm_xe_query_oa_units and drm_xe_oa_stream_info v7: Change oa_units[] array to __u64 type Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 13 +++++++ drivers/gpu/drm/xe/xe_query.c | 77 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 42b0ba014e35..038caeb7c9e7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1050,6 +1050,17 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) return 0; } +static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + void __user *uaddr = (void __user *)arg; + + if (copy_to_user(uaddr, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -1065,6 +1076,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return xe_oa_config_locked(stream, arg); case DRM_XE_PERF_IOCTL_STATUS: return xe_oa_status_locked(stream, arg); + case DRM_XE_PERF_IOCTL_INFO: + return xe_oa_info_locked(stream, arg); } return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 995effcb904b..4e01df6b1b7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -602,6 +602,82 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } +static size_t calc_oa_unit_query_size(struct xe_device *xe) +{ + size_t size = sizeof(struct drm_xe_query_oa_units); + struct xe_gt *gt; + int i, id; + + for_each_gt(gt, xe, id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + size += sizeof(struct drm_xe_oa_unit); + size += gt->oa.oa_unit[i].num_engines * + sizeof(struct drm_xe_engine_class_instance); + } + } + + return size; +} + +static int query_oa_units(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = 
u64_to_user_ptr(query->data);
+	size_t size = calc_oa_unit_query_size(xe);
+	struct drm_xe_query_oa_units *qoa;
+	enum xe_hw_engine_id hwe_id;
+	struct drm_xe_oa_unit *du;
+	struct xe_hw_engine *hwe;
+	struct xe_oa_unit *u;
+	int gt_id, i, j, ret;
+	struct xe_gt *gt;
+	u8 *pdu;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	qoa = kzalloc(size, GFP_KERNEL);
+	if (!qoa)
+		return -ENOMEM;
+
+	pdu = (u8 *)&qoa->oa_units[0];
+	for_each_gt(gt, xe, gt_id) {
+		for (i = 0; i < gt->oa.num_oa_units; i++) {
+			u = &gt->oa.oa_unit[i];
+			du = (struct drm_xe_oa_unit *)pdu;
+
+			du->oa_unit_id = u->oa_unit_id;
+			du->oa_unit_type = u->type;
+			du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
+			du->capabilities = DRM_XE_OA_CAPS_BASE;
+
+			j = 0;
+			for_each_hw_engine(hwe, gt, hwe_id) {
+				if (!xe_hw_engine_is_reserved(hwe) &&
+				    xe_oa_unit_id(hwe) == u->oa_unit_id) {
+					du->eci[j].engine_class =
+						xe_to_user_engine_class[hwe->class];
+					du->eci[j].engine_instance = hwe->logical_instance;
+					du->eci[j].gt_id = gt->info.id;
+					j++;
+				}
+			}
+			du->num_engines = j;
+			pdu += sizeof(*du) + j * sizeof(du->eci[0]);
+			qoa->num_oa_units++;
+		}
+	}
+
+	ret = copy_to_user(query_ptr, qoa, size);
+	kfree(qoa);
+
+	return ret ? -EFAULT : 0;
+}
+
 static int (* const xe_query_funcs[])(struct xe_device *xe,
				      struct drm_xe_device_query *query) = {
	query_engines,
@@ -612,6 +688,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
	query_gt_topology,
	query_engine_cycles,
	query_uc_fw_version,
+	query_oa_units,
 };

 int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 03a6e479227a..93e00be44b2d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -689,6 +689,7 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION	7
+#define DRM_XE_DEVICE_QUERY_OA_UNITS		8
	/** @query: The type of data to query */
	__u32 query;
@@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type {
	DRM_XE_OA_UNIT_TYPE_OAM,
 };

+/**
+ * struct drm_xe_oa_unit - describe OA unit
+ */
+struct drm_xe_oa_unit {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @oa_unit_id: OA unit ID */
+	__u32 oa_unit_id;
+
+	/** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */
+	__u32 oa_unit_type;
+
+	/** @capabilities: OA capabilities bit-mask */
+	__u64 capabilities;
+#define DRM_XE_OA_CAPS_BASE	(1 << 0)
+
+	/** @oa_timestamp_freq: OA timestamp freq */
+	__u64 oa_timestamp_freq;
+
+	/** @reserved: MBZ */
+	__u64 reserved[4];
+
+	/** @num_engines: number of engines in @eci array */
+	__u64 num_engines;
+
+	/** @eci: engines attached to this OA unit */
+	struct drm_xe_engine_class_instance eci[];
+};
+
+/**
+ * struct drm_xe_query_oa_units - describe OA units
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct
+ * drm_xe_query_oa_units in .data.
+ *
+ * OA unit properties for all OA units can be accessed using a code block
+ * such as the one below:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_query_oa_units *qoa;
+ *	struct drm_xe_oa_unit *oau;
+ *	u8 *poau;
+ *
+ *	// malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS.
Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 @@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status { __u64 reserved[3]; }; +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit From 392bf22238ff88506f410c464ba0c7a84e9de471 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:05 -0700 Subject: drm/xe/oa/uapi: OA buffer mmap Allow the OA buffer to be mmap'd to userspace. This is needed for the MMIO trigger use case. Even otherwise, with whitelisted OA head/tail ptr registers, userspace can receive/interpret OA data from the mmap'd buffer without issuing read()'s on the OA stream fd. v2: Remove unmap_mapping_range from xe_oa_release (Thomas H) Use vm_flags_mod (Umesh) Acked-by: Rodrigo Vivi Suggested-by: Umesh Nerlige Ramappa Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-14-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 038caeb7c9e7..00654213cd93 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -824,6 +824,8 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) return PTR_ERR(bo); stream->oa_buffer.bo = bo; + /* mmap implementation requires OA buffer to be in system memory */ + xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); stream->oa_buffer.vaddr = bo->vmap.vaddr; return 0; } @@ -1125,6 +1127,49 @@ static int xe_oa_release(struct inode *inode, struct file *file) return 0; } +static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_bo *bo = stream->oa_buffer.bo; + unsigned long start = vma->vm_start; + int i, ret; + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); + return -EACCES; + } + + /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ + if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); + return -EINVAL; + } + + /* + * Only support VM_READ, enforce MAP_PRIVATE by checking for + * VM_MAYSHARE, don't copy the vma on fork + */ + if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) { + 
drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n"); + return -EINVAL; + } + vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, + VM_MAYWRITE | VM_MAYEXEC); + + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == + (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + for (i = 0; i < bo->ttm.ttm->num_pages; i++) { + ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), + PAGE_SIZE, vma->vm_page_prot); + if (ret) + break; + + start += PAGE_SIZE; + } + + return ret; +} + static const struct file_operations xe_oa_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -1132,6 +1177,7 @@ static const struct file_operations xe_oa_fops = { .poll = xe_oa_poll, .read = xe_oa_read, .unlocked_ioctl = xe_oa_ioctl, + .mmap = xe_oa_mmap, }; static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -- cgit From 828a8eaf37c3fac6ba048995f55f1647a4ac542d Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:06 -0700 Subject: drm/xe/oa: Add MMIO trigger support Add MMIO trigger support and allow-list required registers for MMIO trigger use case. Registers are whitelisted for the lifetime of the driver but MMIO trigger is enabled only for the duration of the stream. Bspec: 45925, 60340, 61228 Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-15-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_oa.c | 24 +++++++++++++++++++++++- drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +++++++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 2c9e1214e2af..1189f5a540a8 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -60,6 +60,10 @@ #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) +#define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) +#define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) +#define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) #define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) #define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) @@ -69,6 +73,7 @@ #define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) #define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) #define OASTATUS_REPORT_LOST REG_BIT(0) +#define OAG_MMIOTRIGGER XE_REG(0xdb1c) /* OAC unit */ #define OAC_OACONTROL XE_REG(0x15114) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 00654213cd93..beafe409dba9 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -760,6 +760,13 @@ static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) #define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) +static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, + enable && stream && stream->sample ? 
+ 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); +} + static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) { u32 sqcnt1; @@ -775,6 +782,9 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); } + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + oag_configure_mmio_trigger(stream, false)); + /* disable the context save/restore or OAR counters */ if (stream->exec_q) xe_oa_configure_oa_context(stream, false); @@ -927,9 +937,17 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | OAG_OA_DEBUG_INCLUDE_CLK_RATIO; + if (GRAPHICS_VER(stream->oa->xe) >= 20) + oa_debug |= + /* The three bits below are needed to get PEC counters running */ + OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL | + OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | + OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | - oag_report_ctx_switches(stream)); + oag_report_ctx_switches(stream) | + oag_configure_mmio_trigger(stream, true)); xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? (OAG_OAGLBCTXCTRL_COUNTER_RESUME | @@ -2203,6 +2221,10 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + /* Ensure MMIO trigger remains disabled till there is a stream */ + xe_mmio_write32(gt, u->regs.oa_debug, + oag_configure_mmio_trigger(NULL, false)); + /* Set oa_unit_ids now to ensure ids remain contiguous */ u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; } diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 3fa2ece7d228..3996934974fa 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -7,6 +7,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -63,7 +64,28 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) }, - + { XE_RTP_NAME("oa_reg_render"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(OAG_OASTATUS, + RING_FORCE_TO_NONPRIV_ACCESS_RD), + WHITELIST(OAG_OAHEADPTR, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4)) + }, + { XE_RTP_NAME("oa_reg_compute"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(OAG_OASTATUS, + RING_FORCE_TO_NONPRIV_ACCESS_RD), + WHITELIST(OAG_OAHEADPTR, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4)) + }, {} }; -- cgit From 70af432b9acfb382dcd4f5f936528db2de992a8e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:07 -0700 Subject: drm/xe/oa: Override GuC RC with OA on PVC On PVC, a w/a resets RCS/CCS before it goes into RC6. This breaks OA since OA does not expect engine resets during its use. Fix it by disabling RC6. 
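In outline, the override brackets the stream's lifetime: it is set before the
stream takes forcewake and dropped again on stream destroy or on the error
unwind path. A condensed sketch of the pairing (taken from the diff that
follows, trimmed for brevity):

	/* sketch only: condensed from the code added by this patch */
	if (stream->oa->xe->info.platform == XE_PVC) {
		ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
						    SLPC_GUCRC_MODE_GUCRC_NO_RC6);
		if (ret)
			goto err_free_configs;
		stream->override_gucrc = true;
	}

	/* ... later, in xe_oa_stream_destroy() or on the error path ... */
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));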
v2: Convert to gt oriented error/warn messages (Michal) Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-16-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 57 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_pc.h | 3 +++ drivers/gpu/drm/xe/xe_oa.c | 24 +++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 3 +++ 4 files changed, 87 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 666a37106bc5..2b45a9cd3ec0 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pcode.h" +#include "xe_pm.h" #define MCHBAR_MIRROR_BASE_SNB 0x140000 @@ -190,6 +191,27 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) return ret; } +static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) +{ + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), + id, + }; + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", + ERR_PTR(ret)); + + return ret; +} + static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -772,6 +794,41 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) return 0; } +/** + * xe_guc_pc_override_gucrc_mode - override GUCRC mode + * @pc: Xe_GuC_PC instance + * @mode: new value of the mode. + * + * Return: 0 on success, negative error code on error + */ +int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) +{ + int ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + +/** + * xe_guc_pc_unset_gucrc_mode - unset GUCRC mode override + * @pc: Xe_GuC_PC instance + * + * Return: 0 on success, negative error code on error + */ +int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) +{ + int ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + static void pc_init_pcode_freq(struct xe_guc_pc *pc) { u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 532cac985a6d..8a7b91ce1b3e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,11 +9,14 @@ #include struct xe_guc_pc; +enum slpc_gucrc_mode; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); +int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); +int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index beafe409dba9..be6502066e53 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -12,6 +12,7 @@ #include #include +#include "abi/guc_actions_slpc_abi.h" #include "instructions/xe_mi_commands.h" #include 
"regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -26,6 +27,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" +#include "xe_guc_pc.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" @@ -817,6 +819,10 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_pm_runtime_put(stream->oa->xe); + /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ + if (stream->override_gucrc) + xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); + xe_oa_free_configs(stream); } @@ -1308,6 +1314,21 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto exit; } + /* + * Wa_1509372804:pvc + * + * GuC reset of engines causes OA to lose configuration + * state. Prevent this by overriding GUCRC mode. + */ + if (stream->oa->xe->info.platform == XE_PVC) { + ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, + SLPC_GUCRC_MODE_GUCRC_NO_RC6); + if (ret) + goto err_free_configs; + + stream->override_gucrc = true; + } + /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -1354,6 +1375,9 @@ err_free_oa_buf: err_fw_put: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_pm_runtime_put(stream->oa->xe); + if (stream->override_gucrc) + xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); +err_free_configs: xe_oa_free_configs(stream); exit: return ret; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index d28ee566c51c..0981f0e57676 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -227,6 +227,9 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 poll_period_ns; + /** @override_gucrc: GuC RC has been overridden for the OA stream */ + bool override_gucrc; + /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; }; -- cgit From 3a1fc394ba85e851084d9fa1bdd4ecd625ef913b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:08 -0700 Subject: drm/xe/oa: Changes to OA_TAKEN Rename OA_TAKEN to xe_oa_circ_diff, since xe_oa_circ_diff better describes what the macro actually does. Also convert to function and add xe_oa_stream arg. These will be used in the following patch. 
Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-17-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index be6502066e53..2d398b7231c1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -36,7 +36,6 @@ #include "xe_pm.h" #include "xe_sched_job.h" -#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1)) #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX @@ -113,6 +112,11 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) +{ + return (tail - head) & (XE_OA_BUFFER_SIZE - 1); +} + static void xe_oa_config_release(struct kref *ref) { struct xe_oa_config *oa_config = @@ -217,11 +221,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) * increments. Also report size may not be a power of 2. Compute potential * partially landed report in OA buffer. */ - partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail); partial_report_size %= report_size; /* Subtract partial amount off the tail */ - hw_tail = OA_TAKEN(hw_tail, partial_report_size); + hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size); tail = hw_tail; @@ -233,24 +237,24 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) * This is assuming that the writes of the OA unit land in memory in the order * they were written. If not : (╯°□°)╯︵ ┻━┻ */ - while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { + while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { void *report = stream->oa_buffer.vaddr + tail; if (oa_report_id(stream, report) || oa_timestamp(stream, report)) break; - tail = OA_TAKEN(tail, report_size); + tail = xe_oa_circ_diff(stream, tail, report_size); } - if (OA_TAKEN(hw_tail, tail) > report_size) + if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size) drm_dbg(&stream->oa->xe->drm, "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", stream->oa_buffer.head, tail, hw_tail); stream->oa_buffer.tail = tail; - pollin = OA_TAKEN(stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -323,7 +327,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); - for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { + for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) { u8 *report = oa_buf_base + head; ret = xe_oa_append_report(stream, buf, count, offset, report); -- cgit From cffd77865f476994680892601e09bc2164179907 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:09 -0700 Subject: drm/xe/oa: Enable Xe2+ overrun mode Enable Xe2+ overrun mode. 
For Xe2+, when overrun mode is enabled, there are no partial reports at the end of buffer, making the OA buffer effectively a non-power-of-2 size circular buffer whose size, circ_size, is a multiple of the report size. v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh) Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-18-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 35 +++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_oa_types.h | 3 +++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2d398b7231c1..34206e0b6a08 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = { static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) { - return (tail - head) & (XE_OA_BUFFER_SIZE - 1); + return tail >= head ? tail - head : + tail + stream->oa_buffer.circ_size - head; +} + +static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) +{ + return ptr + n >= stream->oa_buffer.circ_size ? + ptr + n - stream->oa_buffer.circ_size : ptr + n; } static void xe_oa_config_release(struct kref *ref) @@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, buf += *offset; - oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; report_size_partial = oa_buf_end - report; if (report_size_partial < report_size) { @@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, int report_size = stream->oa_buffer.format->size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 mask = (XE_OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; u32 head, tail; @@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, tail = stream->oa_buffer.tail; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + xe_assert(stream->oa->xe, + head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); - for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) { + for (; xe_oa_circ_diff(stream, tail, head); + head = xe_oa_circ_incr(stream, head, report_size)) { u8 *report = oa_buf_base + head; ret = xe_oa_append_report(stream, buf, count, offset, report); if (ret) break; - if (is_power_of_2(report_size)) { + if (!(stream->oa_buffer.circ_size % report_size)) { /* Clear out report id and timestamp to detect unlanded reports */ oa_report_id_clear(stream, (void *)report); oa_timestamp_clear(stream, (void *)report); } else { - u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; u32 part = oa_buf_end - report; /* Zero out the entire report */ @@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, gtt_offset & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = 0; - /* * PRM says: "This MMIO must be set before the OATAILPTR register and after the * OAHEADPTR register. This is to enable proper functionality of the overflow bit". 
@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + /* + * For Xe2+, when overrun mode is enabled, there are no partial reports at the end + * of buffer, making the OA buffer effectively a non-power-of-2 size circular + * buffer whose size, circ_size, is a multiple of the report size + */ + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_buffer.circ_size = + XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + else + stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; + if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { /* If we don't find the context offset, just return error */ ret = xe_oa_set_ctx_ctrl_offset(stream); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 0981f0e57676..706d45577dae 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -170,6 +170,9 @@ struct xe_oa_buffer { /** @tail: The last verified cached tail where HW has completed writing */ u32 tail; + + /** @circ_size: The effective circular buffer size, for Xe2+ */ + u32 circ_size; }; /** -- cgit From 8e7455dd0dedf88332f249f8b1e50bc554e4c1e3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 18 Jun 2024 12:49:47 +0200 Subject: drm/xe: Use ttm_uncached for BO with NEEDS_UC flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should honor the requested uncached mode also at the TTM layer. Otherwise, we risk losing updates to the memory based interrupts source or status vectors, as those require uncached memory. Signed-off-by: Michal Wajdeczko Cc: Thomas Hellström Cc: Matt Roper Acked-by: Thomas Hellström Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240618104947.729-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 74294f1b05bc..65c696966e96 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -378,6 +378,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) caching = ttm_write_combined; + if (bo->flags & XE_BO_FLAG_NEEDS_UC) { + /* + * Valid only for internally-created buffers only, for + * which cpu_caching is never initialized. + */ + xe_assert(xe, bo->cpu_caching == 0); + caching = ttm_uncached; + } + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); -- cgit From 7a893345a406b46b6a0f9575ce58e513cf79b997 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 18:34:13 +0200 Subject: drm/xe/guc: Move ARAT interrupts enabling to the upload step Even though ARAT interrupts are enabled by default, we still want to keep the code that enables them. But instead of doing that in the CTB enabling step, move this code to the upload step, where we already set up a few other registers related to the GuC.
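A note on the helper semantics, since the hunk below clears a bit rather than setting one: xe_mmio_rmw32(gt, reg, clr, set), as used here, reads the register, clears the clr bits, then sets the set bits. A userspace model of those semantics (the register contents and bit position are invented for illustration):

#include <assert.h>
#include <stdint.h>

/* Same clr/set convention as the kernel helper: clear first, then set. */
static uint32_t rmw32(uint32_t *reg, uint32_t clr, uint32_t set)
{
	uint32_t old = *reg;

	*reg = (old & ~clr) | set;
	return old;
}

int main(void)
{
	uint32_t pmintrmsk = 0xffffffffu; /* invented: everything masked */
	uint32_t arat_expired = 1u << 9;  /* invented bit position */

	/* clr = ARAT bit, set = 0: unmask, i.e. let the interrupt through */
	rmw32(&pmintrmsk, arat_expired, 0);
	assert(!(pmintrmsk & arat_expired));
	return 0;
}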
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240619163413.817-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7ecb509c87d7..02c028d0e91b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -476,6 +476,9 @@ static void guc_prepare_xfer(struct xe_guc *guc) xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + /* Make sure GuC receives ARAT interrupts */ + xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); } /* @@ -865,9 +868,6 @@ int xe_guc_enable_communication(struct xe_guc *guc) guc_enable_irq(guc); } - xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, - ARAT_EXPIRED_INTRMSK, 0); - err = xe_guc_ct_enable(&guc->ct); if (err) return err; -- cgit From cb925d31f091467dee09dd49eab8bcd2c9b3e73e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 19:54:27 +0200 Subject: drm/xe/oa: Fix potential NPD when OA is not initialized If oa->xe can be NULL then we shall not use it as a valid pointer. Fixes: cdf02fe1a94a ("drm/xe/oa/uapi: Add/remove OA config perf ops") Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Ashutosh Dixit Cc: Umesh Nerlige Ramappa Reviewed-by: Rodrigo Vivi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240619175427.861-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 34206e0b6a08..2277af816a34 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1725,7 +1725,8 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number */ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) { - struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; struct xe_file *xef = to_xe_file(file); struct xe_oa_open_param param = {}; const struct xe_oa_format *f; @@ -1733,7 +1734,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f int ret; if (!oa->xe) { - drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); return -ENODEV; } @@ -2005,7 +2006,8 @@ static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, */ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) { - struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_device *xe = to_xe_device(dev); + struct xe_oa *oa = &xe->oa; struct drm_xe_oa_config param; struct drm_xe_oa_config *arg = ¶m; struct xe_oa_config *oa_config, *tmp; @@ -2013,7 +2015,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi int err, id; if (!oa->xe) { - drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); return -ENODEV; } @@ -2106,13 +2108,14 @@ reg_err: */ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) { - struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_device *xe = to_xe_device(dev); + struct 
xe_oa *oa = &xe->oa; struct xe_oa_config *oa_config; u64 arg, *ptr = u64_to_user_ptr(data); int ret; if (!oa->xe) { - drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); return -ENODEV; } -- cgit From 3516b2913ead50c5649cafcd1fe97b9c9dc41e59 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 19 Jun 2024 12:28:54 -0700 Subject: drm/xe/oa: Call xe_oa_emit_oa_config() with new config when updating config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the UMD asks for the config to be updated, xe_oa_config_locked() was calling xe_oa_emit_oa_config(), which would use stream->oa_config; but that is only changed to the next oa_config after xe_oa_emit_oa_config() finishes. So it was setting the same config for all DRM_XE_PERF_IOCTL_CONFIG calls. Cc: Ashutosh Dixit Signed-off-by: José Roberto de Souza Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240619192854.199289-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2277af816a34..36b2e89b78b6 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -887,9 +887,9 @@ err_free: return ERR_CAST(bb); } -static struct xe_oa_config_bo *xe_oa_alloc_config_buffer(struct xe_oa_stream *stream) +static struct xe_oa_config_bo * +xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) { - struct xe_oa_config *oa_config = stream->oa_config; struct xe_oa_config_bo *oa_bo; /* Look for the buffer in the already allocated BOs attached to the stream */ @@ -905,13 +905,13 @@ out: return oa_bo; } -static int xe_oa_emit_oa_config(struct xe_oa_stream *stream) +static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) { #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 struct xe_oa_config_bo *oa_bo; int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; - oa_bo = xe_oa_alloc_config_buffer(stream); + oa_bo = xe_oa_alloc_config_buffer(stream, config); if (IS_ERR(oa_bo)) { err = PTR_ERR(oa_bo); goto exit; @@ -989,7 +989,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return ret; } - return xe_oa_emit_oa_config(stream); + return xe_oa_emit_oa_config(stream, stream->oa_config); } static void xe_oa_stream_enable(struct xe_oa_stream *stream) @@ -1054,7 +1054,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream); + err = xe_oa_emit_oa_config(stream, config); if (!err) config = xchg(&stream->oa_config, config); else -- cgit From 93d2d3e4c5d075ed691bf940f7eaf938cf8c6632 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 19 Jun 2024 15:56:17 -0700 Subject: drm/xe/oa: Remove WARN_ON's for unsupported configurations The OA ioctls already have drm_dbg() calls which are sufficient to tell the user that OA is not supported on unsupported configurations (execlist mode and platform gen < 12). Having additional WARN_ON's for these during driver probe creates unnecessary noise. Just remove these WARN_ON's.
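As an aside on the xe_oa_emit_oa_config() fix two patches back: the bug shape is generic — a helper reads mutable object state that the caller only updates after the helper returns. A toy model, with invented names:

#include <assert.h>

struct cfg { int id; };
struct stream { struct cfg *cur; };

/* Buggy shape: re-programs whatever is already current. */
static int emit_old(struct stream *s)
{
	return s->cur->id;
}

/* Fixed shape: the caller passes the config being switched to. */
static int emit_new(struct stream *s, struct cfg *c)
{
	(void)s;
	return c->id;
}

int main(void)
{
	struct cfg a = { 1 }, b = { 2 };
	struct stream s = { &a };

	assert(emit_old(&s) == 1);     /* update to 'b' re-programs 'a': stale */
	assert(emit_new(&s, &b) == 2); /* programs 'b' as intended */
	s.cur = &b;                    /* state swapped only after success */
	return 0;
}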
Suggested-by: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240619225617.3465899-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 36b2e89b78b6..eaa5fe5fd75b 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2385,7 +2385,7 @@ int xe_oa_init(struct xe_device *xe) int ret; /* Support OA only with GuC submission and Gen12+ */ - if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) + if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) return 0; oa->xe = xe; -- cgit From d35386b3a77bb26d771c7d7c59dab8a920ab62ee Mon Sep 17 00:00:00 2001 From: Sai Teja Pottumuttu Date: Wed, 19 Jun 2024 12:26:14 +0530 Subject: drm/xe/xelpgp: Extend Wa_14019877138 to graphics 12.74 Wa_14019877138 is also needed for xe_lpgp graphics 12.74 Signed-off-by: Sai Teja Pottumuttu Reviewed-by: Shekhar Chauhan Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240619065614.131151-1-sai.teja.pottumuttu@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 0b6fbbebc41e..21b554627792 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -629,7 +629,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, -- cgit From 65336c3fa2cf7f272067be9193303d1ab7c42190 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 20 Jun 2024 12:01:47 +0200 Subject: drm/xe/vf: Disable features that do not apply to VFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already maintain several flags that control the availability of features on a given device. Disable features, like PCODE or GuC PC or GSC, that do not apply to a VF device. 
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240620100147.949-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 31b549f5f03a..e25c37ac7d14 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -486,6 +486,17 @@ static int wait_for_lmem_ready(struct xe_device *xe) return 0; } +static void update_device_info(struct xe_device *xe) +{ + /* disable features that are not available/applicable to VFs */ + if (IS_SRIOV_VF(xe)) { + xe->info.enable_display = 0; + xe->info.has_heci_gscfi = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; + } +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance @@ -506,6 +517,8 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); + update_device_info(xe); + err = xe_pcode_probe_early(xe); if (err) return err; -- cgit From 9632dfb0def48b0b6fa343fef166e0cf2ac10a95 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:50 +0200 Subject: drm/xe/vf: Don't run any save-restore RTP actions if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no RTP save-restore actions applicable for VFs on current platforms. If any future platform requires some, we will need to update the RTP framework to support VF_READY or VF_ONLY actions. In the meantime, just skip all actions if we are running as a VF driver. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Piotr Piórkowski Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_rtp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index ac31cba1dbea..5b27f7c45ea3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -13,6 +13,7 @@ #include "xe_gt_topology.h" #include "xe_macros.h" #include "xe_reg_sr.h" +#include "xe_sriov.h" /** * DOC: Register Table Processing */ @@ -257,6 +258,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, &gt, &xe); + if (IS_SRIOV_VF(xe)) + return; + for (entry = entries; entry && entry->name; entry++) { bool match = false; -- cgit From f20535ce1dd96003e76c958ffaa9c345483d2dfd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:51 +0200 Subject: drm/xe/vf: Don't apply tile workarounds if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VF drivers can't apply any workarounds as they don't have access to the related registers. Since the xe_wa_apply_tile_workarounds() function is not using RTP yet, we have to add an early return.
Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Piotr Piórkowski Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 21b554627792..c7bf0862b231 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -21,6 +21,7 @@ #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_sriov.h" #include "xe_step.h" /** @@ -865,6 +866,9 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) { struct xe_gt *mmio = tile->primary_gt; + if (IS_SRIOV_VF(tile->xe)) + return; + if (XE_WA(mmio, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } -- cgit From f2800572cc0b01f329fac69646234c64b1ca9576 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:52 +0200 Subject: drm/xe/vf: Don't change hwe IRQ masks if using memory IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently do not support changing the engine interrupt enable mask on the per-engine basis when using memory based interrupts. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 8ee3c300c5e4..ab3d5b7a1e8c 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -134,6 +134,9 @@ void xe_irq_enable_hwe(struct xe_gt *gt) u32 gsc_mask = 0; u32 heci_mask = 0; + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) + return; + if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; -- cgit From 5aa326f52872b25906d7dca8e0c4f7e6c597f40f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:53 +0200 Subject: drm/xe/vf: Don't initialize OA if VF We don't support Observation Architecture on the VF device. Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Ashutosh Dixit Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eaa5fe5fd75b..4168b51cf7b5 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -35,6 +35,7 @@ #include "xe_perf.h" #include "xe_pm.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) @@ -2388,6 +2389,9 @@ int xe_oa_init(struct xe_device *xe) if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + oa->xe = xe; oa->oa_formats = oa_formats; -- cgit From ecab82af27873336e2a1655dd09e2a3fc41d1c10 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:54 +0200 Subject: drm/xe/vf: Don't support gtidle if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VF drivers can't access any of gtidle control registers as this functionality is owned by the PF driver. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_idle.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 5d6181117ab2..67aba4140510 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -15,6 +15,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_sriov.h" /** * DOC: Xe GT Idle @@ -100,6 +101,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) u32 pg_enable; int i, j; + if (IS_SRIOV_VF(xe)) + return; + /* Disable CPG for PVC */ if (xe->info.platform == XE_PVC) return; @@ -130,6 +134,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) void xe_gt_idle_disable_pg(struct xe_gt *gt) { + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_device_assert_mem_access(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); @@ -214,6 +221,9 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle) struct kobject *kobj; int err; + if (IS_SRIOV_VF(xe)) + return 0; + kobj = kobject_create_and_add("gtidle", gt->sysfs); if (!kobj) return -ENOMEM; @@ -246,6 +256,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Units of 1280 ns for a total of 5s */ xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA); /* Enable RC6 */ @@ -258,6 +271,9 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_mmio_write32(gt, RC_CONTROL, 0); xe_mmio_write32(gt, RC_STATE, 0); } -- cgit From ef3fcfe0639824bc908c7bd18125a97f5f83357c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:55 +0200 Subject: drm/xe/vf: Don't use register based TLB invalidation if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VF drivers can only use GuC-based TLB invalidation, as they don't have access to the related registers. However, VFs shouldn't need any explicit TLB invalidation before enabling CTB communication, as there will be an implicit GGTT TLB invalidation issued by the GuC itself as part of MMIO-based action handling. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-8-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 23d397a246a8..e1f1ccb01143 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,6 +13,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" @@ -249,6 +250,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + if (IS_SRIOV_VF(xe)) + return 0; + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, -- cgit From 7875fe7c2495884dd08d4a23fdc44147225542da Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:56 +0200 Subject: drm/xe/vf: Skip engine ring enabling if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All engines will be correctly initialized by the PF driver. Moreover, VF drivers can't access related engine registers. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-9-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 52f12009678e..78b50d3a6501 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -547,7 +547,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (hwe->class == XE_ENGINE_CLASS_OTHER) hwe->irq_handler = xe_gsc_hwe_irq_handler; - xe_hw_engine_enable_ring(hwe); + if (!IS_SRIOV_VF(xe)) + xe_hw_engine_enable_ring(hwe); } /* We reserve the highest BCS instance for USM */ -- cgit From 2b79878b0784ba7253ad9b8dee66495b288272c9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 19 Jun 2024 23:45:57 +0200 Subject: drm/xe/vf: Custom HuC initialization if VF The HuC firmware is loaded and initialized by the PF driver. Make sure VF driver performs only limited data structure initialization. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240619214557.905-10-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_huc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6238fb354914..c88761fe31c9 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -21,6 +21,7 @@ #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov.h" #include "xe_uc_fw.h" static struct xe_gt * @@ -92,6 +93,9 @@ int xe_huc_init(struct xe_huc *huc) if (!xe_uc_fw_is_enabled(&huc->fw)) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + if (huc->fw.has_gsc_headers) { ret = huc_alloc_gsc_pkt(huc); if (ret) -- cgit From 0d39640ace670bccb7f03b2a9e5463ec0885a9ca Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 13 Jun 2024 23:13:43 -0700 Subject: drm/xe: Invert runnable_state / pending enable check and assert Rather than checking for pending enable and asserting runnable_state == 1 in the sched done handler, invert these. This is more robust: the code takes action based on the G2H message and asserts that the KMD tracking state is correct. Suggested-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240614061343.2931649-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 74552391dc5a..373447758a60 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1782,8 +1782,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, { trace_xe_exec_queue_scheduling_done(q); - if (exec_queue_pending_enable(q)) { - xe_gt_assert(guc_to_gt(guc), runnable_state == 1); + if (runnable_state == 1) { + xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); q->guc->resume_time = ktime_get(); clear_exec_queue_pending_enable(q); -- cgit From 33991ae8f40a8245f68e8e442766bf9072eaaa2a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 17 Jun 2024 17:38:59 -0700 Subject: drm/xe: Simplify locking in new_vma Rather than acquiring and dropping the VM / BO dma-resv around xe_vma_create() and doing the same thing again when adding preempt fences or handling an error, hold these locks through the entire new_vma() function.
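Reduced to a toy (stand-in types; the real function unwinds with drm_exec_fini() and returns ERR_PTR() rather than NULL), the target shape is one lock scope with a single unlock point and a single destroy point:

#include <errno.h>
#include <stdlib.h>

struct obj { int ready; };

static int setup(struct obj *o, int fail)
{
	o->ready = !fail;
	return fail ? -EINVAL : 0;
}

static struct obj *create(int fail_setup)
{
	struct obj *o;
	int err = 0;

	/* lock();  -- taken once, held across creation *and* setup */
	o = malloc(sizeof(*o));
	if (!o) {
		err = -ENOMEM;
		goto err_unlock;
	}
	err = setup(o, fail_setup);

err_unlock:
	/* unlock(); -- single unlock point for success and failure */
	if (err) {
		free(o); /* single destroy point */
		o = NULL;
	}
	return o;
}

int main(void)
{
	struct obj *ok = create(0);
	struct obj *bad = create(1);

	free(ok);
	return bad == NULL ? 0 : 1;
}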
v2: - Rebase (CI) Cc: Fei Yang Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240618003859.3239239-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 61d4d95a5377..5b166fa03684 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -180,16 +180,14 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) struct xe_exec_queue *q; int err; + xe_bo_assert_held(bo); + if (!vm->preempt.num_exec_queues) return 0; - err = xe_bo_lock(bo, true); - if (err) - return err; - err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); if (err) - goto out_unlock; + return err; list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) if (q->lr.pfence) { @@ -198,9 +196,7 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) DMA_RESV_USAGE_BOOKKEEP); } -out_unlock: - xe_bo_unlock(bo); - return err; + return 0; } static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, @@ -2140,7 +2136,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; struct drm_exec exec; struct xe_vma *vma; - int err; + int err = 0; lockdep_assert_held_write(&vm->lock); @@ -2165,23 +2161,22 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + op->va.range - 1, pat_index, flags); - if (bo) - drm_exec_fini(&exec); + if (IS_ERR(vma)) + goto err_unlock; - if (xe_vma_is_userptr(vma)) { + if (xe_vma_is_userptr(vma)) err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - return ERR_PTR(err); - } - } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { + else if (!xe_vma_has_no_bo(vma) && !bo->vm) err = add_preempt_fences(vm, bo); - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - return ERR_PTR(err); - } + +err_unlock: + if (bo) + drm_exec_fini(&exec); + + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + vma = ERR_PTR(err); } return vma; -- cgit From 3b1592fb783549e968aa20035ab37be5fb124f02 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 20 Jun 2024 15:49:27 -0700 Subject: drm/xe/lnl: Apply Wa_22019338487 This WA requires us to limit media GT frequency requests to a certain cap value during driver load. Freq limits are restored after load completes, so perf will not be affected during normal operations. During normal driver operation, this WA requires dummy writes to media offset 0x380D8C after every ~63 GGTT writes. This will ensure completion of the LMEM writes originating from Gunit. During driver unload (before FLR), the WA requires that we set the requested frequency to the cap value again. v3: Do not use WA number in function name. Call WA wrapper from xe_device. Rename some variables, check for locks in the correct function (Rodrigo). Ensure reset path is also covered for this WA.
v4: Fix BAT failure v5: Add a function pointer for ggtt_ops (Michal W) v6: Fix name collision and use static function (Rodrigo) Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240620224928.3986377-2-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 3 ++ drivers/gpu/drm/xe/display/xe_fb_pin.c | 4 +- drivers/gpu/drm/xe/xe_device.c | 3 ++ drivers/gpu/drm/xe/xe_ggtt.c | 42 +++++++++++++++++--- drivers/gpu/drm/xe/xe_ggtt.h | 1 - drivers/gpu/drm/xe/xe_ggtt_types.h | 12 ++++-- drivers/gpu/drm/xe/xe_gsc.c | 5 +++ drivers/gpu/drm/xe/xe_gt.c | 24 ++++++++++++ drivers/gpu/drm/xe/xe_gt.h | 1 + drivers/gpu/drm/xe/xe_guc_pc.c | 71 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_guc_pc.h | 1 + drivers/gpu/drm/xe/xe_guc_pc_types.h | 4 ++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 13 files changed, 159 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 20dc9759bb3c..b1e03bfe4a68 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -24,9 +24,12 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(call cmd,wa_oob) uses_generated_oob := \ + $(obj)/xe_ggtt.o \ $(obj)/xe_gsc.o \ + $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ + $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index a2f417209124..d270bcd11686 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -171,7 +171,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, xe->pat.idx[XE_CACHE_NONE]); - xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); *ggtt_ofs += XE_PAGE_SIZE; src_idx -= src_stride; } @@ -217,7 +217,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); } } else { u32 i, ggtt_ofs; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0d57eea8f083..ca5e8435485a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -689,6 +689,9 @@ int xe_device_probe(struct xe_device *xe) xe_hwmon_register(xe); + for_each_gt(gt, xe, id) + xe_gt_sanitize_freq(gt); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 8ff91fd1b7c8..883cfc7f98a8 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "regs/xe_gt_regs.h" #include "regs/xe_gtt_defs.h" @@ -23,8 +24,10 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" +#include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_wa.h" #include "xe_wopcm.h" static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, @@ -69,7 +72,22 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) return ggms ? 
SZ_1M << ggms : 0; } -void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) +static void ggtt_update_access_counter(struct xe_ggtt *ggtt) +{ + /* + * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit + * to wait for completion of prior GTT writes before letting this through. + * This needs to be done for all GGTT writes originating from the CPU. + */ + lockdep_assert_held(&ggtt->lock); + + if ((++ggtt->access_count % 63) == 0) { + xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0); + ggtt->access_count = 0; + } +} + +static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) { xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); xe_tile_assert(ggtt->tile, addr < ggtt->size); @@ -77,6 +95,12 @@ void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); } +static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) +{ + xe_ggtt_set_pte(ggtt, addr, pte); + ggtt_update_access_counter(ggtt); +} + static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; @@ -92,7 +116,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) scratch_pte = 0; while (start < end) { - xe_ggtt_set_pte(ggtt, start, scratch_pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte); start += XE_PAGE_SIZE; } } @@ -124,10 +148,17 @@ static void primelockdep(struct xe_ggtt *ggtt) static const struct xe_ggtt_pt_ops xelp_pt_ops = { .pte_encode_bo = xelp_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte, +}; + +static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { + .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; /* @@ -187,7 +218,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->size = GUC_GGTT_TOP; if (GRAPHICS_VERx100(xe) >= 1270) - ggtt->pt_ops = &xelpg_pt_ops; + ggtt->pt_ops = ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487) ? 
+ &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; @@ -394,7 +426,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); - xe_ggtt_set_pte(ggtt, start + offset, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); } } @@ -502,7 +534,7 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node return; while (start < end) { - xe_ggtt_set_pte(ggtt, start, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); start += XE_PAGE_SIZE; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 4a41a1762358..6a96fd54bf60 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -10,7 +10,6 @@ struct drm_printer; -void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d8c584d9a8c3..2245d88d8f39 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,10 +13,6 @@ struct xe_bo; struct xe_gt; -struct xe_ggtt_pt_ops { - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); -}; - struct xe_ggtt { struct xe_tile *tile; @@ -34,6 +30,14 @@ struct xe_ggtt { const struct xe_ggtt_pt_ops *pt_ops; struct drm_mm mm; + + /** @access_count: counts GGTT writes */ + unsigned int access_count; +}; + +struct xe_ggtt_pt_ops { + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; #endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 80a61934decc..f8239a13fa2b 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -22,6 +22,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" +#include "xe_guc_pc.h" #include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -284,6 +285,10 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) return ret; xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + + /* GSC load is done, restore expected GT frequencies */ + xe_gt_sanitize_freq(gt); + xe_gt_dbg(gt, "GSC FW async load completed\n"); /* HuC auth failure is not fatal */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 57d84751e160..759634cff1d8 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -9,6 +9,7 @@ #include #include +#include #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" @@ -54,6 +55,7 @@ #include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc.h" +#include "xe_uc_fw.h" #include "xe_vm.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -678,6 +680,9 @@ static int do_gt_restart(struct xe_gt *gt) /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); + /* Restore GT freq to expected values */ + xe_gt_sanitize_freq(gt); + return 0; } @@ -801,6 +806,25 @@ err_msg: return err; } +/** + * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. + * @gt: the GT object + * + * Called after driver init/GSC load completes to restore GT frequencies if we + * limited them for any WAs. 
+ */ +int xe_gt_sanitize_freq(struct xe_gt *gt) +{ + int ret = 0; + + if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) || + xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) && + XE_WA(gt, 22019338487)) + ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc); + + return ret; +} + int xe_gt_resume(struct xe_gt *gt) { int err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 9073ac68a777..1123fdfc4ebc 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); +int xe_gt_sanitize_freq(struct xe_gt *gt); void xe_gt_remove(struct xe_gt *gt); /** diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2b45a9cd3ec0..edf4a29a2aa3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -8,6 +8,7 @@ #include #include +#include #include "abi/guc_actions_slpc_abi.h" #include "regs/xe_gt_regs.h" @@ -25,6 +26,7 @@ #include "xe_mmio.h" #include "xe_pcode.h" #include "xe_pm.h" +#include "xe_wa.h" #define MCHBAR_MIRROR_BASE_SNB 0x140000 @@ -42,6 +44,8 @@ #define GT_FREQUENCY_MULTIPLIER 50 #define GT_FREQUENCY_SCALER 3 +#define LNL_MERT_FREQ_CAP 800 + /** * DOC: GuC Power Conservation (PC) * @@ -695,6 +699,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc) tgl_init_fused_rp_values(pc); } +static u32 pc_max_freq_cap(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + if (XE_WA(gt, 22019338487)) + return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); + else + return pc->rp0_freq; +} + /** * xe_guc_pc_init_early - Initialize RPx values and request a higher GT * frequency to allow faster GuC load times * @pc: Xe_GuC_PC instance */ void xe_guc_pc_init_early(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); pc_init_fused_rp_values(pc); - pc_set_cur_freq(pc, pc->rp0_freq); + pc_set_cur_freq(pc, pc_max_freq_cap(pc)); } static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) @@ -762,6 +776,53 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret = 0; + + if (XE_WA(pc_to_gt(pc), 22019338487)) { + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */ + mutex_lock(&pc->freq_lock); + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + mutex_unlock(&pc->freq_lock); + } + + return ret; +} + +/** + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values + * @pc: The GuC PC + * + * Returns: 0 on success, + * error code on failure + */ +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) +{ + int ret = 0; + + mutex_lock(&pc->freq_lock); + ret = pc_set_max_freq(pc, pc->stashed_max_freq); + if (!ret) + ret = pc_set_min_freq(pc, pc->stashed_min_freq); + mutex_unlock(&pc->freq_lock); + + return ret; +} + /** * xe_guc_pc_gucrc_disable - Disable GuC RC * @pc: Xe_GuC_PC instance @@ -911,6 +972,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (ret) goto out; + ret = pc_set_mert_freq_cap(pc); + if (ret) + goto out; + if (xe->info.platform == XE_PVC) { xe_guc_pc_gucrc_disable(pc); ret = 0; @@ -959,6 +1024,10 @@ static void xe_guc_pc_fini_hw(void *arg) XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); + + /* Bind requested freq to mert_freq_cap before unload */ + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); + xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 8a7b91ce1b3e..55fdb55ab688 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -32,5 +32,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); +int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2afd0dbc3542..13810be015db 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -25,6 +25,10 @@ struct xe_guc_pc { u32 user_requested_min; /** @user_requested_max: Stash the maximum requested freq by user */ u32 user_requested_max; + /** @stashed_min_freq: Stash the current minimum freq */ + u32 stashed_min_freq; + /** @stashed_max_freq: Stash the current maximum freq */ + u32 stashed_max_freq; /** @freq_lock: Let's protect the frequencies */ struct mutex freq_lock; /** @freq_ready: Only handle freq changes, if they are really ready */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 12fe88796a49..a6b897030fde 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,3 +27,4 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +22019338487 MEDIA_VERSION(2000) -- cgit From 9d2ab8623e85843956c3d5fdbdcbbb7ec198610f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 20 Jun 2024 15:49:28 -0700 Subject: drm/xe/guc: Request max GT freq during resume We already request max freq in the load path, moving it to __xe_guc_upload will ensure this speeds up GuC load in the resume path as well. 
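A toy model of the stash/clamp/restore flow added by the previous patch, which this one builds on (invented numbers: an 800 cap in the spirit of LNL_MERT_FREQ_CAP, user limits of 300/2000):

#include <assert.h>
#include <stdint.h>

#define MERT_FREQ_CAP 800u /* illustrative, not the real constant */

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

struct pc {
	uint32_t min, max;             /* current limits */
	uint32_t stash_min, stash_max; /* saved user limits */
	uint32_t rp0, rpe;             /* fused max / efficient freq */
};

static void set_mert_freq_cap(struct pc *pc)
{
	uint32_t cap = min_u32(MERT_FREQ_CAP, pc->rp0);

	pc->stash_min = pc->min;
	pc->stash_max = pc->max;
	pc->min = min_u32(pc->rpe, cap);
	pc->max = min_u32(pc->rp0, cap);
}

static void restore_stashed_freq(struct pc *pc)
{
	pc->max = pc->stash_max;
	pc->min = pc->stash_min;
}

int main(void)
{
	struct pc pc = { .min = 300, .max = 2000, .rp0 = 2000, .rpe = 1200 };

	set_mert_freq_cap(&pc);
	assert(pc.min == 800 && pc.max == 800);  /* bounded during load */
	restore_stashed_freq(&pc);
	assert(pc.min == 300 && pc.max == 2000); /* user limits restored */
	return 0;
}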
v2: Rename xe_guc_pc_init_early since we now call it per GuC load (Michal W) v3: Keep pc_init_early() and init RPx values there (Rodrigo) Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240620224928.3986377-3-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 4 +++- drivers/gpu/drm/xe/xe_guc_pc.c | 15 +++++++++++++-- drivers/gpu/drm/xe/xe_guc_pc.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 02c028d0e91b..172b65a50e31 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -702,6 +702,9 @@ static int __xe_guc_upload(struct xe_guc *guc) { int ret; + /* Raise GT freq to speed up HuC/GuC load */ + xe_guc_pc_raise_unslice(&guc->pc); + guc_write_params(guc); guc_prepare_xfer(guc); @@ -787,7 +790,6 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) xe_guc_ads_populate_minimal(&guc->ads); - /* Raise GT freq to speed up HuC/GuC load */ xe_guc_pc_init_early(&guc->pc); ret = __xe_guc_upload(guc); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index edf4a29a2aa3..d88f5e960fbd 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -710,17 +710,28 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) } /** - * xe_guc_pc_init_early - Initialize RPx values and request a higher GT + * xe_guc_pc_raise_unslice - Initialize RPx values and request a higher GT * frequency to allow faster GuC load times * @pc: Xe_GuC_PC instance */ +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + pc_set_cur_freq(pc, pc_max_freq_cap(pc)); +} + +/** + * xe_guc_pc_init_early - Initialize RPx values + * @pc: Xe_GuC_PC instance + */ void xe_guc_pc_init_early(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); pc_init_fused_rp_values(pc); - pc_set_cur_freq(pc, pc_max_freq_cap(pc)); } static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 55fdb55ab688..efda432fadfc 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -33,5 +33,6 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); +void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ -- cgit From 701d9c4a199bba144d53cd47c80d42a788498962 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 21 Jun 2024 19:25:22 +0200 Subject: drm/xe/huc: Use GT oriented error messages in xe_huc.c If applicable, we prefer GT oriented dmesg messages. Update all HuC related messages and use more user friendly error codes. 
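On the %pe idiom used throughout these conversions: printing ERR_PTR(err) with %pe makes the kernel log show a symbolic name such as -EIO instead of a bare integer. A rough userspace analogue, with strerror() standing in for the kernel's error-name table:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static void report(const char *what, int err)
{
	/* kernel: xe_gt_err(gt, "HuC: %s failed: %pe\n", what, ERR_PTR(err)); */
	fprintf(stderr, "HuC: %s failed: %s\n", what, strerror(-err));
}

int main(void)
{
	report("authentication", -EIO); /* "... failed: Input/output error" */
	return 0;
}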
Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240621172522.1037-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_huc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index c88761fe31c9..bec4366e5513 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -18,6 +18,7 @@ #include "xe_force_wake.h" #include "xe_gsc_submit.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -107,7 +108,7 @@ int xe_huc_init(struct xe_huc *huc) return 0; out: - drm_err(&xe->drm, "HuC init failed with %d", ret); + xe_gt_err(gt, "HuC: initialization failed: %pe\n", ERR_PTR(ret)); return ret; } @@ -195,14 +196,14 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) } while (--retry && err == -EBUSY); if (err) { - drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err); + xe_gt_err(gt, "HuC: failed to submit GSC request to auth: %pe\n", ERR_PTR(err)); return err; } err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE, sizeof(struct pxp43_huc_auth_out), &rd_offset); if (err) { - drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err); + xe_gt_err(gt, "HuC: invalid GSC reply for auth: %pe\n", ERR_PTR(err)); return err; } @@ -213,7 +214,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) */ out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status); if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) { - drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status); + xe_gt_err(gt, "HuC: authentication failed with GSC error = %#x\n", out_status); return -EIO; } @@ -242,7 +243,6 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type) int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) { - struct xe_device *xe = huc_to_xe(huc); struct xe_gt *gt = huc_to_gt(huc); struct xe_guc *guc = huc_to_guc(huc); int ret; @@ -272,26 +272,26 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) return -EINVAL; } if (ret) { - drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n", - huc_auth_modes[type].name, ret); + xe_gt_err(gt, "HuC: failed to trigger auth via %s: %pe\n", + huc_auth_modes[type].name, ERR_PTR(ret)); goto fail; } ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, huc_auth_modes[type].val, 100000, NULL, false); if (ret) { - drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); + xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret)); goto fail; } xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); - drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name); + xe_gt_dbg(gt, "HuC: authenticated via %s\n", huc_auth_modes[type].name); return 0; fail: - drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n", - huc_auth_modes[type].name, ret); + xe_gt_err(gt, "HuC: authentication via %s failed: %pe\n", + huc_auth_modes[type].name, ERR_PTR(ret)); xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); return ret; -- cgit From 7e5161da9d267957b726a29f3efe6cb50fdfed04 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 23 Jun 2024 13:31:19 -0700 Subject: drm/xe/oa: Fix kernel doc in xe_drm.h Fix kernel doc in xe_drm.h. Also eliminate private/non-abi enum definitions. 
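Dropping the non-ABI sentinels is more than cosmetic: a trailing *_MAX value silently shifts whenever the enum grows, so userspace compiled against an older header would validate against a stale bound. A sketch with invented enums (MAX_USER_EXTENSIONS mirrors the kernel-internal bound the patch switches to):

#include <assert.h>

/* Invented enums, mimicking a uAPI that ships a non-ABI *_MAX sentinel. */
enum v1_prop { V1_PROP_A, V1_PROP_B, V1_PROP_MAX /* "non-ABI", but visible */ };
enum v2_prop { V2_PROP_A, V2_PROP_B, V2_PROP_C /* added later */ };

#define MAX_USER_EXTENSIONS 16 /* kernel-internal bound, stable by design */

int main(void)
{
	/* Userspace built against v1 thinks anything >= 2 is invalid... */
	assert(V1_PROP_MAX == 2);
	/* ...so it would wrongly reject the perfectly valid new property. */
	assert(V2_PROP_C >= V1_PROP_MAX);
	return 0;
}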
v2: Remove __DRM_XE_PERF_TYPE_MAX since it is unused (Michal) v3: Also remove DRM_XE_OA_PROPERTY_MAX since it can also be eliminated (Michal) Suggested-by: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240623203119.3840283-1-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 3 ++- include/uapi/drm/xe_drm.h | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4168b51cf7b5..9263ae9a864e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1684,6 +1684,7 @@ static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, }; +#define MAX_USER_EXTENSIONS 16 static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, struct xe_oa_open_param *param) { @@ -1692,7 +1693,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number int err; u32 idx; - if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; err = __copy_from_user(&ext, address, sizeof(ext)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 93e00be44b2d..b410553faa9b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1379,8 +1379,8 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ DRM_XE_PERF_TYPE_OA, - __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; /** @@ -1611,9 +1611,6 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, - - /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ - DRM_XE_OA_PROPERTY_MAX }; /** -- cgit From 20baedb8033d0ba6ae382fc9974b481fdb32e7ef Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 22 Jun 2024 11:42:53 +0200 Subject: drm/xe/vf: Skip attempt to start GuC PC if VF We have already marked the GuC PC feature as not applicable for VF devices, but we missed the fact that there may still be some privileged activities performed by this component, which does much more than its name suggests. Explicitly skip xe_guc_pc_start() if running as a VF driver and use a GT oriented message to report any error.
v2: also skip xe_guc_pc_stop (Vinay) Signed-off-by: Michal Wajdeczko Cc: Vinay Belgaumkar Cc: Matthew Brost Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240622094253.1081-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 172b65a50e31..eb655cee19f7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1113,7 +1113,13 @@ void xe_guc_reset_wait(struct xe_guc *guc) void xe_guc_stop_prepare(struct xe_guc *guc) { - XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); + if (!IS_SRIOV_VF(guc_to_xe(guc))) { + int err; + + err = xe_guc_pc_stop(&guc->pc); + xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n", + ERR_PTR(err)); + } } void xe_guc_stop(struct xe_guc *guc) @@ -1125,10 +1131,13 @@ void xe_guc_stop(struct xe_guc *guc) int xe_guc_start(struct xe_guc *guc) { - int ret; + if (!IS_SRIOV_VF(guc_to_xe(guc))) { + int err; - ret = xe_guc_pc_start(&guc->pc); - XE_WARN_ON(ret); + err = xe_guc_pc_start(&guc->pc); + xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n", + ERR_PTR(err)); + } return xe_guc_submit_start(guc); } -- cgit From be3bf9dd1c6d1c0b18396e4918a40a8f7ce6c591 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 25 Jun 2024 16:12:56 +0200 Subject: drm/xe/guc: Demote the H2G retry log message to debug The G2H RETRY message sent by the GuC does not necessarily indicate any serious problem and can be a part of the normal communication flow. Switch the log level from warning to the more appropriate debug. This will also let the CI ignore these logs, which were seen in a few SR-IOV scenarios. While at it, use hex to print the reason and add the missing \n. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240625141258.1257-2-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b4137fe195a4..91a8a969a6ad 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -877,8 +877,8 @@ retry_same_fence: } if (g2h_fence.retry) { - xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u", - action[0], g2h_fence.reason); + xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", + action[0], g2h_fence.reason); goto retry; } if (g2h_fence.fail) { -- cgit From b084dfaef2107bdc0cfc77d4940fb59b660dd901 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 25 Jun 2024 16:12:57 +0200 Subject: drm/xe/guc: Add more GuC error codes to ABI There are many more error codes that the GuC firmware can return in the RESPONSE_FAILURE message. Add to the ABI header those which are more likely to be seen by the PF or VF drivers.
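A hypothetical decode helper, of the kind a tool or debug path might carry, mapping a few of the new codes to readable names (values copied from the header hunk below):

#include <stdio.h>

/* Values mirror the guc_errors_abi.h hunk below. */
static const char *guc_response_str(unsigned int status)
{
	switch (status) {
	case 0x0:   return "SUCCESS";
	case 0x0C:  return "INVALID_VFID";
	case 0x0D:  return "UNPROVISIONED_VF";
	case 0x107: return "VF_MIGRATED";
	case 0x301: return "CTB_FULL";
	default:    return "unknown";
	}
}

int main(void)
{
	printf("status %#x -> %s\n", 0x107u, guc_response_str(0x107));
	return 0;
}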
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240625141258.1257-3-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/guc_errors_abi.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index d0b5fed6876f..2c627a21648f 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -8,10 +8,41 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, + XE_GUC_RESPONSE_INVALID_STATE = 0x0A, + XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, + XE_GUC_RESPONSE_INVALID_VFID = 0x0C, + XE_GUC_RESPONSE_UNPROVISIONED_VF = 0x0D, + XE_GUC_RESPONSE_INVALID_EVENT = 0x0E, XE_GUC_RESPONSE_NOT_SUPPORTED = 0x20, + XE_GUC_RESPONSE_UNKNOWN_ACTION = 0x30, + XE_GUC_RESPONSE_ACTION_ABORTED = 0x31, + XE_GUC_RESPONSE_NO_PERMISSION = 0x40, + XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, + XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, + XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, + XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, + XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, + XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, + XE_GUC_RESPONSE_PENDING_ACTION = 0x90, + XE_GUC_RESPONSE_INVALID_SIZE = 0x102, + XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, + XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, + XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, + XE_GUC_RESPONSE_VF_MIGRATED = 0x107, XE_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201, XE_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202, XE_GUC_RESPONSE_DECRYPTION_FAILED = 0x204, + XE_GUC_RESPONSE_VGT_DISABLED = 0x300, + XE_GUC_RESPONSE_CTB_FULL = 0x301, + XE_GUC_RESPONSE_VGT_UNAUTHORIZED_REQUEST = 0x302, + XE_GUC_RESPONSE_CTB_INVALID = 0x303, + XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, + XE_GUC_RESPONSE_CTB_IN_USE = 0x305, + XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, + XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, + XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, + XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; -- cgit From 92e9db6e1fa30c87f9c25fae9c9e275885cdd0b2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 25 Jun 2024 16:12:58 +0200 Subject: drm/xe/guc: Print GuC error codes as hex value We maintain GuC error code values in hex format. Also print them in that format for easier matching. While at it, slightly reformat the log and add missing \n. 
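The practical difference, shown with one of the codes from the previous patch: decimal output cannot be eyeballed against the hex constants in the ABI header, %#x output can.

#include <stdio.h>

int main(void)
{
	unsigned int reason = 0x107; /* XE_GUC_RESPONSE_VF_MIGRATED */

	printf("reason %u\n", reason);  /* "reason 263": no obvious match */
	printf("reason %#x\n", reason); /* "reason 0x107": greps straight to the header */
	return 0;
}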
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240625141258.1257-4-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 91a8a969a6ad..873d1bcbedd7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -882,7 +882,7 @@ retry_same_fence: goto retry; } if (g2h_fence.fail) { - xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u", + xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; } -- cgit From 8511d9da2058ffca7c745c48e93c51840e2ed65e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 25 Jun 2024 21:45:46 +0200 Subject: drm/xe/pf: Trigger explicit FLR while disabling VFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We attempt to unprovision all VFs in the GuC when disabling them, but the GuC may reject such a request if the target VF was previously active but its driver didn't unload with an explicit VF reset H2G action, or if the VMM has not started the VF FLR. To avoid mismatches between the configs maintained by the PF and the GuC, trigger an explicit FLR sequence just before releasing resources. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240625194546.1301-2-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 14 ++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 40b8f881fe04..ebf06e037750 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -129,6 +129,27 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) return pf_send_vf_stop(gt, vfid); } +/** + * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + /* XXX pf_send_vf_flr_start() expects ct->lock */ + mutex_lock(&gt->uc.guc.ct.lock); + err = pf_send_vf_flr_start(gt, vfid); + mutex_unlock(&gt->uc.guc.ct.lock); + + return err; +} + /** * DOC: The VF FLR Flow with GuC * diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index 850a3e37661f..405d1586f991 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -14,6 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 06d0fceb5114..74c8fadc9365 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -6,6 +6,7 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -37,6 +38,17 @@ static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_config_release(gt, n, true); } +static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_control_trigger_flr(gt, n); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -94,6 +106,8 @@ static int pf_disable_vfs(struct xe_device *xe) pci_disable_sriov(pdev); + pf_reset_vfs(xe, num_vfs); + pf_unprovision_vfs(xe, num_vfs); /* not needed anymore - see pf_enable_vfs() */ -- cgit From 80bab5c5038f32c92f5d26ff9df7255247c8dd89 Mon Sep 17 00:00:00 2001 From: Ilia Levi Date: Thu, 6 Jun 2024 15:47:05 +0300 Subject: drm/xe/irq: remove xe_irq_shutdown The cleanup is done by devres in irq_uninstall. Commit bbc9651fe9f4 ("drm/xe/irq: move irq_uninstall over to devm") resolved the ordering issue where irq_uninstall (registered with drmm) was called after pci_free_irq_vectors (registered with devm upon calling pci_alloc_irq_vectors). This happened because the drmm action list is registered with devm very early in the init flow, before pci_alloc_irq_vectors. Now that irq_uninstall is registered with devm, it will be called before pci_free_irq_vectors and we can remove xe_irq_shutdown.
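The ordering argument above rests on devres releasing its actions in reverse (LIFO) order of registration. A minimal sketch of that pattern, with hypothetical callback names, assuming a pcim-managed device so that the IRQ vectors are freed by devres:

#include <linux/device.h>
#include <linux/pci.h>

/* Hypothetical teardown action; devres calls this first on remove. */
static void my_irq_uninstall(void *arg)
{
	/* IRQ handler teardown would go here */
}

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int ret;

	ret = pcim_enable_device(pdev);		/* managed enable */
	if (ret)
		return ret;

	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
	if (ret < 0)
		return ret;
	/* on a managed device, devres frees the vectors on remove */

	/*
	 * Registered after the vectors, so the LIFO devres list runs
	 * my_irq_uninstall() before pci_free_irq_vectors() - exactly
	 * the ordering the commit above relies on.
	 */
	return devm_add_action_or_reset(&pdev->dev, my_irq_uninstall, pdev);
}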
Signed-off-by: Ilia Levi Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240606124705.822451-1-illevi@habana.ai Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 12 ++++-------- drivers/gpu/drm/xe/xe_irq.c | 5 ----- drivers/gpu/drm/xe/xe_irq.h | 1 - 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca5e8435485a..cfda7cb5df2c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -634,16 +634,16 @@ int xe_device_probe(struct xe_device *xe) err = xe_device_set_has_flat_ccs(xe); if (err) - goto err_irq_shutdown; + goto err; err = xe_vram_probe(xe); if (err) - goto err_irq_shutdown; + goto err; for_each_tile(tile, xe, id) { err = xe_tile_init_noalloc(tile); if (err) - goto err_irq_shutdown; + goto err; } /* Allocate and map stolen after potential VRAM resize */ @@ -657,7 +657,7 @@ int xe_device_probe(struct xe_device *xe) */ err = xe_display_init_noaccel(xe); if (err) - goto err_irq_shutdown; + goto err; for_each_gt(gt, xe, id) { last_gt = id; @@ -708,8 +708,6 @@ err_fini_gt: break; } -err_irq_shutdown: - xe_irq_shutdown(xe); err: xe_display_fini(xe); return err; @@ -740,8 +738,6 @@ void xe_device_remove(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_remove(gt); - - xe_irq_shutdown(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ab3d5b7a1e8c..85733f993d09 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -738,11 +738,6 @@ free_irq_handler: return err; } -void xe_irq_shutdown(struct xe_device *xe) -{ - irq_uninstall(xe); -} - void xe_irq_suspend(struct xe_device *xe) { int irq = to_pci_dev(xe->drm.dev)->irq; diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index bc42bc90d967..067514e13675 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -11,7 +11,6 @@ struct xe_tile; struct xe_gt; int xe_irq_install(struct xe_device *xe); -void xe_irq_shutdown(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); void xe_irq_enable_hwe(struct xe_gt *gt); -- cgit From 8d789ff4a41a557de565b1778a7c620cbb22ae0e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 26 Jun 2024 13:18:26 +0200 Subject: drm/xe/pf: Disable VFs on remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't leave VFs enabled when unloading the PF driver. 
Otherwise we will get a message like: [ ] xe 0000:4d:00.0: driver left SR-IOV enabled after remove Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240626111827.1389-2-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 21a1b7d2b2a9..f5d5a368e595 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -748,6 +748,11 @@ static void xe_pci_remove(struct pci_dev *pdev) if (!xe) /* driver load aborted, nothing to cleanup */ return; +#ifdef CONFIG_PCI_IOV + if (IS_SRIOV_PF(xe)) + xe_pci_sriov_configure(pdev, 0); +#endif + xe_device_remove(xe); xe_pm_runtime_fini(xe); pci_set_drvdata(pdev, NULL); -- cgit From 1bab7ecf5c10a0a529c06480692a544391053c20 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 26 Jun 2024 11:18:16 -0700 Subject: drm/xe/oa: Allow stream enable/disable functions to return error Stream enable/disable functions previously had void return because failure during function execution was not possible. This will change when we introduce functionality to disable preemption on the stream exec queue. Therefore, in preparation for this functionality, prepare this code to be able to handle error returns. Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-2-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 9263ae9a864e..a68659fd5386 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1013,24 +1013,26 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) hrtimer_cancel(&stream->poll_check_timer); } -static void xe_oa_enable_locked(struct xe_oa_stream *stream) +static int xe_oa_enable_locked(struct xe_oa_stream *stream) { if (stream->enabled) - return; - - stream->enabled = true; + return 0; xe_oa_stream_enable(stream); + + stream->enabled = true; + return 0; } -static void xe_oa_disable_locked(struct xe_oa_stream *stream) +static int xe_oa_disable_locked(struct xe_oa_stream *stream) { if (!stream->enabled) - return; - - stream->enabled = false; + return 0; xe_oa_stream_disable(stream); + + stream->enabled = false; + return 0; } static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) @@ -1105,11 +1107,9 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, { switch (cmd) { case DRM_XE_PERF_IOCTL_ENABLE: - xe_oa_enable_locked(stream); - return 0; + return xe_oa_enable_locked(stream); case DRM_XE_PERF_IOCTL_DISABLE: - xe_oa_disable_locked(stream); - return 0; + return xe_oa_disable_locked(stream); case DRM_XE_PERF_IOCTL_CONFIG: return xe_oa_config_locked(stream, arg); case DRM_XE_PERF_IOCTL_STATUS: @@ -1432,19 +1432,25 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, if (ret) goto err_free; + if (!param->disabled) { + ret = xe_oa_enable_locked(stream); + if (ret) + goto err_destroy; + } + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); if (stream_fd < 0) { ret = stream_fd; - goto err_destroy; + goto err_disable; } - if (!param->disabled) - xe_oa_enable_locked(stream); - /* Hold a reference on the drm device till stream_fd is released */ 
drm_dev_get(&stream->oa->xe->drm); return stream_fd; +err_disable: + if (!param->disabled) + xe_oa_disable_locked(stream); err_destroy: xe_oa_stream_destroy(stream); err_free: -- cgit From 406d058dc323ae152d380ac90153eb56a75850c1 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 26 Jun 2024 11:18:17 -0700 Subject: drm/xe/oa/uapi: Allow preemption to be disabled on the stream exec queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa VK_KHR_performance_query use case requires preemption and timeslicing to be disabled for the stream exec queue. Implement this functionality here. v2: Minor change to debug print to print both ret values (Umesh) Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-3-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 70 +++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_oa_types.h | 3 ++ include/uapi/drm/xe_drm.h | 6 ++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a68659fd5386..6cc3f0217341 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -80,6 +80,7 @@ struct xe_oa_open_param { int engine_instance; struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; + bool no_preempt; }; struct xe_oa_config_bo { @@ -1013,11 +1014,55 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) hrtimer_cancel(&stream->poll_check_timer); } +static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret1, ret2; + + /* Best effort recovery: try to revert both to original, irrespective of error */ + ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); + ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); + if (ret1 || ret2) + goto err; + return 0; +err: + drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); + return ret1 ?: ret2; +} + +static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret; + + /* Setting values to 0 will disable timeslice and preempt_timeout */ + ret = q->ops->set_timeslice(q, 0); + if (ret) + goto err; + + ret = q->ops->set_preempt_timeout(q, 0); + if (ret) + goto err; + + return 0; +err: + xe_oa_enable_preempt_timeslice(stream); + drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); + return ret; +} + static int xe_oa_enable_locked(struct xe_oa_stream *stream) { if (stream->enabled) return 0; + if (stream->no_preempt) { + int ret = xe_oa_disable_preempt_timeslice(stream); + + if (ret) + return ret; + } + xe_oa_stream_enable(stream); stream->enabled = true; @@ -1026,13 +1071,18 @@ static int xe_oa_enable_locked(struct xe_oa_stream *stream) static int xe_oa_disable_locked(struct xe_oa_stream *stream) { + int ret = 0; + if (!stream->enabled) return 0; xe_oa_stream_disable(stream); + if (stream->no_preempt) + ret = xe_oa_enable_preempt_timeslice(stream); + stream->enabled = false; - return 0; + return ret; } static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) @@ -1307,6 +1357,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->sample = param->sample; stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + 
stream->no_preempt = param->no_preempt; /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end @@ -1651,6 +1702,13 @@ static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param); static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { @@ -1662,6 +1720,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, @@ -1766,6 +1825,15 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (param.exec_q && !param.sample) privileged_op = false; + if (param.no_preempt) { + if (!param.exec_q) { + drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); + ret = -EINVAL; + goto err_exec_q; + } + privileged_op = true; + } + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); ret = -EACCES; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 706d45577dae..540c3ec53a6d 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -235,5 +235,8 @@ struct xe_oa_stream { /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; + + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ + u32 no_preempt; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b410553faa9b..12eaa8532b5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1611,6 +1611,12 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, }; /** -- cgit
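To close the loop, a userspace sketch of how the new property is meant to be used: the extension chain below asks for preemption and timeslicing to be disabled on the stream's exec queue. Struct and macro names follow the perf uapi as of this series and are best-effort assumptions here; the remaining mandatory OA properties and the exec queue creation are left to the caller. Note that the kernel rejects NO_PREEMPT without an exec queue and treats it as a privileged operation unless xe_perf_stream_paranoid is 0:

#include <stdint.h>
#include <sys/ioctl.h>
#include "xe_drm.h"

static int open_oa_stream_no_preempt(int fd, uint32_t exec_queue_id)
{
	struct drm_xe_ext_set_property exts[2] = {};
	struct drm_xe_perf_param arg = {};

	/* exec queue property first; NO_PREEMPT is only valid with one */
	exts[0].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	exts[0].base.next_extension = (uintptr_t)&exts[1];
	exts[0].property = DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID;
	exts[0].value = exec_queue_id;

	exts[1].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
	exts[1].property = DRM_XE_OA_PROPERTY_NO_PREEMPT;
	exts[1].value = 1;

	arg.perf_type = DRM_XE_PERF_TYPE_OA;
	arg.perf_op = DRM_XE_PERF_OP_STREAM_OPEN;
	arg.param = (uintptr_t)&exts[0];

	/* on success the ioctl returns a new stream fd */
	return ioctl(fd, DRM_IOCTL_XE_PERF, &arg);
}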