From 332dd0116c82a75df175a459fa69dda3f23491a7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 16:21:58 -0800 Subject: drm/xe: Add range based TLB invalidations If the platform supports range based TLB invalidations use them. Hide these details in the xe_gt_tlb_invalidation layer. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 7 +-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 87 ++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 4 +- drivers/gpu/drm/xe/xe_pt.c | 9 ++- drivers/gpu/drm/xe/xe_vm.c | 2 +- 5 files changed, 84 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 705093cb63d7..e1a5a3a70c92 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -240,12 +240,7 @@ unlock_vm: goto retry_userptr; if (!ret) { - /* - * FIXME: Doing a full TLB invalidation for now, likely could - * defer TLB invalidate + fault response to a callback of fence - * too - */ - ret = xe_gt_tlb_invalidation(gt, NULL); + ret = xe_gt_tlb_invalidation(gt, NULL, vma); if (ret >= 0) ret = 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 9e026fd0a45d..0b37cd09a59a 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) } static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence) + struct xe_gt_tlb_invalidation_fence *fence, + u32 *action, int len) { struct xe_gt *gt = guc_to_gt(guc); - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, - XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | - XE_GUC_TLB_INVAL_FLUSH_CACHE, - }; int 
seqno; int ret; bool queue_work; @@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), + ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); if (!ret && fence) { fence->invalidation_time = ktime_get(); @@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc, * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL + * @vma: VMA to invalidate * - * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous - * and caller can either use the invalidation fence or seqno + - * xe_gt_tlb_invalidation_wait to wait for completion. + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. * * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. 
*/ int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) { - return send_tlb_invalidation(&gt->uc.guc, fence); + struct xe_device *xe = gt_to_xe(gt); +#define MAX_TLB_INVALIDATION_LEN 7 + u32 action[MAX_TLB_INVALIDATION_LEN]; + int len = 0; + + XE_BUG_ON(!vma); + + if (!xe->info.has_range_tlb_invalidation) { + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 start = vma->start; + u64 length = vma->end - vma->start + 1; + u64 align, end; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. 
+ */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(vma->start, align); + end = ALIGN(vma->start + length, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(vma->start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(vma->start, length); + } + + XE_BUG_ON(length < SZ_4K); + XE_BUG_ON(!is_power_of_2(length)); + XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); + XE_BUG_ON(!IS_ALIGNED(start, length)); + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = vma->vm->usm.asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); + + return send_tlb_invalidation(&gt->uc.guc, fence, action, len); } static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 7e6fbf46f0e3..b4c4f717bc8a 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -12,11 +12,13 @@ struct xe_gt; struct xe_guc; +struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence); + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b220d1d5cfe3..cde75708d843 100644 ---
a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; + struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; @@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation(ifence->gt, &ifence->base); + xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, - struct dma_fence *fence) + struct dma_fence *fence, + struct xe_vma *vma) { int ret; @@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; + ifence->vma = vma; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, int err; /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(gt, ifence, fence); + err = invalidation_fence_init(gt, ifence, fence, vma); if (err) { dma_fence_put(fence); kfree(ifence); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 83f8c8a186d8..4fc8e24f93ce 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(gt, vma)) { gt_needs_invalidate |= BIT(id); xe_device_wmb(xe); - seqno[id] = xe_gt_tlb_invalidation(gt, NULL); + seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma); if (seqno[id] < 0) return seqno[id]; } -- cgit