diff options
author | Dave Airlie <airlied@redhat.com> | 2022-08-18 08:45:45 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-08-18 08:45:49 +1000 |
commit | 8ae4be56dd0c2b851e7edc0f1d5cd441f88481c0 (patch) | |
tree | b6a8cd9507c9087e0e3cad94a67a51a16f066af6 /drivers/gpu/drm/i915/gt | |
parent | 568035b01cfb107af8d2e4bd2fb9aea22cf5b868 (diff) | |
parent | 9d50bff40e3e366886ec37299fc317edf84be0c9 (diff) |
Merge tag 'drm-intel-next-fixes-2022-08-11' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
- disable pci resize on 32-bit systems (Nirmoy)
- don't leak the ccs state (Matt)
- TLB invalidation fixes (Chris)
[now with all fixes of fixes]
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YvVumNCga+90fYN0@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt.c | 77 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt.h | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_pm.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_types.h | 18 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_migrate.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 |
7 files changed, 123 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 68c2b0d8f187..f435e06125aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -11,7 +11,9 @@ #include "pxp/intel_pxp.h" #include "i915_drv.h" +#include "i915_perf_oa_regs.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_ggtt_gmch.h" #include "intel_gt.h" @@ -36,8 +38,6 @@ static void __intel_gt_init_early(struct intel_gt *gt) { spin_lock_init(>->irq_lock); - mutex_init(>->tlb_invalidate_lock); - INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -48,6 +48,8 @@ static void __intel_gt_init_early(struct intel_gt *gt) intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); + mutex_init(>->tlb.invalidate_lock); + seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); intel_gt_pm_init_early(gt); intel_uc_init_early(>->uc); @@ -768,6 +770,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); intel_gt_fini_timelines(gt); + mutex_destroy(>->tlb.invalidate_lock); intel_engines_free(gt); } } @@ -906,7 +909,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, return rb; } -void intel_gt_invalidate_tlbs(struct intel_gt *gt) +static void mmio_invalidate_full(struct intel_gt *gt) { static const i915_reg_t gen8_regs[] = { [RENDER_CLASS] = GEN8_RTCR, @@ -924,13 +927,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; + intel_engine_mask_t awake, tmp; enum intel_engine_id id; const i915_reg_t *regs; unsigned int num = 0; - if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) - return; - if (GRAPHICS_VER(i915) == 12) { regs = gen12_regs; num = ARRAY_SIZE(gen12_regs); @@ -945,28 +946,41 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) "Platform does not implement TLB invalidation!")) return; - GEM_TRACE("\n"); - - assert_rpm_wakelock_held(&i915->runtime_pm); - - mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + awake = 0; for_each_engine(engine, gt, id) { struct reg_and_bit rb; + if (!intel_engine_pm_is_awake(engine)) + continue; + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) continue; intel_uncore_write_fw(uncore, rb.reg, rb.bit); + awake |= engine->mask; } + GT_TRACE(gt, "invalidated engines %08x\n", awake); + + /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ + if (awake && + (IS_TIGERLAKE(i915) || + IS_DG1(i915) || + IS_ROCKETLAKE(i915) || + IS_ALDERLAKE_S(i915) || + IS_ALDERLAKE_P(i915))) + intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); + spin_unlock_irq(&uncore->lock); - for_each_engine(engine, gt, id) { + for_each_engine_masked(engine, gt, awake, tmp) { + struct reg_and_bit rb; + /* * HW architecture suggest typical invalidation time at 40us, * with pessimistic cases up to 100us and a recommendation to @@ -974,12 +988,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) */ const unsigned int timeout_us = 100; const unsigned int timeout_ms = 4; - struct reg_and_bit rb; rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; - if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, @@ -996,5 +1006,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) * transitions. */ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); - mutex_unlock(>->tlb_invalidate_lock); +} + +static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) +{ + u32 cur = intel_gt_tlb_seqno(gt); + + /* Only skip if a *full* TLB invalidate barrier has passed */ + return (s32)(cur - ALIGN(seqno, 2)) > 0; +} + +void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) +{ + intel_wakeref_t wakeref; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (intel_gt_is_wedged(gt)) + return; + + if (tlb_seqno_passed(gt, seqno)) + return; + + with_intel_gt_pm_if_awake(gt, wakeref) { + mutex_lock(>->tlb.invalidate_lock); + if (tlb_seqno_passed(gt, seqno)) + goto unlock; + + mmio_invalidate_full(gt); + + write_seqcount_invalidate(>->tlb.seqno); +unlock: + mutex_unlock(>->tlb.invalidate_lock); + } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 82d6f248d876..40b06adf509a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); -void intel_gt_invalidate_tlbs(struct intel_gt *gt); +static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) +{ + return seqprop_sequence(>->tlb.seqno); +} + +static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) +{ + return intel_gt_tlb_seqno(gt) | 1; +} + +void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno); #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index bc898df7a48c..a334787a4939 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) for (tmp = 1, intel_gt_pm_get(gt); tmp; \ intel_gt_pm_put(gt), tmp = 0) +#define with_intel_gt_pm_if_awake(gt, wf) \ + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index df708802889d..3804a583382b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -11,6 +11,7 @@ #include <linux/llist.h> #include <linux/mutex.h> #include <linux/notifier.h> +#include <linux/seqlock.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/workqueue.h> @@ -83,7 +84,22 @@ struct intel_gt { struct intel_uc uc; struct intel_gsc gsc; - struct mutex tlb_invalidate_lock; + struct { + /* Serialize global tlb invalidations */ + struct mutex invalidate_lock; + + /* + * Batch TLB invalidations + * + * After unbinding the PTE, we need to ensure the TLB + * are invalidated prior to releasing the physical pages. + * But we only need one such invalidation for all unbinds, + * so we track how many TLB invalidations have been + * performed since unbind the PTE and only emit an extra + * invalidate if no full barrier has been passed. + */ + seqcount_mutex_t seqno; + } tlb; struct i915_wa_list wa_list; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 2c35324b5f68..2b10b96b17b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -708,7 +708,7 @@ intel_context_migrate_copy(struct intel_context *ce, u8 src_access, dst_access; struct i915_request *rq; int src_sz, dst_sz; - bool ccs_is_src; + bool ccs_is_src, overwrite_ccs; int err; GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); @@ -749,6 +749,8 @@ intel_context_migrate_copy(struct intel_context *ce, get_ccs_sg_sgt(&it_ccs, bytes_to_cpy); } + overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem; + src_offset = 0; dst_offset = CHUNK_SZ; if (HAS_64K_PAGES(ce->engine->i915)) { @@ -852,6 +854,25 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; ccs_bytes_to_cpy -= ccs_sz; + } else if (overwrite_ccs) { + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + /* + * While we can't always restore/manage the CCS state, + * we still need to ensure we don't leak the CCS state + * from the previous user, so make sure we overwrite it + * with something. + */ + err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, + dst_offset, DIRECT_ACCESS, len); + if (err) + goto out_rq; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; } /* Arbitration is re-enabled between requests. */ diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index d8b94d638559..6ee8d1127016 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm, void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res) { - if (vma_res->allocated) - vm->clear_range(vm, vma_res->start, vma_res->vma_size); + if (!vma_res->allocated) + return; + + vm->clear_range(vm, vma_res->start, vma_res->vma_size); + if (vma_res->tlb) + vma_invalidate_tlb(vm, vma_res->tlb); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 6e90032e12e9..aa6aed837194 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -15,6 +15,7 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +#ifdef CONFIG_64BIT static void _release_bars(struct pci_dev *pdev) { int resno; @@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); } +#else +static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {} +#endif static int region_lmem_release(struct intel_memory_region *mem) |