author     Dave Airlie <airlied@redhat.com>  2022-11-04 17:20:12 +1000
committer  Dave Airlie <airlied@redhat.com>  2022-11-04 17:33:34 +1000
commit     60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412 (patch)
tree       7e03a3b457f942c7eb3b865f535bcbe55bb72d11 /drivers/gpu/drm/i915/gt/gen8_ppgtt.c
parent     441f0ec0ae1ef7350fa546e03c12cc93082e11c6 (diff)
parent     8f956e9a2c9bdb22ac50c8b7656e2ea29c2e656c (diff)
Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:

- Fix for #7306: [Arc A380] white flickering when using arc as a secondary gpu (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag (Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)
- Fix a build error with the clang compiler on hwmon (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)
- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen8_ppgtt.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 92
1 file changed, 51 insertions, 41 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 2128b7a72a25..4daaa6f55668 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
+	u64 end = start + vma_res->vma_size;
 
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
@@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 		gen8_pte_t encode = pte_encode;
 		unsigned int page_size;
 		gen8_pte_t *vaddr;
-		u16 index, max;
+		u16 index, max, nent, i;
 
 		max = I915_PDES;
+		nent = 1;
 
 		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
@@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 
 			vaddr = px_vaddr(pd);
 		} else {
-			if (encode & GEN12_PPGTT_PTE_LM) {
-				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
-				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
-				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
-						       I915_GTT_PAGE_SIZE_64K));
-
-				index = __gen8_pte_index(start, 0) / 16;
-				page_size = I915_GTT_PAGE_SIZE_64K;
-
-				max /= 16;
-
-				vaddr = px_vaddr(pd);
-				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+			index = __gen8_pte_index(start, 0);
+			page_size = I915_GTT_PAGE_SIZE;
 
-				pt->is_compact = true;
-			} else {
-				GEM_BUG_ON(pt->is_compact);
-				index = __gen8_pte_index(start, 0);
-				page_size = I915_GTT_PAGE_SIZE;
+			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+				/*
+				 * Device local-memory on these platforms should
+				 * always use 64K pages or larger (including GTT
+				 * alignment), therefore if we know the whole
+				 * page-table needs to be filled we can always
+				 * safely use the compact-layout. Otherwise fall
+				 * back to the TLB hint with PS64. If this is
+				 * system memory we only bother with PS64.
+				 */
+				if ((encode & GEN12_PPGTT_PTE_LM) &&
+				    end - start >= SZ_2M && !index) {
+					index = __gen8_pte_index(start, 0) / 16;
+					page_size = I915_GTT_PAGE_SIZE_64K;
+
+					max /= 16;
+
+					vaddr = px_vaddr(pd);
+					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+					pt->is_compact = true;
+				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+					   rem >= I915_GTT_PAGE_SIZE_64K &&
+					   !(index % 16)) {
+					encode |= GEN12_PTE_PS64;
+					page_size = I915_GTT_PAGE_SIZE_64K;
+					nent = 16;
+				}
 			}
 
 			vaddr = px_vaddr(pt);
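For readers skimming the hunk above, the new selection policy is: use a 2M PDE when a whole aligned 2M chunk is backed; on local memory, switch the entire page table to the compact 64K layout when the mapping covers the full 2M range; otherwise fall back to the PS64 TLB hint when the chunk is 64K-aligned and starts on a 16-PTE boundary; else plain 4K. The standalone C sketch below only models that decision. The page-size constants are the usual 4K/64K/2M GTT sizes, but pick_layout() and its simplified inputs are invented for illustration and are not driver code.

/* Illustrative userspace model of the selection above (not driver code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_64K (1ull << 16)
#define PAGE_2M  (1ull << 21)

enum layout { LAYOUT_PDE_2M, LAYOUT_COMPACT_64K, LAYOUT_PS64, LAYOUT_4K };

static enum layout pick_layout(bool lmem, uint64_t dma, uint64_t start,
			       uint64_t end, uint64_t rem,
			       unsigned int pte_index)
{
	/* Whole aligned 2M chunk backed: point a 2M PDE at it directly. */
	if (!(dma % PAGE_2M) && rem >= PAGE_2M && !pte_index)
		return LAYOUT_PDE_2M;
	/* Local memory covering an entire page table: compact 64K layout. */
	if (lmem && end - start >= PAGE_2M && !pte_index)
		return LAYOUT_COMPACT_64K;
	/* 64K-aligned chunk on a 16-PTE boundary: PS64 TLB hint. */
	if (!(dma % PAGE_64K) && rem >= PAGE_64K && !(pte_index % 16))
		return LAYOUT_PS64;
	return LAYOUT_4K;
}

int main(void)
{
	/* System memory, 64K aligned, 2M remaining -> PS64 hint (2). */
	printf("%d\n", pick_layout(false, 0x10000, 0, PAGE_2M, PAGE_2M, 0));
	/* Local memory spanning several page tables -> compact 64K (1). */
	printf("%d\n", pick_layout(true, 0x10000, 0, 4 * PAGE_2M, PAGE_2M, 0));
	return 0;
}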
@@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
 
 		do {
 			GEM_BUG_ON(rem < page_size);
-			vaddr[index++] = encode | iter->dma;
+
+			for (i = 0; i < nent; i++) {
+				vaddr[index++] =
+					encode | (iter->dma + i *
+						  I915_GTT_PAGE_SIZE);
+			}
 
 			start += page_size;
 			iter->dma += page_size;
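At insert time PS64 only changes how many entries are written per step: the page table still holds one PTE per 4K page, so a 64K chunk becomes 16 consecutive entries whose addresses advance by 4K, which is what the nent loop above does (nent stays 1 for the other layouts). A minimal standalone illustration follows; the hint bit value and array size are placeholders, not the real GEN12 PTE encoding.

/* Standalone illustration of the 16-entry PS64 fill (not the driver's encoding). */
#include <stdint.h>
#include <stdio.h>

#define FAKE_PS64_HINT (1ull << 8)	/* placeholder bit, not GEN12_PTE_PS64 */
#define PAGE_4K        (1ull << 12)

static void fill_ps64_group(uint64_t *pte, unsigned int index, uint64_t dma)
{
	/* One 64K chunk = 16 PTEs, each pointing 4K further into the chunk. */
	for (unsigned int i = 0; i < 16; i++)
		pte[index + i] = FAKE_PS64_HINT | (dma + i * PAGE_4K);
}

int main(void)
{
	uint64_t pte[512] = { 0 };	/* one page table: 512 entries */

	fill_ps64_group(pte, 16, 0x80000000ull);
	printf("pte[16]=%#llx pte[31]=%#llx\n",
	       (unsigned long long)pte[16], (unsigned long long)pte[31]);
	return 0;
}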
@@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
 	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
 	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
 
+	/* XXX: we don't strictly need to use this layout */
+
 	if (!pt->is_compact) {
 		vaddr = px_vaddr(pd);
 		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
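The divide-by-16 used throughout the compact path comes from the fact that a level-0 index normally covers 4K, so in a 64K-page table each entry occupies every 16th slot. A toy calculation, assuming the usual 4K page shift and 512-entry levels (the helper below is invented for the example, it is not a driver function):

/* Toy index math for the compact 64K layout (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define GTT_PAGE_SHIFT 12	/* 4K pages */
#define LVL0_MASK      0x1ff	/* 512 entries per level */

static unsigned int compact_index(uint64_t offset)
{
	unsigned int lvl0 = (offset >> GTT_PAGE_SHIFT) & LVL0_MASK;

	return lvl0 / 16;	/* one slot per 64K page */
}

int main(void)
{
	/* 192K into the page table: level-0 index 48, compact index 3. */
	printf("%u\n", compact_index(192 * 1024));
	return 0;
}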
@@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
 
-	if (HAS_LMEM(gt->i915)) {
+	if (HAS_LMEM(gt->i915))
 		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
-
-		/*
-		 * On some platforms the hw has dropped support for 4K GTT pages
-		 * when dealing with LMEM, and due to the design of 64K GTT
-		 * pages in the hw, we can only mark the *entire* page-table as
-		 * operating in 64K GTT mode, since the enable bit is still on
-		 * the pde, and not the pte. And since we still need to allow
-		 * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
-		 * page-table with scratch pointing to LMEM, since that's
-		 * undefined from the hw pov. The simplest solution is to just
-		 * move the 64K scratch page to SMEM on such platforms and call
-		 * it a day, since that should work for all configurations.
-		 */
-		if (HAS_64K_PAGES(gt->i915))
-			ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-		else
-			ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
-	} else {
+	else
 		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
-		ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
-	}
+
+	/*
+	 * Using SMEM here instead of LMEM has the advantage of not reserving
+	 * high performance memory for a "never" used filler page. It also
+	 * removes the device access that would be required to initialise the
+	 * scratch page, reducing pressure on an even scarcer resource.
+	 */
+	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
 	ppgtt->vm.pte_encode = gen8_pte_encode;
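After this hunk only the page-table allocator depends on whether the device has local memory; the scratch page is always taken from system memory, for the reasons given in the new comment. A small sketch of that hook selection, with stand-in names rather than the driver's i915_address_space fields:

/* Sketch of the allocator-hook selection after this change (names are stand-ins). */
#include <stdbool.h>
#include <stdio.h>

struct fake_vm {
	const char *(*alloc_pt)(void);		/* backing store for page tables */
	const char *(*alloc_scratch)(void);	/* backing store for the scratch page */
};

static const char *alloc_from_lmem(void) { return "lmem"; }
static const char *alloc_from_smem(void) { return "smem"; }

static void setup_vm(struct fake_vm *vm, bool has_lmem)
{
	/* Page tables live in LMEM when the device has it... */
	vm->alloc_pt = has_lmem ? alloc_from_lmem : alloc_from_smem;
	/* ...but the scratch page now always comes from system memory. */
	vm->alloc_scratch = alloc_from_smem;
}

int main(void)
{
	struct fake_vm vm;

	setup_vm(&vm, true);
	printf("pt=%s scratch=%s\n", vm.alloc_pt(), vm.alloc_scratch());
	return 0;
}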