33 files changed, 879 insertions, 344 deletions
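Several of the hunks in this series (igt_ppgtt_compact in huge_pages.c, xehpsdv_ppgtt_insert_huge and __gen8_ppgtt_clear in gen8_ppgtt.c) revolve around the compact page-table layout: a PT that maps 64K pages holds 512 / 16 = 32 entries of 8 bytes, i.e. 256 bytes, and an address's slot is the usual level-0 index divided by 16. The following is a minimal standalone sketch of that index arithmetic, assuming the standard GEN8 ppGTT layout (4K granule, 9 index bits per level); it models the math and is not code from the series:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN8_PDES 512                       /* entries in a full 4K table */
#define SZ_64K    0x10000ull
#define SZ_2M     0x200000ull

/* Level-0 (PTE) and level-1 (PDE) indices for a ppGTT address. */
static unsigned int pte_index(uint64_t addr) { return (addr >> 12) & 511; }
static unsigned int pde_index(uint64_t addr) { return (addr >> 21) & 511; }

int main(void)
{
        uint64_t addr;

        /*
         * With 64K pages only every 16th 4K slot is used, so the compact
         * layout divides the index by 16: 512 / 16 = 32 entries of 8 bytes
         * = 256 bytes per PT, although the backing page stays 4K (or 64K
         * on lmem); anything written past entry 31 should be ignored.
         */
        for (addr = 0; addr < SZ_2M; addr += SZ_64K) {
                assert(pte_index(addr) % 16 == 0); /* mirrors the GEM_BUG_ON */
                printf("addr %#8llx -> pde %u, compact pte %2u / %u\n",
                       (unsigned long long)addr, pde_index(addr),
                       pte_index(addr) / 16, GEN8_PDES / 16);
        }
        return 0;
}

The same /16 scaling appears below as index = __gen8_pte_index(start, 0) / 16 and max /= 16 in xehpsdv_ppgtt_insert_huge(), and as num_ptes /= 16, pte /= 16 in __gen8_ppgtt_clear().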
diff --git a/drivers/gpu/drm/i915/Kconfig.unstable b/drivers/gpu/drm/i915/Kconfig.unstable index 0c2276155c2b..cf151a297ed7 100644 --- a/drivers/gpu/drm/i915/Kconfig.unstable +++ b/drivers/gpu/drm/i915/Kconfig.unstable @@ -19,11 +19,3 @@ config DRM_I915_UNSTABLE Recommended for driver developers _only_. If in the slightest bit of doubt, say "N". - -config DRM_I915_UNSTABLE_FAKE_LMEM - bool "Enable the experimental fake lmem" - depends on DRM_I915_UNSTABLE - default n - help - Convert some system memory into a fake local memory region for - testing. diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 0c32210bf503..934a9f9e7dab 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -1321,7 +1321,7 @@ tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (crtc_state->port_clock > 270000) { - if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + if (IS_TGL_UY(dev_priv)) { return intel_get_buf_trans(&tgl_uy_combo_phy_trans_dp_hbr2, n_entries); } else { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b9c3196b91ca..636cdf8a73b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -497,13 +497,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; + i915->stolen_usable_size = + resource_size(&i915->dsm) - reserved_total; + drm_dbg(&i915->drm, "Memory reserved for graphics device: %lluK, usable: %lluK\n", (u64)resource_size(&i915->dsm) >> 10, - ((u64)resource_size(&i915->dsm) - reserved_total) >> 10); + (u64)i915->stolen_usable_size >> 10); - i915->stolen_usable_size = - resource_size(&i915->dsm) - reserved_total; + if (i915->stolen_usable_size == 0) + return 0; /* Basic memrange allocator for stolen space. */ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8424ee8c5eb8..0528fe1fc9b3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1479,6 +1479,65 @@ out: return err; } +static int igt_ppgtt_compact(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* + * Simple test to catch issues with compact 64K pages -- the pt is + * compacted to 256B, which gives us 32 entries per pt; however, since the + * backing page for the pt is 4K, any extra entries we might incorrectly + * write out should be ignored by the HW. If we ever hit such a case, this + * test should catch it, since some of our writes would land in scratch. + */ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks compact 64K page support, skipping\n"); + return 0; + } + + if (!HAS_LMEM(i915)) { + pr_info("device lacks LMEM support, skipping\n"); + return 0; + } + + /* We want the range to cover multiple page-table boundaries.
*/ + obj = i915_gem_object_create_lmem(i915, SZ_4M, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("LMEM compact unable to allocate huge-page(s)\n"); + goto out_unpin; + } + + /* + * Disable 2M GTT pages by forcing the page-size to 64K for the GTT + * insertion. + */ + obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K; + + err = igt_write_huge(i915, obj); + if (err) + pr_err("LMEM compact write-huge failed\n"); + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct drm_i915_private *i915 = arg; @@ -1736,6 +1795,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), + SUBTEST(igt_ppgtt_compact), }; if (!HAS_PPGTT(i915)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 8f28e46e8ee5..ddd0772fd828 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -40,6 +40,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -411,14 +412,19 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -429,7 +435,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -456,7 +462,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -487,8 +493,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -501,7 +506,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git 
a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c43e724afa9f..f574da00eff1 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, start, end, lvl); } else { unsigned int count; + unsigned int pte = gen8_pd_index(start, 0); + unsigned int num_ptes; u64 *vaddr; count = gen8_pt_count(start, end); @@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, atomic_read(&pt->used)); GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); + num_ptes = count; + if (pt->is_compact) { + GEM_BUG_ON(num_ptes % 16); + GEM_BUG_ON(pte % 16); + num_ptes /= 16; + pte /= 16; + } + vaddr = px_vaddr(pt); - memset64(vaddr + gen8_pd_index(start, 0), + memset64(vaddr + pte, vm->scratch[0]->encode, - count); + num_ptes); atomic_sub(count, &pt->used); start += count; @@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, return idx; } +static void +xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + struct sgt_dma *iter, + enum i915_cache_level cache_level, + u32 flags) +{ + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); + unsigned int rem = sg_dma_len(iter->sg); + u64 start = vma_res->start; + + GEM_BUG_ON(!i915_vm_is_4lvl(vm)); + + do { + struct i915_page_directory * const pdp = + gen8_pdp_for_page_address(vm, start); + struct i915_page_directory * const pd = + i915_pd_entry(pdp, __gen8_pte_index(start, 2)); + struct i915_page_table *pt = + i915_pt_entry(pd, __gen8_pte_index(start, 1)); + gen8_pte_t encode = pte_encode; + unsigned int page_size; + gen8_pte_t *vaddr; + u16 index, max; + + max = I915_PDES; + + if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && + !__gen8_pte_index(start, 0)) { + index = __gen8_pte_index(start, 1); + encode |= GEN8_PDE_PS_2M; + page_size = I915_GTT_PAGE_SIZE_2M; + + vaddr = px_vaddr(pd); + } else { + if (encode & GEN12_PPGTT_PTE_LM) { + GEM_BUG_ON(__gen8_pte_index(start, 0) % 16); + GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K); + GEM_BUG_ON(!IS_ALIGNED(iter->dma, + I915_GTT_PAGE_SIZE_64K)); + + index = __gen8_pte_index(start, 0) / 16; + page_size = I915_GTT_PAGE_SIZE_64K; + + max /= 16; + + vaddr = px_vaddr(pd); + vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; + + pt->is_compact = true; + } else { + GEM_BUG_ON(pt->is_compact); + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; + } + + vaddr = px_vaddr(pt); + } + + do { + GEM_BUG_ON(rem < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = sg_dma_len(iter->sg); + if (!rem) + break; + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < max); + + vma_res->page_sizes_gtt |= page_size; + } while (iter->sg && sg_dma_len(iter->sg)); +} + static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, struct i915_vma_resource *vma_res, struct sgt_dma *iter, @@ -586,7 +685,10 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma_res); if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge(vm, vma_res, &iter, 
cache_level, flags); + if (HAS_64K_PAGES(vm->i915)) + xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); + else + gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); } else { u64 idx = vma_res->start >> GEN8_PTE_SHIFT; @@ -613,13 +715,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, gen8_pdp_for_page_index(vm, idx); struct i915_page_directory *pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); gen8_pte_t *vaddr; - vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + GEM_BUG_ON(pt->is_compact); + + vaddr = px_vaddr(pt); vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } +static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + u64 idx = offset >> GEN8_PTE_SHIFT; + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + struct i915_page_directory *pd = + i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); + gen8_pte_t *vaddr; + + GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K)); + GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K)); + + if (!pt->is_compact) { + vaddr = px_vaddr(pd); + vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K; + pt->is_compact = true; + } + + vaddr = px_vaddr(pt); + vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); +} + +static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + if (flags & PTE_LM) + return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset, + level, flags); + + return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags); +} + static int gen8_init_scratch(struct i915_address_space *vm) { u32 pte_flags; @@ -819,7 +964,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; - ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; + if (HAS_64K_PAGES(gt->i915)) + ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry; + else + ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; ppgtt->vm.foreach = gen8_ppgtt_foreach; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e53008b4dd05..e855c801ba28 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -293,6 +293,46 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) GEM_DEBUG_WARN_ON(iir); } +static u32 get_reset_domain(u8 ver, enum intel_engine_id id) +{ + u32 reset_domain; + + if (ver >= 11) { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + reset_domain = engine_reset_domains[id]; + } else { + static const u32 
engine_reset_domains[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + reset_domain = engine_reset_domains[id]; + } + + return reset_domain; +} + static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, u8 logical_instance) { @@ -328,38 +368,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); - if (GRAPHICS_VER(gt->i915) >= 11) { - static const u32 engine_reset_domains[] = { - [RCS0] = GEN11_GRDOM_RENDER, - [BCS0] = GEN11_GRDOM_BLT, - [VCS0] = GEN11_GRDOM_MEDIA, - [VCS1] = GEN11_GRDOM_MEDIA2, - [VCS2] = GEN11_GRDOM_MEDIA3, - [VCS3] = GEN11_GRDOM_MEDIA4, - [VCS4] = GEN11_GRDOM_MEDIA5, - [VCS5] = GEN11_GRDOM_MEDIA6, - [VCS6] = GEN11_GRDOM_MEDIA7, - [VCS7] = GEN11_GRDOM_MEDIA8, - [VECS0] = GEN11_GRDOM_VECS, - [VECS1] = GEN11_GRDOM_VECS2, - [VECS2] = GEN11_GRDOM_VECS3, - [VECS3] = GEN11_GRDOM_VECS4, - }; - GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || - !engine_reset_domains[id]); - engine->reset_domain = engine_reset_domains[id]; - } else { - static const u32 engine_reset_domains[] = { - [RCS0] = GEN6_GRDOM_RENDER, - [BCS0] = GEN6_GRDOM_BLT, - [VCS0] = GEN6_GRDOM_MEDIA, - [VCS1] = GEN8_GRDOM_MEDIA2, - [VECS0] = GEN6_GRDOM_VECS, - }; - GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || - !engine_reset_domains[id]); - engine->reset_domain = engine_reset_domains[id]; - } + engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915), + id); engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e8403fa53909..ee46f933d070 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -65,8 +65,6 @@ int intel_gt_probe_lmem(struct intel_gt *gt) int err; mem = intel_gt_setup_lmem(gt); - if (mem == ERR_PTR(-ENODEV)) - mem = intel_gt_setup_fake_lmem(gt); if (IS_ERR(mem)) { err = PTR_ERR(mem); if (err == -ENODEV) @@ -913,6 +911,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) return intel_uncore_read_fw(gt->uncore, reg); } +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 2dad46c3eff2..0f571c8ee22b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -85,6 +85,7 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, } u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 49a8fb63e6e5..c548c193cd35 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ 
b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -225,6 +225,18 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) { + vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M; + vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M; + } else if (HAS_64K_PAGES(vm->i915)) { + vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K; + vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K; + } + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; INIT_LIST_HEAD(&vm->bound_list); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 8073438b67c8..9d83c2d3959c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -29,6 +29,8 @@ #include "i915_selftest.h" #include "i915_vma_resource.h" #include "i915_vma_types.h" +#include "i915_params.h" +#include "intel_memory_region.h" #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) @@ -90,6 +92,8 @@ typedef u64 gen8_pte_t; #define GEN12_GGTT_PTE_LM BIT_ULL(1) +#define GEN12_PDE_64K BIT(6) + /* * Cacheability Control is a 4-bit value. The low three bits are stored in bits * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. @@ -158,6 +162,7 @@ struct i915_page_table { atomic_t used; struct i915_page_table *stash; }; + bool is_compact; }; struct i915_page_directory { @@ -195,6 +200,14 @@ void *__px_vaddr(struct drm_i915_gem_object *p); struct i915_vm_pt_stash { /* preallocated chains of page tables/directories */ struct i915_page_table *pt[2]; + /* + * Optionally override the alignment/size of the physical page that + * contains each PT. If not set defaults back to the usual + * I915_GTT_PAGE_SIZE_4K. This does not influence the other paging + * structures. MUST be a power-of-two. ONLY applicable on discrete + * platforms. + */ + int pt_sz; }; struct i915_vma_ops { @@ -223,6 +236,7 @@ struct i915_address_space { struct device *dma; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ + u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1]; unsigned int bind_async_flags; @@ -384,6 +398,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); } +static inline u64 i915_vm_min_alignment(struct i915_address_space *vm, + enum intel_memory_type type) +{ + /* avoid INTEL_MEMORY_MOCK overflow */ + if ((int)type >= ARRAY_SIZE(vm->min_alignment)) + type = INTEL_MEMORY_SYSTEM; + + return vm->min_alignment[type]; +} + +static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm, + struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + enum intel_memory_type type = mr ? 
mr->type : INTEL_MEMORY_SYSTEM; + + return i915_vm_min_alignment(vm, type); +} + static inline bool i915_vm_has_cache_coloring(struct i915_address_space *vm) { @@ -570,7 +603,7 @@ void free_scratch(struct i915_address_space *vm); struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz); struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz); -struct i915_page_table *alloc_pt(struct i915_address_space *vm); +struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz); struct i915_page_directory *alloc_pd(struct i915_address_space *vm); struct i915_page_directory *__alloc_pd(int npde);
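The intel_migrate.c changes that follow rework the migration VM on HAS_64K_PAGES platforms into three CHUNK_SZ windows (smem, lmem src, lmem dst) plus a trailing window that gives every page table 64K of virtual space for its PTE updates. Here is a small userspace model of the layout arithmetic described in the migrate_vm() comment below (CHUNK_SZ is 8M there, per the "We copy in 8MiB chunks" comment); it is illustrative only:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_64K   UINT64_C(0x10000)
#define SZ_2M    UINT64_C(0x200000)
#define CHUNK_SZ (UINT64_C(8) << 20)  /* 8M: "We copy in 8MiB chunks" */

int main(void)
{
        uint64_t sz = 3 * CHUNK_SZ;                /* smem + lmem src + lmem dst */
        uint64_t pte_win = (sz / SZ_2M) * SZ_64K;  /* one 64K slot per PT */

        printf("smem window: [0x0, 0x%" PRIx64 ")\n", CHUNK_SZ);
        printf("lmem src:    [0x%" PRIx64 ", 0x%" PRIx64 ")\n",
               CHUNK_SZ, 2 * CHUNK_SZ);
        printf("lmem dst:    [0x%" PRIx64 ", 0x%" PRIx64 ")\n",
               2 * CHUNK_SZ, 3 * CHUNK_SZ);
        printf("PTE window:  [0x%" PRIx64 ", 0x%" PRIx64 ") covering %" PRIu64 " PTs\n",
               sz, sz + pte_win, sz / SZ_2M);
        return 0;
}

This is exactly the sz = 3 * CHUNK_SZ followed by sz += (sz / SZ_2M) * SZ_64K computation in migrate_vm() below, and the reason emit_pte() later points the lmem windows at CHUNK_SZ and 2 * CHUNK_SZ.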
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 18b44af56969..20444d6ceb3c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -32,6 +32,38 @@ static bool engine_supports_migration(struct intel_engine_cs *engine) return true; } +static void xehpsdv_toggle_pdes(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + /* + * Insert a dummy PTE into every PT that will map to LMEM to ensure + * we have a correctly setup PDE structure for later use. + */ + vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM); + GEM_BUG_ON(!pt->is_compact); + d->offset += SZ_2M; +} + +static void xehpsdv_insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + /* + * We are playing tricks here, since the actual pt, from the hw + * pov, is only 256 bytes with 32 entries, or 4096 bytes with 512 + * entries, but we are still guaranteed that the physical + * alignment is 64K underneath for the pt, and we are careful + * not to access the space in the void. + */ + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM); + d->offset += SZ_64K; +} + static void insert_pte(struct i915_address_space *vm, struct i915_page_table *pt, void *data) @@ -74,7 +106,32 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * i.e. within the same non-preemptible window so that we do not switch * to another migration context that overwrites the PTE. * - * TODO: Add support for huge LMEM PTEs + * This changes quite a bit on platforms with HAS_64K_PAGES support, + * where we instead have three windows, each CHUNK_SZ in size. The + * first is reserved for mapping system-memory, and that just uses the + * 512 entry layout using 4K GTT pages. The other two windows just map + * lmem pages and must use the new compact 32 entry layout using 64K GTT + * pages, which ensures we can address any lmem object that the user + * throws at us. We then also use the xehpsdv_toggle_pdes as a way of + * just toggling the PDE bit (GEN12_PDE_64K) for us, to enable the + * compact layout for each of these page-tables that fall within the + * [CHUNK_SZ, 3 * CHUNK_SZ) range. + * + * We lay the ppGTT out as: + * + * [0, CHUNK_SZ) -> first window/object, maps smem + * [CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src + * [2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst + * + * For the PTE window it's also quite different, since each PTE must + * point to some 64K page, one for each PT (since it's in lmem), and yet + * each is only <= 4096 bytes, but since the unused space within that PTE + * range is never touched, this should be fine. + * + * So basically each PT now needs 64K of virtual memory, instead of 4K, + * which looks like: + * + * [3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE */ vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY); @@ -86,6 +143,9 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; } + if (HAS_64K_PAGES(gt->i915)) + stash.pt_sz = I915_GTT_PAGE_SIZE_64K; + /* * Each engine instance is assigned its own chunk in the VM, so * that we can run multiple instances concurrently @@ -105,14 +165,20 @@ * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need * 4x2 page directories for source/destination. */ - sz = 2 * CHUNK_SZ; + if (HAS_64K_PAGES(gt->i915)) + sz = 3 * CHUNK_SZ; + else + sz = 2 * CHUNK_SZ; d.offset = base + sz; /* * We need another page directory setup so that we can write * the 8x512 PTE in each chunk. */ - sz += (sz >> 12) * sizeof(u64); + if (HAS_64K_PAGES(gt->i915)) + sz += (sz / SZ_2M) * SZ_64K; + else + sz += (sz >> 12) * sizeof(u64); err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz); if (err) @@ -133,7 +199,18 @@ goto err_vm; /* Now allow the GPU to rewrite the PTE via its own ppGTT */ - vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); + if (HAS_64K_PAGES(gt->i915)) { + vm->vm.foreach(&vm->vm, base, d.offset - base, + xehpsdv_insert_pte, &d); + d.offset = base + CHUNK_SZ; + vm->vm.foreach(&vm->vm, + d.offset, + 2 * CHUNK_SZ, + xehpsdv_toggle_pdes, &d); + } else { + vm->vm.foreach(&vm->vm, base, d.offset - base, + insert_pte, &d); + } } return &vm->vm; @@ -269,19 +346,38 @@ static int emit_pte(struct i915_request *rq, u64 offset, int length) { + bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915); const u64 encode = rq->context->vm->pte_encode(0, cache_level, is_lmem ?
PTE_LM : 0); struct intel_ring *ring = rq->ring; - int total = 0; + int pkt, dword_length; + u32 total = 0; + u32 page_size; u32 *hdr, *cs; - int pkt; GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + page_size = I915_GTT_PAGE_SIZE; + dword_length = 0x400; + /* Compute the page directory offset for the target address range */ - offset >>= 12; - offset *= sizeof(u64); - offset += 2 * CHUNK_SZ; + if (has_64K_pages) { + GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M)); + + offset /= SZ_2M; + offset *= SZ_64K; + offset += 3 * CHUNK_SZ; + + if (is_lmem) { + page_size = I915_GTT_PAGE_SIZE_64K; + dword_length = 0x40; + } + } else { + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; + } + offset += (u64)rq->engine->instance << 32; cs = intel_ring_begin(rq, 6); @@ -289,7 +385,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); hdr = cs; @@ -299,6 +395,8 @@ static int emit_pte(struct i915_request *rq, do { if (cs - hdr >= pkt) { + int dword_rem; + *hdr += cs - hdr - 2; *cs++ = MI_NOOP; @@ -310,7 +408,18 @@ static int emit_pte(struct i915_request *rq, if (IS_ERR(cs)) return PTR_ERR(cs); - pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + dword_rem = dword_length; + if (has_64K_pages) { + if (IS_ALIGNED(total, SZ_2M)) { + offset = round_up(offset, SZ_64K); + } else { + dword_rem = SZ_2M - (total & (SZ_2M - 1)); + dword_rem /= page_size; + dword_rem *= 2; + } + } + + pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); hdr = cs; @@ -319,13 +428,15 @@ static int emit_pte(struct i915_request *rq, *cs++ = upper_32_bits(offset); } + GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size)); + *cs++ = lower_32_bits(encode | it->dma); *cs++ = upper_32_bits(encode | it->dma); offset += 8; - total += I915_GTT_PAGE_SIZE; + total += page_size; - it->dma += I915_GTT_PAGE_SIZE; + it->dma += page_size; if (it->dma >= it->max) { it->sg = __sg_next(it->sg); if (!it->sg || sg_dma_len(it->sg) == 0) @@ -356,7 +467,8 @@ static bool wa_1209644611_applies(int ver, u32 size) return height % 4 == 3 && height <= 8; } -static int emit_copy(struct i915_request *rq, int size) +static int emit_copy(struct i915_request *rq, + u32 dst_offset, u32 src_offset, int size) { const int ver = GRAPHICS_VER(rq->engine->i915); u32 instance = rq->engine->instance; @@ -371,31 +483,31 @@ static int emit_copy(struct i915_request *rq, int size) *cs++ = BLT_DEPTH_32 | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = dst_offset; *cs++ = instance; *cs++ = 0; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; *cs++ = instance; } else if (ver >= 8) { *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = dst_offset; *cs++ = instance; *cs++ = 0; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; *cs++ = instance; } else { GEM_BUG_ON(instance); *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = 
dst_offset; *cs++ = PAGE_SIZE; - *cs++ = 0; /* src offset */ + *cs++ = src_offset; } intel_ring_advance(rq, cs); @@ -423,6 +535,7 @@ intel_context_migrate_copy(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); do { + u32 src_offset, dst_offset; int len; rq = i915_request_create(ce); @@ -450,15 +563,28 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, - CHUNK_SZ); + src_offset = 0; + dst_offset = CHUNK_SZ; + if (HAS_64K_PAGES(ce->engine->i915)) { + GEM_BUG_ON(!src_is_lmem && !dst_is_lmem); + + src_offset = 0; + dst_offset = 0; + if (src_is_lmem) + src_offset = CHUNK_SZ; + if (dst_is_lmem) + dst_offset = 2 * CHUNK_SZ; + } + + len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, + src_offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; } err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, - CHUNK_SZ, len); + dst_offset, len); if (err < 0) goto out_rq; if (err < len) { @@ -470,7 +596,7 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - err = emit_copy(rq, len); + err = emit_copy(rq, dst_offset, src_offset, len); /* Arbitration is re-enabled between requests. */ out_rq: @@ -488,14 +614,15 @@ out_ce: return err; } -static int emit_clear(struct i915_request *rq, int size, u32 value) +static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value) { const int ver = GRAPHICS_VER(rq->engine->i915); - u32 instance = rq->engine->instance; u32 *cs; GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + offset += (u64)rq->engine->instance << 32; + cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -505,17 +632,17 @@ static int emit_clear(struct i915_request *rq, int size, u32 value) *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = 0; /* offset */ - *cs++ = instance; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); *cs++ = value; *cs++ = MI_NOOP; } else { - GEM_BUG_ON(instance); + GEM_BUG_ON(upper_32_bits(offset)); *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = 0; + *cs++ = lower_32_bits(offset); *cs++ = value; } @@ -542,6 +669,7 @@ intel_context_migrate_clear(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); do { + u32 offset; int len; rq = i915_request_create(ce); @@ -569,7 +697,11 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); + offset = 0; + if (HAS_64K_PAGES(ce->engine->i915) && is_lmem) + offset = CHUNK_SZ; + + len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; @@ -579,7 +711,7 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - err = emit_clear(rq, len, value); + err = emit_clear(rq, offset, len, value); /* Arbitration is re-enabled between requests. 
*/ out_rq: diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 48e6e2f87700..d91e2beb7517 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -12,7 +12,7 @@ #include "gen6_ppgtt.h" #include "gen8_ppgtt.h" -struct i915_page_table *alloc_pt(struct i915_address_space *vm) +struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz) { struct i915_page_table *pt; @@ -20,12 +20,13 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm) if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + pt->base = vm->alloc_pt_dma(vm, sz); if (IS_ERR(pt->base)) { kfree(pt); return ERR_PTR(-ENOMEM); } + pt->is_compact = false; atomic_set(&pt->used, 0); return pt; } @@ -220,17 +221,25 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm, u64 size) { unsigned long count; - int shift, n; + int shift, n, pt_sz; shift = vm->pd_shift; if (!shift) return 0; + pt_sz = stash->pt_sz; + if (!pt_sz) + pt_sz = I915_GTT_PAGE_SIZE_4K; + else + GEM_BUG_ON(!IS_DGFX(vm->i915)); + + GEM_BUG_ON(!is_power_of_2(pt_sz)); + count = pd_count(size, shift); while (count--) { struct i915_page_table *pt; - pt = alloc_pt(vm); + pt = alloc_pt(vm, pt_sz); if (IS_ERR(pt)) { i915_vm_free_pt_stash(vm, stash); return PTR_ERR(pt); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index a04e0cf4a94b..fc00888ca4b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -14,60 +14,6 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_regs.h" -static int init_fake_lmem_bar(struct intel_memory_region *mem) -{ - struct drm_i915_private *i915 = mem->i915; - struct i915_ggtt *ggtt = to_gt(i915)->ggtt; - unsigned long n; - int ret; - - /* We want to 1:1 map the mappable aperture to our reserved region */ - - mem->fake_mappable.start = 0; - mem->fake_mappable.size = resource_size(&mem->region); - mem->fake_mappable.color = I915_COLOR_UNEVICTABLE; - - ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable); - if (ret) - return ret; - - mem->remap_addr = dma_map_resource(i915->drm.dev, - mem->region.start, - mem->fake_mappable.size, - DMA_BIDIRECTIONAL, - DMA_ATTR_FORCE_CONTIGUOUS); - if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) { - drm_mm_remove_node(&mem->fake_mappable); - return -EINVAL; - } - - for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) { - ggtt->vm.insert_page(&ggtt->vm, - mem->remap_addr + (n << PAGE_SHIFT), - n << PAGE_SHIFT, - I915_CACHE_NONE, 0); - } - - mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr, - mem->fake_mappable.size); - - return 0; -} - -static void release_fake_lmem_bar(struct intel_memory_region *mem) -{ - if (!drm_mm_node_allocated(&mem->fake_mappable)) - return; - - drm_mm_remove_node(&mem->fake_mappable); - - dma_unmap_resource(mem->i915->drm.dev, - mem->remap_addr, - mem->fake_mappable.size, - DMA_BIDIRECTIONAL, - DMA_ATTR_FORCE_CONTIGUOUS); -} - static int region_lmem_release(struct intel_memory_region *mem) { @@ -75,7 +21,6 @@ region_lmem_release(struct intel_memory_region *mem) ret = intel_region_ttm_fini(mem); io_mapping_fini(&mem->iomap); - release_fake_lmem_bar(mem); return ret; } @@ -85,17 +30,10 @@ region_lmem_init(struct intel_memory_region *mem) { int ret; - if (mem->i915->params.fake_lmem_start) { - ret = init_fake_lmem_bar(mem); - GEM_BUG_ON(ret); - } - if (!io_mapping_init_wc(&mem->iomap, mem->io_start, 
- resource_size(&mem->region))) { - ret = -EIO; - goto out_no_io; - } + resource_size(&mem->region))) + return -EIO; ret = intel_region_ttm_init(mem); if (ret) @@ -105,8 +43,6 @@ region_lmem_init(struct intel_memory_region *mem) out_no_buddy: io_mapping_fini(&mem->iomap); -out_no_io: - release_fake_lmem_bar(mem); return ret; } @@ -117,50 +53,6 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = { .init_object = __i915_gem_ttm_object_init, }; -struct intel_memory_region * -intel_gt_setup_fake_lmem(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - struct intel_memory_region *mem; - resource_size_t mappable_end; - resource_size_t io_start; - resource_size_t start; - - if (!HAS_LMEM(i915)) - return ERR_PTR(-ENODEV); - - if (!i915->params.fake_lmem_start) - return ERR_PTR(-ENODEV); - - GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt)); - - /* Your mappable aperture belongs to me now! */ - mappable_end = pci_resource_len(pdev, 2); - io_start = pci_resource_start(pdev, 2); - start = i915->params.fake_lmem_start; - - mem = intel_memory_region_create(i915, - start, - mappable_end, - PAGE_SIZE, - io_start, - INTEL_MEMORY_LOCAL, - 0, - &intel_region_lmem_ops); - if (!IS_ERR(mem)) { - drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n", - &mem->region); - drm_info(&i915->drm, - "Intel graphics fake LMEM IO start: %llx\n", - (u64)mem->io_start); - drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n", - (u64)resource_size(&mem->region)); - } - - return mem; -} - static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { @@ -207,8 +99,29 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (!IS_DGFX(i915)) return ERR_PTR(-ENODEV); - /* Stolen starts from GSMBASE on DG1 */ - lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE); + if (HAS_FLAT_CCS(i915)) { + u64 tile_stolen, flat_ccs_base; + + lmem_size = pci_resource_len(pdev, 2); + flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + + if (GEM_WARN_ON(lmem_size < flat_ccs_base)) + return ERR_PTR(-ENODEV); + + tile_stolen = lmem_size - flat_ccs_base; + + /* If the FLAT_CCS_BASE_ADDR register is not populated, flag an error */ + if (tile_stolen == lmem_size) + drm_err(&i915->drm, + "CCS_BASE_ADDR register did not have expected value\n"); + + lmem_size -= tile_stolen; + } else { + /* Stolen starts from GSMBASE without CCS */ + lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); + } + io_start = pci_resource_start(pdev, 2); if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
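In the HAS_FLAT_CCS branch of setup_lmem() above, the usable lmem size falls out of a short computation: the 64K-granular CCS base is decoded from XEHPSDV_FLAT_CCS_BASE_ADDR, and everything from that base to the end of BAR2 is treated as tile stolen and subtracted. A standalone model of that arithmetic (the register value and BAR size here are invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_64K                 UINT64_C(0x10000)
#define XEHPSDV_CCS_BASE_SHIFT 8

int main(void)
{
        uint64_t lmem_size = UINT64_C(4) << 30; /* pretend BAR2 is 4G */
        /* pretend the register encodes a 3G base: (3G / 64K) << 8 */
        uint64_t reg = (UINT64_C(3) << 30 >> 16) << XEHPSDV_CCS_BASE_SHIFT;

        uint64_t flat_ccs_base = (reg >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
        uint64_t tile_stolen = lmem_size - flat_ccs_base;

        /* an unpopulated register leaves base == 0, so stolen == lmem */
        if (tile_stolen == lmem_size)
                printf("CCS_BASE_ADDR register did not have expected value\n");

        printf("flat_ccs_base=0x%" PRIx64 " tile_stolen=0x%" PRIx64
               " usable lmem=0x%" PRIx64 "\n",
               flat_ccs_base, tile_stolen, lmem_size - tile_stolen);
        return 0;
}

With these example values the 4G BAR loses 1G of tile stolen, leaving 3G of lmem, matching the "Stolen starts from GSMBASE without CCS" comment for the non-CCS path.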
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.h b/drivers/gpu/drm/i915/gt/intel_region_lmem.h index 062d0542ae34..1438576b527a 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.h +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.h @@ -10,7 +10,4 @@ struct intel_gt; struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt); -struct intel_memory_region * -intel_gt_setup_fake_lmem(struct intel_gt *gt); - #endif /* !__INTEL_REGION_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fd95449ed46d..c8124101aada 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1486,7 +1486,7 @@ void intel_rps_enable(struct intel_rps *rps) if (has_busy_stats(rps)) intel_rps_set_timer(rps); - else if (GRAPHICS_VER(i915) >= 6) + else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) intel_rps_set_interrupts(rps); else /* Ironlake currently uses intel_ips.ko */ {} diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 26038066e90b..b3067aed7f3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -683,12 +683,6 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_16013271637:dg2 */ wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, @@ -1440,10 +1434,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010680813:dg2_g10 */ - wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS); - /* Wa_14010948348:dg2_g10 */ wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); @@ -1490,16 +1480,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - /* Wa_1509235366:dg2 */ - wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | - GLOBAL_INVALIDATION_MODE); - /* Wa_14014830051:dg2 */ wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -1508,14 +1488,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * recommended tuning settings documented in the bspec's * performance guide section. */ - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); - - /* Wa_18018781329:dg2 */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } static void @@ -2049,6 +2022,23 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2(i915)) { /* Wa_14015227452:dg2 */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_1509235366:dg2 */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); + + /* + * The following are not actually "workarounds" but rather + * recommended tuning settings documented in the bspec's + * performance guide section.
+ */ + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + + /* Wa_18018781329:dg2 */ + wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2149,6 +2139,24 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + /* Wa_22012532006:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010680813:dg2_g10 */ + wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012362059:dg2 */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + } + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index b3d28b003b73..ac749ab11035 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -11,6 +11,7 @@ #include "intel_mchbar_regs.h" #include "gt/intel_gt.h" #include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) { @@ -115,7 +116,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) { u32 request[] = { GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), id, }; @@ -579,10 +580,10 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) static void slpc_get_rp_values(struct intel_guc_slpc *slpc) { + struct intel_rps *rps = &slpc_to_gt(slpc)->rps; u32 rp_state_cap; - rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore, - GEN6_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * GT_FREQUENCY_MULTIPLIER; diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 1c67ff735f18..62b3f332bbf5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -835,21 +835,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5) i915->drm.driver_features &= ~DRIVER_ATOMIC; - /* - * Check if we support fake LMEM -- for now we only unleash this for - * the live selftests(test-and-exit). 
- */ -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { - if (GRAPHICS_VER(i915) >= 9 && i915_selftest.live < 0 && - i915->params.fake_lmem_start) { - mkwrite_device_info(i915)->memory_regions = - REGION_SMEM | REGION_LMEM | REGION_STOLEN_SMEM; - GEM_BUG_ON(!HAS_LMEM(i915)); - } - } -#endif - ret = pci_enable_device(pdev); if (ret) goto out_fini; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f600d1cb01b3..51417e9b740f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1147,11 +1147,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) -#define IS_TGL_U(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULT) - -#define IS_TGL_Y(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_TGL_UY(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_UY) #define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until)) @@ -1170,11 +1167,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_DISPLAY_STEP(__i915, since, until)) #define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \ - ((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ + (IS_TGL_UY(__i915) && \ IS_GRAPHICS_STEP(__i915, since, until)) #define IS_TGL_GRAPHICS_STEP(__i915, since, until) \ - (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ + (IS_TIGERLAKE(__i915) && !IS_TGL_UY(__i915)) && \ IS_GRAPHICS_STEP(__i915, since, until)) #define IS_RKL_DISPLAY_STEP(p, since, until) \ @@ -1340,17 +1337,28 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* * Set this flag, when platform requires 64K GTT page sizes or larger for - * device local memory access. Also this flag implies that we require or - * at least support the compact PT layout for the ppGTT when using the 64K - * GTT pages. + * device local memory access. */ #define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages) +/* + * Set this flag when platform doesn't allow both 64k pages and 4k pages in + * the same PT. This flag means we need to support compact PT layout for the + * ppGTT when using the 64K GTT pages. + */ +#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) +/* + * Platform has the dedicated compression control state for each lmem surface + * stored in lmem to support the 3D and media compression formats.
+ */ +#define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) + #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) #define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 525ae832aa9a..eea355c2fc28 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -195,11 +195,6 @@ i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); #endif -#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) -i915_param_named_unsafe(fake_lmem_start, ulong, 0400, - "Fake LMEM start offset (default: 0)"); -#endif - #if CONFIG_DRM_I915_REQUEST_TIMEOUT i915_param_named_unsafe(request_timeout_ms, uint, 0600, "Default request/fence/batch buffer expiration timeout."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c9d53ff910a0..c779a6f85c7e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -72,7 +72,6 @@ struct drm_printer; param(int, fastboot, -1, 0600) \ param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ - param(unsigned long, fake_lmem_start, 0, IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) ? 0400 : 0) \ param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 8246cbe9b01d..c32c0c6661c8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1005,6 +1005,7 @@ static const struct intel_device_info adl_p_info = { XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ .has_64bit_reloc = 1, \ + .has_flat_ccs = 1, \ .has_global_mocs = 1, \ .has_gt_uc = 1, \ .has_llc = 1, \ @@ -1030,6 +1031,7 @@ static const struct intel_device_info xehpsdv_info = { PLATFORM(INTEL_XEHPSDV), .display = { }, .has_64k_pages = 1, + .needs_compact_pt = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | @@ -1049,6 +1051,7 @@ static const struct intel_device_info dg2_info = { PLATFORM(INTEL_DG2), .has_guc_deprivilege = 1, .has_64k_pages = 1, + .needs_compact_pt = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 00fb40029f43..0a9c3fcc09b1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4374,6 +4374,10 @@ void i915_perf_init(struct drm_i915_private *i915) /* XXX const struct i915_perf_ops! 
*/ + /* i915_perf is not enabled for DG2 yet */ + if (IS_DG2(i915)) + return; + perf->oa_formats = oa_formats; if (IS_HASWELL(i915)) { perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2b8a3086ed35..cfd569684fa7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8460,6 +8460,20 @@ enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13) +#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHPSDV_CCS_BASE_SHIFT 8 + +/* gamt regs */ +#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) +#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ +#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV 0x5FF101FF /* max/min for LRA1/2 */ +#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL 0x67F1427F /* " " */ +#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT 0x5FF101FF /* " " */ + +#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */ +#define MMCD_PCLA (1 << 31) +#define MMCD_HOTSPOT_EN (1 << 27) + #define _ICL_PHY_MISC_A 0x64C00 #define _ICL_PHY_MISC_B 0x64C04 #define _DG2_PHY_MISC_TC1 0x64C14 /* TC1="PHY E" but offset as if "PHY F" */ @@ -8823,12 +8837,6 @@ enum skl_power_gate { #define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) -#define TGL_ROOT_DEVICE_ID 0x9A00 -#define TGL_ROOT_DEVICE_MASK 0xFF00 -#define TGL_ROOT_DEVICE_SKU_MASK 0xF -#define TGL_ROOT_DEVICE_SKU_ULX 0x2 -#define TGL_ROOT_DEVICE_SKU_ULT 0x4 - #define CLKREQ_POLICY _MMIO(0x101038) #define CLKREQ_POLICY_MEM_UP_OVRD REG_BIT(1) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 845cd88f8313..3558b16a929c 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -757,6 +757,14 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj)); + /* + * For compact-pt, we round up the reservation to prevent + * any smaller pages being used within the same PDE + */ + if (NEEDS_COMPACT_PT(vma->vm->i915)) + size = round_up(size, alignment); + /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */
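The two-line change above is the user-visible core of the compact-PT series: a vma gets at least its region's minimum alignment, and on NEEDS_COMPACT_PT platforms the reservation is also rounded up so that no smaller pages can share its PDE. A userspace sketch of that policy, assuming the 2M/64K/4K minimums established in i915_address_space_init() earlier in this diff:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K  UINT64_C(0x1000)
#define SZ_64K UINT64_C(0x10000)
#define SZ_2M  UINT64_C(0x200000)

/* models the vm->min_alignment[] entry chosen for an lmem object */
static uint64_t min_alignment(int has_64k_pages, int needs_compact_pt)
{
        if (has_64k_pages && needs_compact_pt)
                return SZ_2M;   /* whole PDE per lmem object */
        if (has_64k_pages)
                return SZ_64K;
        return SZ_4K;           /* I915_GTT_MIN_ALIGNMENT */
}

static uint64_t round_up_pow2(uint64_t x, uint64_t a)
{
        return (x + a - 1) & ~(a - 1);
}

int main(void)
{
        uint64_t align = min_alignment(1, 1); /* DG2/XeHP SDV-like lmem vma */
        uint64_t size = 3 * SZ_64K;           /* a 192K object */

        /* i915_vma_insert(): size = round_up(size, alignment) */
        printf("alignment=0x%" PRIx64 ", reserved size=0x%" PRIx64 "\n",
               align, round_up_pow2(size, align));
        return 0;
}

Output: alignment=0x200000, reserved size=0x200000; the 192K object reserves a full 2M so that nothing else can land under the same PDE, which is also why the selftests below switch from PAGE_ALIGN/I915_GTT_MIN_ALIGNMENT to i915_vm_min_alignment().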
@@ -769,6 +777,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } color = 0; + if (i915_vm_has_cache_coloring(vma->vm)) color = vma->obj->cache_level; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 94da5aa37391..32c5f10e31db 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -170,6 +170,10 @@ static const u16 subplatform_portf_ids[] = { INTEL_ICL_PORT_F_IDS(0), }; +static const u16 subplatform_uy_ids[] = { + INTEL_TGL_12_GT2_IDS(0), +}; + static const u16 subplatform_n_ids[] = { INTEL_ADLN_IDS(0), }; @@ -214,6 +218,9 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_portf_ids, ARRAY_SIZE(subplatform_portf_ids))) { mask = BIT(INTEL_SUBPLATFORM_PORTF); + } else if (find_devid(devid, subplatform_uy_ids, + ARRAY_SIZE(subplatform_uy_ids))) { + mask = BIT(INTEL_SUBPLATFORM_UY); } else if (find_devid(devid, subplatform_n_ids, ARRAY_SIZE(subplatform_n_ids))) { mask = BIT(INTEL_SUBPLATFORM_N); @@ -222,25 +229,6 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) mask = BIT(INTEL_SUBPLATFORM_RPL_S); } - if (IS_TIGERLAKE(i915)) { - struct pci_dev *root, *pdev = to_pci_dev(i915->drm.dev); - - root = list_first_entry(&pdev->bus->devices, typeof(*root), bus_list); - - drm_WARN_ON(&i915->drm, mask); - drm_WARN_ON(&i915->drm, (root->device & TGL_ROOT_DEVICE_MASK) != - TGL_ROOT_DEVICE_ID); - - switch (root->device & TGL_ROOT_DEVICE_SKU_MASK) { - case TGL_ROOT_DEVICE_SKU_ULX: - mask = BIT(INTEL_SUBPLATFORM_ULX); - break; - case TGL_ROOT_DEVICE_SKU_ULT: - mask = BIT(INTEL_SUBPLATFORM_ULT); - break; - } - } - GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 27dcfe6f2429..291215d9da28 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -106,6 +106,9 @@ enum intel_platform { /* ICL */ #define INTEL_SUBPLATFORM_PORTF (0) +/* TGL */ +#define INTEL_SUBPLATFORM_UY (0) + /* DG2 */ #define INTEL_SUBPLATFORM_G10 0 #define INTEL_SUBPLATFORM_G11 1 @@ -131,8 +134,10 @@ enum intel_ppgtt_type { /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(has_64k_pages); \ + func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ + func(has_flat_ccs); \ func(has_global_mocs); \ func(has_gt_uc); \ func(has_guc_deprivilege); \ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index c70d7e286a51..16ab62d605b1 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -97,10 +97,14 @@ static int iomemtest(struct intel_memory_region *mem, bool test_all, const void *caller) { - resource_size_t last = resource_size(&mem->region) - PAGE_SIZE; - resource_size_t page; + resource_size_t last, page; int err; + if (resource_size(&mem->region) < PAGE_SIZE) + return 0; + + last = resource_size(&mem->region) - PAGE_SIZE; + /* * Quick test to check read/write access to the iomap (backing store). 
* diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 5625c9c38993..06464b8865fc 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -67,9 +67,6 @@ struct intel_memory_region { struct io_mapping iomap; struct resource region; - /* For fake LMEM */ - struct drm_mm_node fake_mappable; - resource_size_t io_start; resource_size_t min_page_size; resource_size_t total; @@ -81,8 +78,6 @@ struct intel_memory_region { char name[16]; bool private; /* not for userspace */ - dma_addr_t remap_addr; - struct { struct mutex lock; /* Protects access to objects */ struct list_head list; diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index ac1a796b2808..4fd69ecd1481 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -165,7 +165,7 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_ROCKETLAKE(i915)) { revids = rkl_revids; size = ARRAY_SIZE(rkl_revids); - } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { + } else if (IS_TGL_UY(i915)) { revids = tgl_uy_revids; size = ARRAY_SIZE(tgl_uy_revids); } else if (IS_TIGERLAKE(i915)) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e7e6c4b2c81d..ab751192eb3b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -27,9 +27,11 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" #include "gem/selftests/mock_context.h" #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" +#include "gt/intel_gtt.h" #include "i915_random.h" #include "i915_selftest.h" @@ -239,6 +241,8 @@ static int lowlevel_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { + const unsigned int min_alignment = + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); I915_RND_STATE(seed_prng); struct i915_vma_resource *mock_vma_res; unsigned int size; @@ -252,9 +256,10 @@ static int lowlevel_hole(struct i915_address_space *vm, I915_RND_SUBSTATE(prng, seed_prng); struct drm_i915_gem_object *obj; unsigned int *order, count, n; - u64 hole_size; + u64 hole_size, aligned_size; - hole_size = (hole_end - hole_start) >> size; + aligned_size = max_t(u32, ilog2(min_alignment), size); + hole_size = (hole_end - hole_start) >> aligned_size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); count = hole_size >> 1; @@ -275,8 +280,8 @@ static int lowlevel_hole(struct i915_address_space *vm, } GEM_BUG_ON(!order); - GEM_BUG_ON(count * BIT_ULL(size) > vm->total); - GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end); /* Ignore allocation failures (i.e. 
don't report them as * a test failure) as we are purposefully allocating very @@ -299,10 +304,10 @@ static int lowlevel_hole(struct i915_address_space *vm, } for (n = 0; n < count; n++) { - u64 addr = hole_start + order[n] * BIT_ULL(size); + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); intel_wakeref_t wakeref; - GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total); if (igt_timeout(end_time, "%s timed out before %d/%d\n", @@ -345,7 +350,7 @@ alloc_vm_end: } mock_vma_res->bi.pages = obj->mm.pages; - mock_vma_res->node_size = BIT_ULL(size); + mock_vma_res->node_size = BIT_ULL(aligned_size); mock_vma_res->start = addr; with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) @@ -356,7 +361,7 @@ alloc_vm_end: i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { - u64 addr = hole_start + order[n] * BIT_ULL(size); + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); @@ -400,8 +405,10 @@ static int fill_hole(struct i915_address_space *vm, { const u64 hole_size = hole_end - hole_start; struct drm_i915_gem_object *obj; + const unsigned int min_alignment = + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); const unsigned long max_pages = - min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment)); const unsigned long max_step = max(int_sqrt(max_pages), 2UL); unsigned long npages, prime, flags; struct i915_vma *vma; @@ -442,14 +449,17 @@ static int fill_hole(struct i915_address_space *vm, offset = p->offset; list_for_each_entry(obj, &objects, st_link) { + u64 aligned_size = round_up(obj->base.size, + min_alignment); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) continue; if (p->step < 0) { - if (offset < hole_start + obj->base.size) + if (offset < hole_start + aligned_size) break; - offset -= obj->base.size; + offset -= aligned_size; } err = i915_vma_pin(vma, 0, 0, offset | flags); @@ -471,22 +481,25 @@ static int fill_hole(struct i915_address_space *vm, i915_vma_unpin(vma); if (p->step > 0) { - if (offset + obj->base.size > hole_end) + if (offset + aligned_size > hole_end) break; - offset += obj->base.size; + offset += aligned_size; } } offset = p->offset; list_for_each_entry(obj, &objects, st_link) { + u64 aligned_size = round_up(obj->base.size, + min_alignment); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) continue; if (p->step < 0) { - if (offset < hole_start + obj->base.size) + if (offset < hole_start + aligned_size) break; - offset -= obj->base.size; + offset -= aligned_size; } if (!drm_mm_node_allocated(&vma->node) || @@ -507,22 +520,25 @@ static int fill_hole(struct i915_address_space *vm, } if (p->step > 0) { - if (offset + obj->base.size > hole_end) + if (offset + aligned_size > hole_end) break; - offset += obj->base.size; + offset += aligned_size; } } offset = p->offset; list_for_each_entry_reverse(obj, &objects, st_link) { + u64 aligned_size = round_up(obj->base.size, + min_alignment); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) continue; if (p->step < 0) { - if (offset < hole_start + obj->base.size) + if (offset < hole_start + aligned_size) break; - offset -= obj->base.size; + offset -= aligned_size; } err = i915_vma_pin(vma, 0, 0, offset | flags); @@ -544,22 +560,25 @@ static int fill_hole(struct i915_address_space *vm, i915_vma_unpin(vma); if (p->step > 0) { - if (offset + obj->base.size > hole_end) + if (offset + aligned_size > 
hole_end) break; - offset += obj->base.size; + offset += aligned_size; } } offset = p->offset; list_for_each_entry_reverse(obj, &objects, st_link) { + u64 aligned_size = round_up(obj->base.size, + min_alignment); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) continue; if (p->step < 0) { - if (offset < hole_start + obj->base.size) + if (offset < hole_start + aligned_size) break; - offset -= obj->base.size; + offset -= aligned_size; } if (!drm_mm_node_allocated(&vma->node) || @@ -580,9 +599,9 @@ static int fill_hole(struct i915_address_space *vm, } if (p->step > 0) { - if (offset + obj->base.size > hole_end) + if (offset + aligned_size > hole_end) break; - offset += obj->base.size; + offset += aligned_size; } } } @@ -612,6 +631,7 @@ static int walk_hole(struct i915_address_space *vm, const u64 hole_size = hole_end - hole_start; const unsigned long max_pages = min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long min_alignment; unsigned long flags; u64 size; @@ -621,6 +641,8 @@ static int walk_hole(struct i915_address_space *vm, if (i915_is_ggtt(vm)) flags |= PIN_GLOBAL; + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); + for_each_prime_number_from(size, 1, max_pages) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -639,7 +661,7 @@ static int walk_hole(struct i915_address_space *vm, for (addr = hole_start; addr + obj->base.size < hole_end; - addr += obj->base.size) { + addr += round_up(obj->base.size, min_alignment)) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", @@ -691,6 +713,7 @@ static int pot_hole(struct i915_address_space *vm, { struct drm_i915_gem_object *obj; struct i915_vma *vma; + unsigned int min_alignment; unsigned long flags; unsigned int pot; int err = 0; @@ -699,6 +722,8 @@ static int pot_hole(struct i915_address_space *vm, if (i915_is_ggtt(vm)) flags |= PIN_GLOBAL; + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); + obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -711,13 +736,13 @@ static int pot_hole(struct i915_address_space *vm, /* Insert a pair of pages across every pot boundary within the hole */ for (pot = fls64(hole_end - 1) - 1; - pot > ilog2(2 * I915_GTT_PAGE_SIZE); + pot > ilog2(2 * min_alignment); pot--) { u64 step = BIT_ULL(pot); u64 addr; - for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; - addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + for (addr = round_up(hole_start + min_alignment, step) - min_alignment; + addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { @@ -762,6 +787,7 @@ static int drunk_hole(struct i915_address_space *vm, unsigned long end_time) { I915_RND_STATE(prng); + unsigned int min_alignment; unsigned int size; unsigned long flags; @@ -769,15 +795,18 @@ static int drunk_hole(struct i915_address_space *vm, if (i915_is_ggtt(vm)) flags |= PIN_GLOBAL; + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); + /* Keep creating larger objects until one cannot fit into the hole */ for (size = 12; (hole_end - hole_start) >> size; size++) { struct drm_i915_gem_object *obj; unsigned int *order, count, n; struct i915_vma *vma; - u64 hole_size; + u64 hole_size, aligned_size; int err = -ENODEV; - hole_size = (hole_end - hole_start) >> size; + aligned_size = max_t(u32, 
ilog2(min_alignment), size); + hole_size = (hole_end - hole_start) >> aligned_size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); count = hole_size >> 1; @@ -817,7 +846,7 @@ static int drunk_hole(struct i915_address_space *vm, GEM_BUG_ON(vma->size != BIT_ULL(size)); for (n = 0; n < count; n++) { - u64 addr = hole_start + order[n] * BIT_ULL(size); + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { @@ -869,11 +898,14 @@ static int __shrink_hole(struct i915_address_space *vm, { struct drm_i915_gem_object *obj; unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int min_alignment; unsigned int order = 12; LIST_HEAD(objects); int err = 0; u64 addr; + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); + /* Keep creating larger objects until one cannot fit into the hole */ for (addr = hole_start; addr < hole_end; ) { struct i915_vma *vma; @@ -914,7 +946,7 @@ static int __shrink_hole(struct i915_address_space *vm, } i915_vma_unpin(vma); - addr += size; + addr += round_up(size, min_alignment); /* * Since we are injecting allocation faults at random intervals, @@ -1038,6 +1070,118 @@ err_purge: return err; } +static int misaligned_case(struct i915_address_space *vm, struct intel_memory_region *mr, + u64 addr, u64 size, unsigned long flags) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = 0; + u64 expected_vma_size, expected_node_size; + bool is_stolen = mr->type == INTEL_MEMORY_STOLEN_SYSTEM || + mr->type == INTEL_MEMORY_STOLEN_LOCAL; + + obj = i915_gem_object_create_region(mr, size, 0, 0); + if (IS_ERR(obj)) { + /* if iGVT-g or DMAR is active, stolen mem will be uninitialized */ + if (PTR_ERR(obj) == -ENODEV && is_stolen) + return 0; + return PTR_ERR(obj); + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) + goto err_put; + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node)) { + err = -EINVAL; + goto err_put; + } + + if (i915_vma_misplaced(vma, 0, 0, addr | flags)) { + err = -EINVAL; + goto err_put; + } + + expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1)); + expected_node_size = expected_vma_size; + + if (NEEDS_COMPACT_PT(vm->i915) && i915_gem_object_is_lmem(obj)) { + /* compact-pt should expand lmem node to 2MB */ + expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K); + expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M); + } + + if (vma->size != expected_vma_size || vma->node.size != expected_node_size) { + i915_vma_unbind_unlocked(vma); + err = -EBADSLT; + goto err_put; + } + + err = i915_vma_unbind_unlocked(vma); + if (err) + goto err_put; + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + +err_put: + i915_gem_object_put(obj); + cleanup_freed_objects(vm->i915); + return err; +} + +static int misaligned_pin(struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct intel_memory_region *mr; + enum intel_region_id id; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + int err = 0; + u64 hole_size = hole_end - hole_start; + + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_memory_region(mr, vm->i915, id) { + u64 min_alignment = i915_vm_min_alignment(vm, (enum intel_memory_type)id); + u64 size = min_alignment; + u64 addr = round_down(hole_start + (hole_size / 2), min_alignment); + + /* avoid -ENOSPC on 
very small hole setups */ + if (hole_size < 3 * min_alignment) + continue; + + /* we can't test < 4k alignment due to flags being encoded in lower bits */ + if (min_alignment != I915_GTT_PAGE_SIZE_4K) { + err = misaligned_case(vm, mr, addr + (min_alignment / 2), size, flags); + /* misaligned should error with -EINVAL */ + if (!err) + err = -EBADSLT; + if (err != -EINVAL) + return err; + } + + /* test for vma->size expansion to min page size */ + err = misaligned_case(vm, mr, addr, PAGE_SIZE, flags); + if (err) + return err; + + /* test for intermediate size not expanding vma->size for large alignments */ + err = misaligned_case(vm, mr, addr, size / 2, flags); + if (err) + return err; + } + + return 0; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct i915_address_space *vm, u64 hole_start, u64 hole_end, @@ -1107,6 +1251,11 @@ static int igt_ppgtt_shrink_boom(void *arg) return exercise_ppgtt(arg, shrink_boom); } +static int igt_ppgtt_misaligned_pin(void *arg) +{ + return exercise_ppgtt(arg, misaligned_pin); +} + static int sort_holes(void *priv, const struct list_head *A, const struct list_head *B) { @@ -1179,6 +1328,11 @@ static int igt_ggtt_lowlevel(void *arg) return exercise_ggtt(arg, lowlevel_hole); } +static int igt_ggtt_misaligned_pin(void *arg) +{ + return exercise_ggtt(arg, misaligned_pin); +} + static int igt_ggtt_page(void *arg) { const unsigned int count = PAGE_SIZE/sizeof(u32); @@ -2151,12 +2305,14 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_fill), SUBTEST(igt_ppgtt_shrink), SUBTEST(igt_ppgtt_shrink_boom), + SUBTEST(igt_ppgtt_misaligned_pin), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), + SUBTEST(igt_ggtt_misaligned_pin), SUBTEST(igt_cs_tlb), }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 914ebd9290e5..05c3642aaece 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 { /** * When the EXEC_OBJECT_PINNED flag is specified this is populated by * the user with the GTT offset at which this object will be pinned. + * * When the I915_EXEC_NO_RELOC flag is specified this must contain the * presumed_offset of the object. + * * During execbuffer2 the kernel populates it with the value of the * current GTT offset of the object, for future presumed_offset writes. + * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. */ __u64 offset; @@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * - * Note that for some devices we have might have further minimum - * page-size restrictions(larger than 4K), like for device local-memory. - * However in general the final size here should always reflect any - * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS - * extension to place the object in device local-memory. + * + * DG2 64K min page size implications: + * + * On discrete platforms, starting from DG2, we have to contend with GTT + * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE + * objects. Specifically the hardware only supports 64K or larger GTT + * page sizes for such memory. 
The kernel will already ensure that all + * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page + * sizes underneath. + * + * Note that the returned size here will always reflect any required + * rounding up done by the kernel, i.e. 4K will now become 64K on devices + * such as DG2. + * + * Special DG2 GTT address alignment requirement: + * + * The GTT alignment will also need to be at least 2M for such objects. + * + * Note that due to how the hardware implements 64K GTT page support, we + * have some further complications: + * + * 1) The entire PDE (which covers a 2MB virtual address range) must + * contain only 64K PTEs, i.e. mixing 4K and 64K PTEs in the same + * PDE is forbidden by the hardware. + * + * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM + * objects. + * + * To keep things simple for userland, we mandate that any GTT mappings + * must be aligned to and rounded up to 2MB. The kernel will internally + * pad them out to the next 2MB boundary. As this only wastes virtual + * address space and avoids userland having to cope with any needlessly + * complicated PDE sharing scheme (coloring) and only affects DG2, this + * is deemed to be a good compromise. */ __u64 size; /**
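To make the uapi contract above concrete, here is a minimal userspace sketch (illustrative only, not part of the patch): it allocates an object from I915_MEMORY_CLASS_DEVICE via DRM_IOCTL_I915_GEM_CREATE_EXT and rounds a softpin offset up to the 2M boundary the documentation mandates. The helper names create_lmem_bo and lmem_gtt_align are hypothetical, and it assumes uapi headers new enough to expose DRM_IOCTL_I915_GEM_CREATE_EXT plus an fd already opened on the i915 render node.

/*
 * Sketch: allocate a local-memory object and compute a DG2-legal
 * softpin offset. Error handling trimmed for brevity.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

#define LMEM_GTT_ALIGN (2ull << 20) /* 2M, per the DG2 rule above */

static int create_lmem_bo(int fd, uint64_t size, uint32_t *handle)
{
	struct drm_i915_gem_memory_class_instance region = {
		.memory_class = I915_MEMORY_CLASS_DEVICE,
		.memory_instance = 0,
	};
	struct drm_i915_gem_create_ext_memory_regions regions = {
		.base.name = I915_GEM_CREATE_EXT_MEMORY_REGIONS,
		.num_regions = 1,
		.regions = (uintptr_t)&region,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size, /* returned rounded up, e.g. 4K becomes 64K on DG2 */
		.extensions = (uintptr_t)&regions,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return -1;

	*handle = create.handle; /* create.size now holds the padded size */
	return 0;
}

/* Softpinned GTT offsets for such objects must sit on a 2M boundary. */
static uint64_t lmem_gtt_align(uint64_t offset)
{
	return (offset + LMEM_GTT_ALIGN - 1) & ~(LMEM_GTT_ALIGN - 1);
}

With EXEC_OBJECT_PINNED, an offset produced by lmem_gtt_align() is what would be written into drm_i915_gem_exec_object2.offset, matching the cross-reference added to the offset documentation in this patch.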