From ce7e75c7ef1bf8ea3d947da8c674d2f40fd7d734 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 28 Jul 2021 12:21:00 -0700 Subject: drm/i915: Disable bonding on gen12+ platforms Disable bonding on gen12+ platforms aside from ones already supported by the i915 - TGL, RKL, and ADL-S. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Acked-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728192100.132425-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..dbaeb924a437 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) u16 idx, num_bonds; int err, n; + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) && + !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) { + drm_dbg(&i915->drm, + "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n"); + return -ENODEV; + } + if (get_user(idx, &ext->virtual_index)) return -EFAULT; -- cgit From bc33e71f00a7491810cac9e1335ca97e889d5620 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:13 -0700 Subject: drm/i915: correct name of GT forcewake domain in error messages For historical reasons, the GT forcewake domain used to be referred to as the "blitter" domain; that name is no longer accurate since the GT domain contains a lot of additional registers and functionality besides just the blitter. Although we renamed the domain in the driver in commit 55e3c170950f ("drm/i915: Rename FORCEWAKE_BLITTER to FORCEWAKE_GT"), we neglected to update the string that gets printed in driver error messages; let's do that now to avoid confusion. 
Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0c35acfcd6da..13d069823635 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -64,7 +64,7 @@ static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) static const char * const forcewake_domain_names[] = { "render", - "blitter", + "gt", "media", "vdbox0", "vdbox1", -- cgit From 39afa4104bedf214e5779ef20655665723ad48cd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:14 -0700 Subject: drm/i915: Re-use gen11 forcewake read functions on gen12 The forcewake read logic is identical between gen11 and gen12, only the forcewake table data (which is tracked separately) differs; there's no need to generate a separate set of gen12 read functions when the gen11 functions will work just as well. We'll keep the separate write functions for now since the generated code directly references different shadow tables between the two platforms. 
Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 13d069823635..9ed7ce71e520 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -945,9 +945,6 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ find_fw_domain(uncore, offset) -#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ - find_fw_domain(uncore, offset) - /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -1644,7 +1641,6 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) -__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -2122,7 +2118,7 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); -- cgit From f9d56cd64ef3186d6ce072751f7f44dcd189f6bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 08:21:58 -0700 Subject: drm/i915: Make shadow tables range-based Rather than defining our shadow tables as a list of individual registers, provide them as a list of register ranges; we'll have some ranges of multiple registers being added soon (and we already have a couple adjacent 
registers that we can squash into a single range now). This change also defines the table with hex literal values rather than symbolic register names; since that's how the tables are defined in the bspec, this change will make it easier to review the tables overall. v2: - Force signed comparison on range overlap sanity check Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729152158.2646246-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +-- drivers/gpu/drm/i915/intel_uncore.c | 160 +++++++++++++------------- drivers/gpu/drm/i915/intel_uncore.h | 6 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 32 ++++-- 4 files changed, 108 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aae609d7d85d..e9b8af9f08ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2067,12 +2067,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->gt, &engine->wa_list); } -struct mcr_range { - u32 start; - u32 end; -}; - -static const struct mcr_range mcr_ranges_gen8[] = { +static const struct i915_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2081,7 +2076,7 @@ static const struct mcr_range mcr_ranges_gen8[] = { {}, }; -static const struct mcr_range mcr_ranges_gen12[] = { +static const struct i915_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2090,7 +2085,7 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; -static const struct mcr_range mcr_ranges_xehp[] = { +static const struct i915_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff 
}, @@ -2109,7 +2104,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { static bool mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct mcr_range *mcr_ranges; + const struct i915_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9ed7ce71e520..52601b960248 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -946,101 +946,95 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). */ -static const i915_reg_t gen8_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(GEN6_BSD_RING_BASE), /* 0x12000 (base) */ - RING_TAIL(VEBOX_RING_BASE), /* 0x1a000 (base) */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ +static const struct i915_range gen8_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x12030, .end = 0x12030 }, + { .start = 0x1a030, .end = 0x1a030 }, + { .start = 0x22030, .end = 0x22030 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - 
RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen11_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), 
/* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen12_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t xehp_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - 
RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ - RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ - RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ - RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ - RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ - RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ +static const struct i915_range xehp_shadowed_regs[] = { + { .start = 0x2000, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start 
= 0x1F8550, .end = 0x1F8550 }, /* TODO: Other registers are not yet used */ }; -static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) +static int mmio_range_cmp(u32 key, const struct i915_range *range) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (key < offset) + if (key < range->start) return -1; - else if (key > offset) + else if (key > range->end) return 1; else return 0; @@ -1049,9 +1043,9 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) #define __is_X_shadowed(x) \ static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = x##_shadowed_regs; \ + const struct i915_range *regs = x##_shadowed_regs; \ return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ - mmio_reg_cmp); \ + mmio_range_cmp); \ } __is_X_shadowed(gen8) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3c0b0a8b5250..531665b08039 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -119,6 +119,12 @@ struct intel_forcewake_range { enum forcewake_domains domains; }; +/* Other register ranges (e.g., shadow tables, MCR tables, etc.) 
*/ +struct i915_range { + u32 start; + u32 end; +}; + struct intel_uncore { void __iomem *regs; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 720b60853f8b..d6a9c11afa23 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -62,30 +62,40 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { struct { - const i915_reg_t *regs; + const struct i915_range *regs; unsigned int size; - } reg_lists[] = { + } range_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; - const i915_reg_t *reg; + const struct i915_range *range; unsigned int i, j; s32 prev; - for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { - reg = reg_lists[j].regs; - for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); + for (j = 0; j < ARRAY_SIZE(range_lists); ++j) { + range = range_lists[j].regs; + for (i = 0, prev = -1; i < range_lists[j].size; i++, range++) { + if (range->end < range->start) { + pr_err("%s: range[%d]:(%06x-%06x) has end before start\n", + __func__, i, range->start, range->end); + return -EINVAL; + } + + if (prev >= (s32)range->start) { + pr_err("%s: range[%d]:(%06x-%06x) is before end of previous (%06x)\n", + __func__, i, range->start, range->end, prev); + return -EINVAL; + } - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); + if (range->start % 4) { + pr_err("%s: range[%d]:(%06x-%06x) has non-dword-aligned start\n", + __func__, i, range->start, range->end); return -EINVAL; } - prev = offset; + prev = range->end; } } -- cgit From 0bb50de156d8280e53884adf1d5a04d6108f90e7 Mon Sep 17 00:00:00 2001 From: 
Matt Roper Date: Wed, 28 Jul 2021 22:41:16 -0700 Subject: drm/i915/gen11: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. Bspec: 18333 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 52601b960248..6b5b029148b2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -960,20 +960,26 @@ static const struct i915_range gen11_shadowed_regs[] = { { .start = 0x2550, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22230, .end = 0x22230 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0230, .end = 0x1C0230 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4230, .end = 0x1C4230 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8230, .end = 0x1C8230 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0230, .end = 0x1D0230 }, + { .start = 
0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4230, .end = 0x1D4230 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8230, .end = 0x1D8230 }, + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range gen12_shadowed_regs[] = { -- cgit From 5798a769d6f5be656638c5e6e0cd5c4f155a2fb5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:17 -0700 Subject: drm/i915/gen12: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. 
Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6b5b029148b2..0b2dbcc14802 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -984,23 +984,28 @@ static const struct i915_range gen11_shadowed_regs[] = { static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, + { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4D4, .end = 0xC4D4 }, + { .start = 0xC600, .end = 0xC600 }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range xehp_shadowed_regs[] = { -- cgit From 
5c5c40e28c52a36bb5ac26817275d5a0281ab819 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:18 -0700 Subject: drm/i915/xehp: Xe_HP shadowed registers are a strict superset of gen12 The list of shadowed registers on XeHP is identical to the set for earlier gen12 platforms, with additional ranges added for the new VCS and VECS engines. Since those register ranges were reserved on earlier gen12 platforms, it's safe to consolidate to a single gen12 table rather than tracking Xe_HP separately. Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++-------------------- drivers/gpu/drm/i915/selftests/intel_uncore.c | 1 - 2 files changed, 13 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0b2dbcc14802..de4ef9bd3b51 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1006,39 +1006,24 @@ static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, { .start = 0x1D8510, .end = 0x1D8550 }, -}; -static const struct i915_range xehp_shadowed_regs[] = { - { .start = 0x2000, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, - { .start = 0xA008, .end = 0xA00C }, - { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, - { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, - { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, - { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, - { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, - { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, - { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 
0x1D8550, .end = 0x1D8550 }, + /* + * The rest of these ranges are specific to Xe_HP and beyond, but + * are reserved/unused ranges on earlier gen12 platforms, so they can + * be safely added to the gen12 table. + */ { .start = 0x1E0030, .end = 0x1E0030 }, - { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E0510, .end = 0x1E0550 }, { .start = 0x1E4030, .end = 0x1E4030 }, - { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E4510, .end = 0x1E4550 }, { .start = 0x1E8030, .end = 0x1E8030 }, - { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1E8510, .end = 0x1E8550 }, { .start = 0x1F0030, .end = 0x1F0030 }, - { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F0510, .end = 0x1F0550 }, { .start = 0x1F4030, .end = 0x1F4030 }, - { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F4510, .end = 0x1F4550 }, { .start = 0x1F8030, .end = 0x1F8030 }, - { .start = 0x1F8550, .end = 0x1F8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1F8510, .end = 0x1F8550 }, }; static int mmio_range_cmp(u32 key, const struct i915_range *range) @@ -1062,7 +1047,6 @@ static bool is_##x##_shadowed(u32 offset) \ __is_X_shadowed(gen8) __is_X_shadowed(gen11) __is_X_shadowed(gen12) -__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1126,15 +1110,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) -#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (!is_xehp_shadowed(__offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1737,7 +1712,6 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) -__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -2114,11 +2088,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index d6a9c11afa23..22ef2c87df1a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,7 +68,6 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, - { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; -- cgit From dae2d28832968751f7731336b560a4a84a197b76 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 16:27:48 +0200 Subject: drm/doc/rfc: drop lmem uapi section We still have quite a bit more work to do with overall reworking of the ttm-based dg1 code, but the uapi stuff is now finalized with the latest pull. So remove that. 
This also fixes kerneldoc build warnings because we've included the same headers in two places, resulting in sphinx complaining about duplicated symbols. This regression was introduced when we moved the uapi definitions to the real include/uapi/ folder in 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") v2: Fix a few references that I missed, the htmldocs build took forever. Acked-by: Jason Ekstrand Acked-by: Maarten Lankhorst Tested-by: Stephen Rothwell (v1) References: https://lore.kernel.org/dri-devel/20210603193242.1ce99344@canb.auug.org.au/ Reported-by: Stephen Rothwell Cc: Stephen Rothwell Fixes: 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") Cc: Matthew Auld Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210810142748.1983271-1-daniel.vetter@ffwll.ch --- Documentation/gpu/rfc/i915_gem_lmem.rst | 109 -------------------------------- 1 file changed, 109 deletions(-) diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst index 675ba8620d66..b421a3c1806e 100644 --- a/Documentation/gpu/rfc/i915_gem_lmem.rst +++ b/Documentation/gpu/rfc/i915_gem_lmem.rst @@ -18,114 +18,5 @@ real, with all the uAPI bits is: * Route shmem backend over to TTM SYSTEM for discrete * TTM purgeable object support * Move i915 buddy allocator over to TTM - * MMAP ioctl mode(see `I915 MMAP`_) - * SET/GET ioctl caching(see `I915 SET/GET CACHING`_) * Send RFC(with mesa-dev on cc) for final sign off on the uAPI * Add pciid for DG1 and turn on uAPI for real - -New object placement and region query uAPI -========================================== -Starting from DG1 we need to give userspace the ability to allocate buffers from -device local-memory. Currently the driver supports gem_create, which can place -buffers in system memory via shmem, and the usual assortment of other -interfaces, like dumb buffers and userptr. 
- -To support this new capability, while also providing a uAPI which will work -beyond just DG1, we propose to offer three new bits of uAPI: - -DRM_I915_QUERY_MEMORY_REGIONS ------------------------------ -New query ID which allows userspace to discover the list of supported memory -regions(like system-memory and local-memory) for a given device. We identify -each region with a class and instance pair, which should be unique. The class -here would be DEVICE or SYSTEM, and the instance would be zero, on platforms -like DG1. - -Side note: The class/instance design is borrowed from our existing engine uAPI, -where we describe every physical engine in terms of its class, and the -particular instance, since we can have more than one per class. - -In the future we also want to expose more information which can further -describe the capabilities of a region. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions - -GEM_CREATE_EXT --------------- -New ioctl which is basically just gem_create but now allows userspace to provide -a chain of possible extensions. Note that if we don't provide any extensions and -set flags=0 then we get the exact same behaviour as gem_create. - -Side note: We also need to support PXP[1] in the near future, which is also -applicable to integrated platforms, and adds its own gem_create_ext extension, -which basically lets userspace mark a buffer as "protected". - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext - -I915_GEM_CREATE_EXT_MEMORY_REGIONS ----------------------------------- -Implemented as an extension for gem_create_ext, we would now allow userspace to -optionally provide an immutable list of preferred placements at creation time, -in priority order, for a given buffer object. 
For the placements we expect -them each to use the class/instance encoding, as per the output of the regions -query. Having the list in priority order will be useful in the future when -placing an object, say during eviction. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext_memory_regions - -One fair criticism here is that this seems a little over-engineered[2]. If we -just consider DG1 then yes, a simple gem_create.flags or something is totally -all that's needed to tell the kernel to allocate the buffer in local-memory or -whatever. However looking to the future we need uAPI which can also support -upcoming Xe HP multi-tile architecture in a sane way, where there can be -multiple local-memory instances for a given device, and so using both class and -instance in our uAPI to describe regions is desirable, although specifically -for DG1 it's uninteresting, since we only have a single local-memory instance. - -Existing uAPI issues -==================== -Some potential issues we still need to resolve. - -I915 MMAP ---------- -In i915 there are multiple ways to MMAP GEM object, including mapping the same -object using different mapping types(WC vs WB), i.e multiple active mmaps per -object. TTM expects one MMAP at most for the lifetime of the object. If it -turns out that we have to backpedal here, there might be some potential -userspace fallout. - -I915 SET/GET CACHING --------------------- -In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but -DG1 doesn't support non-snooped pcie transactions, so we can just always -allocate as WB for smem-only buffers. If/when our hw gains support for -non-snooped pcie transactions then we must fix this mode at allocation time as -a new GEM extension. - -This is related to the mmap problem, because in general (meaning, when we're -not running on intel cpus) the cpu mmap must not, ever, be inconsistent with -allocation mode. 
- -Possible idea is to let the kernel picks the mmap mode for userspace from the -following table: - -smem-only: WB. Userspace does not need to call clflush. - -smem+lmem: We only ever allow a single mode, so simply allocate this as uncached -memory, and always give userspace a WC mapping. GPU still does snooped access -here(assuming we can't turn it off like on DG1), which is a bit inefficient. - -lmem only: always WC - -This means on discrete you only get a single mmap mode, all others must be -rejected. That's probably going to be a new default mode or something like -that. - -Links -===== -[1] https://patchwork.freedesktop.org/series/86798/ - -[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791 -- cgit From fa9899dad3ed84a8b6433467670d4cacd9b873bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:40 -0700 Subject: drm/i915/xehp: Loop over all gslices for INSTDONE processing We no longer have traditional slices on Xe_HP platforms, but the INSTDONE registers are replicated according to gslice representation which is similar. We can mostly re-use the existing instdone code with just a few modifications: * Create an alternate instdone loop macro that will iterate over the flat DSS space, but still provide the gslice/dss steering values for compatibility with the legacy code. * We should allocate INSTDONE storage space according to the maximum number of gslices rather than the maximum number of legacy slices to ensure we have enough storage space to hold all of the values. XeHP design has 8 gslices, whereas older platforms never had more than 3 slices. 
Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 48 +++++++++++++++------------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 +++++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 7 ++++ drivers/gpu/drm/i915/i915_gpu_error.c | 32 +++++++++++++------ 4 files changed, 66 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84..58ed67894b3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1163,16 +1163,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, u32 mmio_base = engine->mmio_base; int slice; int subslice; + int iter; memset(instdone, 0, sizeof(*instdone)); - switch (GRAPHICS_VER(i915)) { - default: + if (GRAPHICS_VER(i915) >= 8) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1182,21 +1182,32 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common_extra[1] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } + } else { + 
for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } } - break; - case 7: + } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1204,22 +1215,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); instdone->row[0][0] = intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - break; - case 6: - case 5: - case 4: + } else if (GRAPHICS_VER(i915) >= 4) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: + } else { instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; } } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb..0b4846b01626 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -67,8 +67,8 @@ struct intel_instdone { /* The following exist only in the RCS engine */ u32 slice_common; u32 slice_common_extra[2]; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* @@ -578,4 +578,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ (instdone_has_subslice(dev_priv_, sseu_, slice_, \ subslice_))) + +#define 
for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ + for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ + (iter_) < GEN_MAX_SUBSLICES; \ + (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ + (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ + for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) + #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 22fef98887c0..0270acdcc157 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -26,6 +26,9 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 +#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -78,6 +81,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, u8 mask; int ss_idx = subslice / BITS_PER_BYTE; + if (slice >= sseu->max_slices || + subslice >= sseu->max_subslices) + return false; + GEM_BUG_ON(ss_idx >= sseu->ss_stride); mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0f08bcfbe964..8230bc3ac8a9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -444,15 +444,29 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); - - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { + int 
iter; + + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } else { + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } if (GRAPHICS_VER(m->i915) < 12) return; -- cgit From 89f2e7ab4dd93d8785619ce58838391b9b07feb7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:41 -0700 Subject: drm/i915/dg2: Report INSTDONE_GEOM values in error state Xe_HPG adds some additional INSTDONE_GEOM debug registers; the Mesa team has indicated that having these reported in the error state would be useful for debugging GPU hangs. These registers are replicated per-DSS with gslice steering. 
Cc: Lionel Landwerlin Signed-off-by: Matt Roper Acked-by: Lionel Landwerlin Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 58ed67894b3d..332efea696a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1202,6 +1202,13 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, GEN7_ROW_INSTDONE); } } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + instdone->geom_svg[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + XEHPG_INSTDONE_GEOM_SVG); + } } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0b4846b01626..bfbfe53c23dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -69,6 +69,9 @@ struct intel_instdone { u32 slice_common_extra[2]; u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + + /* Added in XeHPG */ + u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8230bc3ac8a9..91d5da7b0a2b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -431,6 +431,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct sseu_dev_info *sseu = 
&ee->engine->gt->info.sseu; int slice; int subslice; + int iter; err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); @@ -445,8 +446,6 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, return; if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { - int iter; - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, @@ -471,6 +470,13 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) < 12) return; + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.geom_svg[slice][subslice]); + } + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", ee->instdone.slice_common_extra[0]); err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2113925084b0..9884c1156b95 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2695,6 +2695,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) -- cgit From d16de9a25b5cc458d0c8c978970f8edf9cf710d0 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 6 Aug 2021 10:29:01 -0700 Subject: drm/i915/xehpsdv: Add compute DSS type Starting in XeHP, the concept of slice has been removed in favor of DSS (Dual-Subslice) masks for various workload types. These workloads have been divided into those enabled for geometry and those enabled for compute. i915 currently maintains a single set of S/SS/EU masks for the device. 
The goal of this patch set is to minimize the amount of impact to prior generations while still giving the user maximum flexibility. v2: - Generalize a comment about uapi access to geometry/compute masks; the proposed uapi has changed since the comment was first written, and will show up in a future series once the userspace code is published. (Lucas) v3: - Eliminate unnecessary has_compute_dss flag. (Lucas) - Drop unwanted comment change in uapi header. (Lucas) Bspec: 33117, 33118, 20376 Cc: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Steve Hampson Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210806172901.1049133-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 60 +++++++++++++++++++++++++++--------- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 ++- drivers/gpu/drm/i915/i915_reg.h | 3 +- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index bbd272943c3f..b0e09b58005e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask) + u8 *subslice_mask, u32 ss_mask) { int offset = slice * sseu->ss_stride; - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int @@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u8 s_en, u32 ss_en, u16 eu_en) +static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) +{ + u32 ss_mask; + + ss_mask = ss_en >> (s * sseu->max_subslices); + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); + + 
return ss_mask; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, + u32 g_ss_en, u32 c_ss_en, u16 eu_en) { int s, ss; - /* ss_en represents entire subslice mask across all slices */ + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(ss_en) * BITS_PER_BYTE); + sizeof(g_ss_en) * BITS_PER_BYTE); for (s = 0; s < sseu->max_slices; s++) { if ((s_en & BIT(s)) == 0) @@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->slice_mask |= BIT(s); - intel_sseu_set_subslices(sseu, s, ss_en); + /* + * XeHP introduces the concept of compute vs geometry DSS. To + * reduce variation between GENs around subslice usage, store a + * mask for both the geometry and compute enabled masks since + * userspace will need to be able to query these masks + * independently. Also compute a total enabled subslice count + * for the purposes of selecting subslices to use in a + * particular GEM context. 
+ */ + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, + get_ss_stride_mask(sseu, s, c_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, + get_ss_stride_mask(sseu, s, g_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + get_ss_stride_mask(sseu, s, + g_ss_en | c_ss_en)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, s, ss)) @@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = &gt->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 dss_en; + u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; u8 s_en; @@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt) s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) @@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); /* ICL has no power gating restrictions.
*/ sseu->has_slice_pg = 1; @@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - intel_sseu_set_subslices(sseu, 0, subslice_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -506,7 +535,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 0270acdcc157..60882a74741e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -32,6 +32,8 @@ struct drm_printer; struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; @@ -104,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); u32 
intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask); + u8 *subslice_mask, u32 ss_mask); void intel_sseu_info_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9884c1156b95..c8db6e8ef1ad 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3160,7 +3160,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) -#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHP_EU_ENABLE _MMIO(0x9134) #define XEHP_EU_ENA_MASK 0xFF -- cgit From b97090575ed27f8a23cc8f8ace642d5a8ea59206 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 15:05:23 +0200 Subject: drm/i915: Use locked access to ctx->engines in set_priority This essentially reverts commit 89ff76bf9b3b0b86e6bbe344bd6378d8661303fc Author: Chris Wilson Date: Thu Apr 2 13:42:18 2020 +0100 drm/i915/gem: Utilize rcu iteration of context engines Note that the other uses of __context_engines_await have disappeared in the following commits: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") c7a71fc8ee04 ("drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES") 4a766ae40ec8 ("drm/i915: Drop the CONTEXT_CLONE API (v2)") None of these have any business to optimize their engine lookup with rcu, unless extremely convincing benchmark data and a solid analysis why we can't make that workload (whatever it is that does) faster with a proper design fix. Also since there's only one caller of context_apply_all left and it's really just a loop, inline it and then inline the loop body too. This is how all other callers that take the engine lock loop over engines, it's much simpler.
Reviewed-by: Jason Ekstrand Signed-off-by: Daniel Vetter Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Jason Ekstrand Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20210810130523.1972031-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 72 ++++++----------------------- 1 file changed, 14 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dbaeb924a437..fd169cf2f75a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1284,49 +1284,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx, - bool *user_engines) -{ - struct i915_gem_engines *engines; - - rcu_read_lock(); - do { - engines = rcu_dereference(ctx->engines); - GEM_BUG_ON(!engines); - - if (user_engines) - *user_engines = i915_gem_context_user_engines(ctx); - - /* successful await => strong mb */ - if (unlikely(!i915_sw_fence_await(&engines->fence))) - continue; - - if (likely(engines == rcu_access_pointer(ctx->engines))) - break; - - i915_sw_fence_complete(&engines->fence); - } while (1); - rcu_read_unlock(); - - return engines; -} - -static void -context_apply_all(struct i915_gem_context *ctx, - void (*fn)(struct intel_context *ce, void *data), - void *data) -{ - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct intel_context *ce; - - e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) - fn(ce, data); - i915_sw_fence_complete(&e->fence); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, const struct i915_gem_proto_context *pc) @@ -1776,23 +1733,11 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, 
args->value); } -static void __apply_priority(struct intel_context *ce, void *arg) -{ - struct i915_gem_context *ctx = arg; - - if (!intel_engine_has_timeslices(ce->engine)) - return; - - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) - intel_context_set_use_semaphores(ce); - else - intel_context_clear_use_semaphores(ce); -} - static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { + struct i915_gem_engines_iter it; + struct intel_context *ce; int err; err = validate_priority(ctx->i915, args); @@ -1800,7 +1745,18 @@ static int set_priority(struct i915_gem_context *ctx, return err; ctx->sched.priority = args->value; - context_apply_all(ctx, __apply_priority, ctx); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_has_timeslices(ce->engine)) + continue; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); + } + i915_gem_context_unlock_engines(ctx); return 0; } -- cgit From 8b93d1d7dbd578fd296e70008b29c0f62d09d7cb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 12 Aug 2021 15:14:10 +0200 Subject: drm/shmem-helper: Switch to vmf_insert_pfn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to stop gup, which isn't the case if we use vmf_insert_page and VM_MIXEDMAP, because that does not set pte_special. The motivation here is to stop get_user_pages from working on buffer object mmaps in general. Quoting some discussion with Thomas: On Thu, Jul 22, 2021 at 08:22:43PM +0200, Thomas Zimmermann wrote: > Am 13.07.21 um 22:51 schrieb Daniel Vetter: > > We want to stop gup, which isn't the case if we use vmf_insert_page > > What is gup? get_user_pages. 
It pins memory wherever it is, which badly wrecks at least ttm and could also cause trouble with cma allocations. In both cases because we can't move/reuse these pages anymore. Now get_user_pages fails when the memory isn't considered "normal", like with VM_PFNMAP and using vm_insert_pfn. For consistency across all dma-buf I'm trying (together with Christian König) to roll this out everywhere, for fewer surprises. E.g. for 5.14 iirc we merged a patch to do the same for ttm, where it closes an actual bug (ttm gets really badly confused when there's suddenly pinned pages where it thought it can move them). cma allocations already use VM_PFNMAP (because that's what dma_mmap is using underneath), as is anything that's using remap_pfn_range. Worst case we have to revert this patch for shmem helpers if it breaks something, but I hope that's not the case. On the ttm side we've also had some fallout that we needed to paper over with clever tricks. v2: With this shmem gem helpers now definitely need CONFIG_MMU (0day) v3: add more depends on MMU. For usb drivers this is a bit awkward, but really it's correct: To be able to provide a contig mapping of buffers to userspace on !MMU platforms we'd need to use the cma helpers for these drivers on those platforms. As-is this won't work. Also not exactly sure why vm_insert_page doesn't go boom, because that definitely won't fly in practice since the pages are non-contig to begin with.
v4: Explain the entire motivation a lot more (Thomas) Acked-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210812131412.2487363-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/drm_gem_shmem_helper.c | 4 ++-- drivers/gpu/drm/gud/Kconfig | 2 +- drivers/gpu/drm/tiny/Kconfig | 4 ++-- drivers/gpu/drm/udl/Kconfig | 1 + 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 0d372354c2d0..314eefa39892 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -211,7 +211,7 @@ config DRM_KMS_CMA_HELPER config DRM_GEM_SHMEM_HELPER bool - depends on DRM + depends on DRM && MMU help Choose this if you need the GEM shmem helper functions diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a61946374c82..cc96d1c3570e 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -542,7 +542,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) } else { page = shmem->pages[page_offset]; - ret = vmf_insert_page(vma, vmf->address, page); + ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); } mutex_unlock(&shmem->pages_lock); @@ -612,7 +612,7 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) return ret; } - vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); diff --git a/drivers/gpu/drm/gud/Kconfig b/drivers/gpu/drm/gud/Kconfig index 1c8601bf4d91..9c1e61f9eec3 100644 --- a/drivers/gpu/drm/gud/Kconfig +++ b/drivers/gpu/drm/gud/Kconfig @@ -2,7 +2,7 @@ config DRM_GUD tristate "GUD USB Display" - depends on DRM && USB + depends on 
DRM && USB && MMU select LZ4_COMPRESS select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index d31be274a2bd..1ceb93fbdc50 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -44,7 +44,7 @@ config DRM_CIRRUS_QEMU config DRM_GM12U320 tristate "GM12U320 driver for USB projectors" - depends on DRM && USB + depends on DRM && USB && MMU select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER help @@ -53,7 +53,7 @@ config DRM_GM12U320 config DRM_SIMPLEDRM tristate "Simple framebuffer driver" - depends on DRM + depends on DRM && MMU select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER help diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig index 1f497d8f1ae5..c744175c6992 100644 --- a/drivers/gpu/drm/udl/Kconfig +++ b/drivers/gpu/drm/udl/Kconfig @@ -4,6 +4,7 @@ config DRM_UDL depends on DRM depends on USB depends on USB_ARCH_HAS_HCD + depends on MMU select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER help -- cgit From 804b6e5ee613b019b942ba6be52cccecd9d33655 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 12 Aug 2021 15:14:11 +0200 Subject: drm/shmem-helpers: Allocate wc pages on x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel-gfx-ci realized that something is not quite coherent anymore on some platforms for our i915+vgem tests, when I tried to switch vgem over to shmem helpers. After lots of head-scratching I realized that I've removed calls to drm_clflush. And we need those. To make this a bit cleaner use the same page allocation tooling as ttm, which does internally clflush (and more, as needed on any platform instead of just the intel x86 cpus i915 can be combined with). Unfortunately this doesn't exist on arm, or as a generic feature. For that I think only the dma-api can get at wc memory reliably, so maybe we'd need some kind of GFP_WC flag to do this properly.
v2: Add a TODO comment about what should be done to support this in other places (Thomas) Acked-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Cc: Christian König Cc: "Thomas Hellström" Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210812131412.2487363-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_gem_shmem_helper.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index cc96d1c3570e..0e0986dfbe0c 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_X86 +#include +#endif + #include #include #include @@ -162,6 +166,16 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) return PTR_ERR(pages); } + /* + * TODO: Allocating WC pages which are correctly flushed is only + * supported on x86. Ideal solution would be a GFP_WC flag, which also + * ttm_pool.c could use. 
+ */ +#ifdef CONFIG_X86 + if (shmem->map_wc) + set_pages_array_wc(pages, obj->size >> PAGE_SHIFT); +#endif + shmem->pages = pages; return 0; @@ -203,6 +217,11 @@ static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem) if (--shmem->pages_use_count > 0) return; +#ifdef CONFIG_X86 + if (shmem->map_wc) + set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); +#endif + drm_gem_put_pages(obj, shmem->pages, shmem->pages_mark_dirty_on_put, shmem->pages_mark_accessed_on_put); -- cgit From 45d9c8dde4cd8589f9180309ec60f0da2ce486e4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 12 Aug 2021 15:14:12 +0200 Subject: drm/vgem: use shmem helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aside from deleting lots of code the real motivation here is to switch the mmap over to VM_PFNMAP, to be more consistent with what real gpu drivers do. They're all VM_PFNMAP, which means get_user_pages doesn't work, and even if you try and there's a struct page behind that, touching it and mucking around with its refcount can upset drivers real bad. v2: Review from Thomas: - sort #include - drop more dead code that I didn't spot somehow v3: select DRM_GEM_SHMEM_HELPER to make it build (intel-gfx-ci) v4: I got tricked by 0cf2ef46c6c0 ("drm/shmem-helper: Use cached mappings by default"), and we need WC in vgem because vgem doesn't have explicit begin/end cpu access ioctls. Also add a comment why exactly vgem has to use wc. v5: Don't set obj->base.funcs, it will default to drm_gem_shmem_funcs (Thomas) v6: vgem also needs an MMU for remapping v7: I absolutely butchered the rebases over the vgem mmap change and revert and broke the patch. Actually go back to v6 from before the vgem mmap changes. 
Cc: Thomas Zimmermann Acked-by: Thomas Zimmermann Cc: John Stultz Cc: Sumit Semwal Cc: "Christian König" Signed-off-by: Daniel Vetter Cc: Melissa Wen Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210812131412.2487363-4-daniel.vetter@ffwll.ch --- drivers/gpu/drm/Kconfig | 5 +- drivers/gpu/drm/vgem/vgem_drv.c | 342 ++-------------------------------------- 2 files changed, 16 insertions(+), 331 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 314eefa39892..28f7d2006e8b 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -272,7 +272,8 @@ source "drivers/gpu/drm/kmb/Kconfig" config DRM_VGEM tristate "Virtual GEM provider" - depends on DRM + depends on DRM && MMU + select DRM_GEM_SHMEM_HELPER help Choose this option to get a virtual graphics memory manager, as used by Mesa's software renderer for enhanced performance. @@ -280,7 +281,7 @@ config DRM_VGEM config DRM_VKMS tristate "Virtual KMS (EXPERIMENTAL)" - depends on DRM + depends on DRM && MMU select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER select CRC32 diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index bf38a7e319d1..a87eafa89e9f 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -50,87 +51,11 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 -static const struct drm_gem_object_funcs vgem_gem_object_funcs; - static struct vgem_device { struct drm_device drm; struct platform_device *platform; } *vgem_device; -static void vgem_gem_free_object(struct drm_gem_object *obj) -{ - struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); - - kvfree(vgem_obj->pages); - mutex_destroy(&vgem_obj->pages_lock); - - if (obj->import_attach) - drm_prime_gem_destroy(obj, vgem_obj->table); - - drm_gem_object_release(obj); - kfree(vgem_obj); -} - -static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) -{ - struct 
vm_area_struct *vma = vmf->vma; - struct drm_vgem_gem_object *obj = vma->vm_private_data; - /* We don't use vmf->pgoff since that has the fake offset */ - unsigned long vaddr = vmf->address; - vm_fault_t ret = VM_FAULT_SIGBUS; - loff_t num_pages; - pgoff_t page_offset; - page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT; - - num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE); - - if (page_offset >= num_pages) - return VM_FAULT_SIGBUS; - - mutex_lock(&obj->pages_lock); - if (obj->pages) { - get_page(obj->pages[page_offset]); - vmf->page = obj->pages[page_offset]; - ret = 0; - } - mutex_unlock(&obj->pages_lock); - if (ret) { - struct page *page; - - page = shmem_read_mapping_page( - file_inode(obj->base.filp)->i_mapping, - page_offset); - if (!IS_ERR(page)) { - vmf->page = page; - ret = 0; - } else switch (PTR_ERR(page)) { - case -ENOSPC: - case -ENOMEM: - ret = VM_FAULT_OOM; - break; - case -EBUSY: - ret = VM_FAULT_RETRY; - break; - case -EFAULT: - case -EINVAL: - ret = VM_FAULT_SIGBUS; - break; - default: - WARN_ON(PTR_ERR(page)); - ret = VM_FAULT_SIGBUS; - break; - } - - } - return ret; -} - -static const struct vm_operations_struct vgem_gem_vm_ops = { - .fault = vgem_gem_fault, - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - static int vgem_open(struct drm_device *dev, struct drm_file *file) { struct vgem_file *vfile; @@ -159,266 +84,30 @@ static void vgem_postclose(struct drm_device *dev, struct drm_file *file) kfree(vfile); } -static struct drm_vgem_gem_object *__vgem_gem_create(struct drm_device *dev, - unsigned long size) -{ - struct drm_vgem_gem_object *obj; - int ret; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - obj->base.funcs = &vgem_gem_object_funcs; - - ret = drm_gem_object_init(dev, &obj->base, roundup(size, PAGE_SIZE)); - if (ret) { - kfree(obj); - return ERR_PTR(ret); - } - - mutex_init(&obj->pages_lock); - - return obj; -} - -static void __vgem_gem_destroy(struct drm_vgem_gem_object *obj) 
-{ - drm_gem_object_release(&obj->base); - kfree(obj); -} - -static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, - struct drm_file *file, - unsigned int *handle, - unsigned long size) -{ - struct drm_vgem_gem_object *obj; - int ret; - - obj = __vgem_gem_create(dev, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - ret = drm_gem_handle_create(file, &obj->base, handle); - if (ret) { - drm_gem_object_put(&obj->base); - return ERR_PTR(ret); - } - - return &obj->base; -} - -static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - struct drm_gem_object *gem_object; - u64 pitch, size; - - pitch = args->width * DIV_ROUND_UP(args->bpp, 8); - size = args->height * pitch; - if (size == 0) - return -EINVAL; - - gem_object = vgem_gem_create(dev, file, &args->handle, size); - if (IS_ERR(gem_object)) - return PTR_ERR(gem_object); - - args->size = gem_object->size; - args->pitch = pitch; - - drm_gem_object_put(gem_object); - - DRM_DEBUG("Created object of size %llu\n", args->size); - - return 0; -} - static struct drm_ioctl_desc vgem_ioctls[] = { DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_RENDER_ALLOW), }; -static int vgem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long flags = vma->vm_flags; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - /* Keep the WC mmaping set by drm_gem_mmap() but our pages - * are ordinary and not special. 
- */ - vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP; - return 0; -} +DEFINE_DRM_GEM_FOPS(vgem_driver_fops); -static const struct file_operations vgem_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = vgem_mmap, - .poll = drm_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .release = drm_release, -}; - -static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo) -{ - mutex_lock(&bo->pages_lock); - if (bo->pages_pin_count++ == 0) { - struct page **pages; - - pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) { - bo->pages_pin_count--; - mutex_unlock(&bo->pages_lock); - return pages; - } - - bo->pages = pages; - } - mutex_unlock(&bo->pages_lock); - - return bo->pages; -} - -static void vgem_unpin_pages(struct drm_vgem_gem_object *bo) -{ - mutex_lock(&bo->pages_lock); - if (--bo->pages_pin_count == 0) { - drm_gem_put_pages(&bo->base, bo->pages, true, true); - bo->pages = NULL; - } - mutex_unlock(&bo->pages_lock); -} - -static int vgem_prime_pin(struct drm_gem_object *obj) +static struct drm_gem_object *vgem_gem_create_object(struct drm_device *dev, size_t size) { - struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - long n_pages = obj->size >> PAGE_SHIFT; - struct page **pages; + struct drm_gem_shmem_object *obj; - pages = vgem_pin_pages(bo); - if (IS_ERR(pages)) - return PTR_ERR(pages); + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; - /* Flush the object from the CPU cache so that importers can rely - * on coherent indirect access via the exported dma-address. + /* + * vgem doesn't have any begin/end cpu access ioctls, therefore must use + * coherent memory or dma-buf sharing just wont work. 
*/ - drm_clflush_pages(pages, n_pages); - - return 0; -} - -static void vgem_prime_unpin(struct drm_gem_object *obj) -{ - struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - - vgem_unpin_pages(bo); -} - -static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj) -{ - struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - - return drm_prime_pages_to_sg(obj->dev, bo->pages, bo->base.size >> PAGE_SHIFT); -} - -static struct drm_gem_object* vgem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) -{ - struct vgem_device *vgem = container_of(dev, typeof(*vgem), drm); - - return drm_gem_prime_import_dev(dev, dma_buf, &vgem->platform->dev); -} - -static struct drm_gem_object *vgem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, struct sg_table *sg) -{ - struct drm_vgem_gem_object *obj; - int npages; - - obj = __vgem_gem_create(dev, attach->dmabuf->size); - if (IS_ERR(obj)) - return ERR_CAST(obj); + obj->map_wc = true; - npages = PAGE_ALIGN(attach->dmabuf->size) / PAGE_SIZE; - - obj->table = sg; - obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!obj->pages) { - __vgem_gem_destroy(obj); - return ERR_PTR(-ENOMEM); - } - - obj->pages_pin_count++; /* perma-pinned */ - drm_prime_sg_to_page_array(obj->table, obj->pages, npages); return &obj->base; } -static int vgem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) -{ - struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - long n_pages = obj->size >> PAGE_SHIFT; - struct page **pages; - void *vaddr; - - pages = vgem_pin_pages(bo); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - vaddr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL)); - if (!vaddr) - return -ENOMEM; - dma_buf_map_set_vaddr(map, vaddr); - - return 0; -} - -static void vgem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) -{ - struct drm_vgem_gem_object *bo = to_vgem_bo(obj); - - vunmap(map->vaddr); - vgem_unpin_pages(bo); -} - -static 
int vgem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - int ret; - - if (obj->size < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (!obj->filp) - return -ENODEV; - - ret = call_mmap(obj->filp, vma); - if (ret) - return ret; - - vma_set_file(vma, obj->filp); - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - - return 0; -} - -static const struct drm_gem_object_funcs vgem_gem_object_funcs = { - .free = vgem_gem_free_object, - .pin = vgem_prime_pin, - .unpin = vgem_prime_unpin, - .get_sg_table = vgem_prime_get_sg_table, - .vmap = vgem_prime_vmap, - .vunmap = vgem_prime_vunmap, - .vm_ops = &vgem_gem_vm_ops, -}; - static const struct drm_driver vgem_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER, .open = vgem_open, @@ -427,13 +116,8 @@ static const struct drm_driver vgem_driver = { .num_ioctls = ARRAY_SIZE(vgem_ioctls), .fops = &vgem_driver_fops, - .dumb_create = vgem_gem_dumb_create, - - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_import = vgem_prime_import, - .gem_prime_import_sg_table = vgem_prime_import_sg_table, - .gem_prime_mmap = vgem_prime_mmap, + DRM_GEM_SHMEM_DRIVER_OPS, + .gem_create_object = vgem_gem_create_object, .name = DRIVER_NAME, .desc = DRIVER_DESC, -- cgit From efd330b97855013c8b58185683ddfb75deab5fa9 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 Aug 2021 09:36:43 -0700 Subject: drm/i915/xehpsdv: factor out function to read RP_STATE_CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of maintaining the same if ladder in 3 different places, add a function to read RP_STATE_CAP. 
Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 8 +++----- drivers/gpu/drm/i915/gt/intel_rps.c | 17 ++++++++++++----- drivers/gpu/drm/i915/gt/intel_rps.h | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 8 +++----- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index d6f5836396f8..f6733f279890 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -309,13 +309,11 @@ static int frequency_show(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(i915)) { - rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d812b27835f8..a3e69eba376f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -996,20 +996,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val) static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = intel_rps_read_state_cap(rps); /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); - 
rps->rp0_freq = (rp_state_cap >> 16) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 16) & 0xff; @@ -2140,6 +2136,17 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u32 intel_rps_read_state_cap(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4213bcce1667..11960d64ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_state_cap(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc745751ac53..6c83da3956b9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -420,13 +420,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(dev_priv)) { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, BXT_RP_STATE_CAP); + rp_state_cap = 
intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(dev_priv)) gt_perf_status = intel_uncore_read(&dev_priv->uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(&dev_priv->uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); -- cgit From ad482232e3cc6d65eaeb19ce2412887458b19559 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:44 -0700 Subject: drm/i915/xehpsdv: Read correct RP_STATE_CAP register The RP_STATE_CAP register is no longer part of the MCHBAR on XEHPSDV; this register is now a per-tile register at GTTMMADDR offset 0x250014. Cc: Rodrigo Vivi Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index a3e69eba376f..3489f5f0cac1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2141,7 +2141,9 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps) struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); - if (IS_GEN9_LP(i915)) + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) return intel_uncore_read(uncore, BXT_RP_STATE_CAP); else return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8db6e8ef1ad..f79f02ee12db 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4124,6 +4124,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define 
RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) +#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) /* * Logical Context regs -- cgit From d5ef86b38e4c2a65d5c1d64d8d0f3fcf58aa0884 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Aug 2021 14:44:52 +0200 Subject: drm/i915: Add pci ids and uapi for DG1 DG1 has support for local memory, which requires the usage of the lmem placement extension for creating bo's, and memregion queries to obtain the size. Because of this, those parts of the uapi are no longer guarded behind FAKE_LMEM. According to the pull request referenced below, mesa should be mostly ready for DG1. VK_EXT_memory_budget is not hooked up yet, but we should definitely just enable the uapi parts by default. Signed-off-by: Maarten Lankhorst References: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11584 Cc: Jordan Justen Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210812124452.622233-2-maarten.lankhorst@linux.intel.com Acked-by: Daniel Vetter Acked-by: Jason Ekstrand --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 3 --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_query.c | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 23fee13a3384..1d341b8c47c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -347,9 +347,6 @@ static int ext_set_placements(struct i915_user_extension __user *base, { struct drm_i915_gem_create_ext_memory_regions ext; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (copy_from_user(&ext, base, sizeof(ext))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 32358f90b920..607459251b49 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ 
b/drivers/gpu/drm/i915/i915_pci.c @@ -1096,6 +1096,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_RKL_IDS(&rkl_info), INTEL_ADLS_IDS(&adl_s_info), INTEL_ADLP_IDS(&adl_p_info), + INTEL_DG1_IDS(&dg1_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index e49da36c62fb..5e2b909827f4 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -432,9 +432,6 @@ static int query_memregion_info(struct drm_i915_private *i915, u32 total_length; int ret, id, i; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (query_item->flags != 0) return -EINVAL; -- cgit From 8b4e02c70fca482c5b947d8ba92b45093b4390e7 Mon Sep 17 00:00:00 2001 From: Markuss Broks Date: Sat, 7 Aug 2021 16:31:10 +0300 Subject: drm/panel: Add DT bindings for Samsung S6D27A1 display panel This adds device-tree bindings for the Samsung S6D27A1 RGB DPI display panel. Signed-off-by: Markuss Broks v1 -> v2: changed additionalProperties to unevaluatedProperties; added vci-supply and vccio-supply as required; Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20210807133111.5935-2-markuss.broks@gmail.com --- .../bindings/display/panel/samsung,s6d27a1.yaml | 98 ++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml new file mode 100644 index 000000000000..26e3c820a2f7 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/samsung,s6d27a1.yaml# +$schema: 
http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung S6D27A1 display panel + +description: The S6D27A1 is a 480x800 DPI display panel from Samsung Mobile + Displays (SMD). The panel must obey the rules for a SPI slave device + as specified in spi/spi-controller.yaml + +maintainers: + - Markuss Broks + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: samsung,s6d27a1 + + reg: true + + interrupts: + description: provides an optional ESD (electrostatic discharge) + interrupt that signals abnormalities in the display hardware. + This can also be raised for other reasons like erroneous + configuration. + maxItems: 1 + + reset-gpios: true + + vci-supply: + description: regulator that supplies the VCI analog voltage + usually around 3.0 V + + vccio-supply: + description: regulator that supplies the VCCIO voltage usually + around 1.8 V + + backlight: true + + spi-cpha: true + + spi-cpol: true + + spi-max-frequency: + maximum: 1200000 + + port: true + +required: + - compatible + - reg + - vci-supply + - vccio-supply + - spi-cpha + - spi-cpol + - port + +unevaluatedProperties: false + +examples: + - | + #include + #include + + spi { + compatible = "spi-gpio"; + sck-gpios = <&gpio 0 GPIO_ACTIVE_HIGH>; + miso-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>; + mosi-gpios = <&gpio 2 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpio 3 GPIO_ACTIVE_HIGH>; + num-chipselects = <1>; + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "samsung,s6d27a1"; + spi-max-frequency = <1200000>; + spi-cpha; + spi-cpol; + reg = <0>; + vci-supply = <&lcd_3v0_reg>; + vccio-supply = <&lcd_1v8_reg>; + reset-gpios = <&gpio 4 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio>; + interrupts = <5 IRQ_TYPE_EDGE_RISING>; + + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; + }; + }; + +... 
-- cgit From ebd8cbf1fb968cb1c3e3cf7b26dfe2c1f201bdf0 Mon Sep 17 00:00:00 2001 From: Markuss Broks Date: Sat, 7 Aug 2021 16:31:11 +0300 Subject: drm/panel: s6d27a1: Add driver for Samsung S6D27A1 display panel This adds a driver for Samsung S6D27A1 display controller and panel. This panel is found in the Samsung GT-I8160 mobile phone, and possibly some other mobile phones. This display needs manufacturer commands to configure it; the commands used in this driver were taken from downstream driver by Gareth Phillips; sadly, there is almost no documentation on what they actually do. This driver re-uses the DBI infrastructure to communicate with the display. This driver is heavily based on WideChips WS2401 display controller driver by Linus Walleij and on other panel drivers for reference. Signed-off-by: Markuss Broks [Up reset out time to 120 ms] Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20210807133111.5935-3-markuss.broks@gmail.com --- MAINTAINERS | 6 + drivers/gpu/drm/panel/Kconfig | 11 + drivers/gpu/drm/panel/Makefile | 1 + drivers/gpu/drm/panel/panel-samsung-s6d27a1.c | 320 ++++++++++++++++++++++++++ 4 files changed, 338 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-samsung-s6d27a1.c diff --git a/MAINTAINERS b/MAINTAINERS index 851255b71ccc..c363209130c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5962,6 +5962,12 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml F: drivers/gpu/drm/panel/panel-samsung-db7430.c +DRM DRIVER FOR SAMSUNG S6D27A1 PANELS +M: Markuss Broks +S: Maintained +F: Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml +F: drivers/gpu/drm/panel/panel-samsung-s6d27a1.c + DRM DRIVER FOR SITRONIX ST7703 PANELS M: Guido Günther R: Purism Kernel Team diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index beb581b96ecd..0b3784941312 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ 
b/drivers/gpu/drm/panel/Kconfig @@ -392,6 +392,17 @@ config DRM_PANEL_SAMSUNG_S6D16D0 depends on DRM_MIPI_DSI select VIDEOMODE_HELPERS +config DRM_PANEL_SAMSUNG_S6D27A1 + tristate "Samsung S6D27A1 DPI panel driver" + depends on OF && SPI && GPIOLIB + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for the Samsung + S6D27A1 DPI 480x800 panel. + + This panel can be found in Samsung Galaxy Ace 2 + GT-I8160 mobile phone. + config DRM_PANEL_SAMSUNG_S6E3HA2 tristate "Samsung S6E3HA2 DSI video mode panel" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index c8132050bcec..60c0149fc54a 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20) += panel-samsung-atna33xc20.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_DB7430) += panel-samsung-db7430.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D27A1) += panel-samsung-s6d27a1.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c new file mode 100644 index 000000000000..1696ceb36aa0 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Panel driver for the Samsung S6D27A1 480x800 DPI RGB panel. + * Found in the Samsung Galaxy Ace 2 GT-I8160 mobile phone. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include