From 8133a6daad4e72748e239a02775a853ca7ed798b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 4 Oct 2022 12:49:14 +0100 Subject: drm/i915: enable PS64 support for DG2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that on production DG2/ATS HW we should have support for PS64. This feature allows to provide a 64K TLB hint at the PTE level, which is a lot more flexible than the current method of enabling 64K GTT pages for the entire page-table, since that leads to all kinds of annoying restrictions, as documented in: commit caa574ffc4aaf4f29b890223878c63e2e7772f62 Author: Matthew Auld Date: Sat Feb 19 00:17:49 2022 +0530 drm/i915/uapi: document behaviour for DG2 64K support On discrete platforms like DG2, we need to support a minimum page size of 64K when dealing with device local-memory. This is quite tricky for various reasons, so try to document the new implicit uapi for this. With PS64, we can now drop the 2M GTT alignment restriction, and instead only require 64K or larger when dealing with lmem. We still use the compact-pt layout when possible, but only when we are certain that this doesn't interfere with userspace. Note that this is a change in uAPI behaviour, but hopefully shouldn't be a concern (IGT is at least able to autodetect the alignment), since we are only making the GTT alignment constraint less restrictive. Based on a patch from CQ Tang. v2: update the comment wrt scratch page v3: (Nirmoy) - Fix the selftest to actually use the random size, plus some comment improvements, also drop the rem stuff. Reported-by: Michal Mrozek Signed-off-by: Matthew Auld Cc: Lionel Landwerlin Cc: Thomas Hellström Cc: Stuart Summers Cc: Jordan Justen Cc: Yang A Shi Cc: Nirmoy Das Cc: Niranjana Vishwanathapura Reviewed-by: Nirmoy Das Acked-by: Michal Mrozek Link: https://patchwork.freedesktop.org/patch/msgid/20221004114915.221708-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pci.c') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index cd4487a1d3be..f1344db626f5 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1042,7 +1042,6 @@ static const struct intel_device_info xehpsdv_info = { PLATFORM(INTEL_XEHPSDV), NO_DISPLAY, .has_64k_pages = 1, - .needs_compact_pt = 1, .has_media_ratio_mode = 1, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | @@ -1064,7 +1063,6 @@ static const struct intel_device_info xehpsdv_info = { .has_64k_pages = 1, \ .has_guc_deprivilege = 1, \ .has_heci_pxp = 1, \ - .needs_compact_pt = 1, \ .has_media_ratio_mode = 1, \ .__runtime.platform_engine_mask = \ BIT(RCS0) | BIT(BCS0) | \ -- cgit From f32898c94a105c221e6fe957aee833e7fc98f95f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 14 Oct 2022 16:02:38 -0700 Subject: drm/i915/xelpg: Add multicast steering MTL's graphics IP (Xe_LPG) once again changes the multicast register types and steering details. Key changes from past platforms: * The number of instances of some MCR types (NODE, OAAL2, and GAM) vary according to the MTL subplatform and cannot be read from fuse registers. However steering to instance #0 will always provided a non-terminated value, so we can lump these all into a single "instance0" table. * The MCR steering register (and its bitfields) has changed. Unlike past platforms, we will be explicitly steering all types of MCR accesses, including those for "SLICE" and "DSS" ranges; we no longer rely on implicit steering. On previous platforms, various hardware/firmware agents that needed to access registers typically had their own steering control registers, allowing them to perform multicast steering without clobbering the CPU/kernel steering. Starting with MTL, more of these agents now share a single steering register (0xFD4) and it is no longer safe for us to assume that the value will remain unchanged from how we initialized it during startup. There is also a slight chance of race conditions between the driver and a hardware/firmware agent, so the hardware provides a semaphore register that can be used to coordinate access to the steering register. Support for the semaphore register will be introduced in a future patch. v2: - Use Xe_LPG terminology instead of "MTL 3D" since it's the IP version we're matching on now rather than the platform. - Don't combine l3bank and mslice masks into a union. It's not related to the other changes here and we might still need both of them on some future platform. - Separate debug dumping of steering settings to a separate helper function. (Tvrtko) - Update debug dumping to include DSS ranges (and future-proof it so that any new ranges added on future platforms will also be dumped). - Restore MULTICAST bit at the end of rw_with_mcr_steering_fw() if we cleared it. Also force the MULTICAST bit to true at the beginning of multicast writes just to be safe. (Bala) Bspec: 67788, 67112 Cc: Radhakrishna Sripada Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221014230239.1023689-14-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 135 ++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 33 +++++-- drivers/gpu/drm/i915/i915_pci.c | 1 + 5 files changed, 154 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pci.c') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 349074bf365f..23a1ef9659bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -41,6 +41,7 @@ static const char * const intel_steering_types[] = { "MSLICE", "LNCF", "GAM", + "DSS", "INSTANCE 0", }; @@ -99,9 +100,40 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = { {}, }; +static const struct intel_mmio_range xelpg_instance0_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SQIDI */ + { 0x001000, 0x001FFF }, /* SQIDI */ + { 0x004000, 0x0048FF }, /* GAM */ + { 0x008700, 0x0087FF }, /* SQIDI */ + { 0x00B000, 0x00B0FF }, /* NODE */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* OAAL2 */ + {}, +}; + +static const struct intel_mmio_range xelpg_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct intel_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ +}; + void intel_gt_mcr_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; + unsigned long fuse; + int i; /* * An mslice is unavailable only if both the meml3 for the slice is @@ -119,7 +151,22 @@ void intel_gt_mcr_init(struct intel_gt *gt) drm_warn(&i915->drm, "mslice mask all zero!\n"); } - if (IS_PONTEVECCHIO(i915)) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) && + gt->type == GT_PRIMARY) { + fuse = REG_FIELD_GET(GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, XEHP_FUSE4)); + + /* + * Despite the register field being named "exclude mask" the + * bits actually represent enabled banks (two banks per bit). + */ + for_each_set_bit(i, &fuse, 3) + gt->info.l3bank_mask |= 0x3 << 2 * i; + + gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table; + gt->steering_table[L3BANK] = xelpg_l3bank_steering_table; + gt->steering_table[DSS] = xelpg_dss_steering_table; + } else if (IS_PONTEVECCHIO(i915)) { gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; } else if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; @@ -184,7 +231,19 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, lockdep_assert_held(&uncore->lock); - if (GRAPHICS_VER(uncore->i915) >= 11) { + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) { + /* + * Always leave the hardware in multicast mode when doing reads + * (see comment about Wa_22013088509 below) and only change it + * to unicast mode when doing writes of a specific instance. + * + * No need to save old steering reg value. + */ + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, + REG_FIELD_PREP(MTL_MCR_GROUPID, group) | + REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | + (rw_flag == FW_REG_READ) ? GEN11_MCR_MULTICAST : 0); + } else if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance); @@ -202,26 +261,40 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, */ if (rw_flag == FW_REG_WRITE) mcr_mask |= GEN11_MCR_MULTICAST; + + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); } else { mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance); - } - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + } if (rw_flag == FW_REG_READ) val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg)); else intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value); - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + /* + * For pre-MTL platforms, we need to restore the old value of the + * steering control register to ensure that implicit steering continues + * to behave as expected. For MTL and beyond, we need only reinstate + * the 'multicast' bit (and only if we did a write that cleared it). + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE) + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70)) + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr); return val; } @@ -296,6 +369,13 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 val void intel_gt_mcr_multicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value); } @@ -312,6 +392,13 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, */ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value); } @@ -389,6 +476,8 @@ static void get_nonterminated_steering(struct intel_gt *gt, enum intel_steering_type type, u8 *group, u8 *instance) { + u32 dss; + switch (type) { case L3BANK: *group = 0; /* unused */ @@ -412,6 +501,11 @@ static void get_nonterminated_steering(struct intel_gt *gt, *group = IS_DG2(gt->i915) ? 1 : 0; *instance = 0; break; + case DSS: + dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + break; case INSTANCE0: /* * There are a lot of MCR types for which instance (0, 0) @@ -544,11 +638,20 @@ static void report_steering_type(struct drm_printer *p, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table) { - drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", - gt->default_steering.groupid, - gt->default_steering.instanceid); - - if (IS_PONTEVECCHIO(gt->i915)) { + /* + * Starting with MTL we no longer have default steering; + * all ranges are explicitly steered. + */ + if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)) + drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", + gt->default_steering.groupid, + gt->default_steering.instanceid); + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { + for (int i = 0; i < NUM_STEERING_TYPES; i++) + if (gt->steering_table[i]) + report_steering_type(p, gt, i, dump_table); + } else if (IS_PONTEVECCHIO(gt->i915)) { report_steering_type(p, gt, INSTANCE0, dump_table); } else if (HAS_MSLICE_STEERING(gt->i915)) { report_steering_type(p, gt, MSLICE, dump_table); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 1116bc3dba51..1c3a46ee876d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -56,6 +56,7 @@ #define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define MTL_MCR_SELECTOR _MMIO(0xfd4) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) #define GAM_MCR_SELECTOR _MMIO(0xfe0) @@ -68,6 +69,8 @@ #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) +#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define IPEIR_I965 _MMIO(0x2064) #define IPEHR_I965 _MMIO(0x2068) @@ -528,6 +531,8 @@ #define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0) /* Fuse readout registers for GT */ +#define XEHP_FUSE4 _MMIO(0x9114) +#define GT_L3_EXC_MASK REG_GENMASK(6, 4) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 30003d68fd51..0bb73d110a84 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -60,6 +60,7 @@ enum intel_steering_type { MSLICE, LNCF, GAM, + DSS, /* * On some platforms there are multiple types of MCR registers that diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index dadb60e6a58f..711a31935857 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1128,18 +1128,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal, wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); } -static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, - unsigned int slice, unsigned int subslice) +static void debug_dump_steering(struct intel_gt *gt) { struct drm_printer p = drm_debug_printer("MCR Steering:"); + if (drm_debug_enabled(DRM_UT_DRIVER)) + intel_gt_mcr_report_steering(&p, gt, false); +} + +static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); gt->default_steering.groupid = slice; gt->default_steering.instanceid = subslice; - if (drm_debug_enabled(DRM_UT_DRIVER)) - intel_gt_mcr_report_steering(&p, gt, false); + debug_dump_steering(gt); } static void @@ -1581,12 +1586,30 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } +static void +xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* FIXME: Actual workarounds will be added in future patch(es) */ + + /* + * Unlike older platforms, we no longer setup implicit steering here; + * all MCR accesses are explicitly steered. + */ + debug_dump_steering(gt); +} + static void gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_PONTEVECCHIO(i915)) + /* FIXME: Media GT handling will be added in an upcoming patch */ + if (gt->type == GT_MEDIA) + return; + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + xelpg_gt_workarounds_init(gt, wal); + else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f1344db626f5..4757ad0173da 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1142,6 +1142,7 @@ static const struct intel_device_info mtl_info = { .display.has_modular_fia = 1, .extra_gt_list = xelpmp_extra_gt, .has_flat_ccs = 0, + .has_mslice_steering = 0, .has_snoop = 1, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), -- cgit From 5bfcff516c89c57be6cd90af1d64529a51228ac1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 5 Oct 2022 18:41:57 +0300 Subject: drm/i915: Extract intel_mmio_bar() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the same code to determine the MMIO BAR in two places. Collect it to a single place. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221005154159.18750-1-ville.syrjala@linux.intel.com Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 4 +--- drivers/gpu/drm/i915/intel_pci_config.h | 8 ++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pci.c') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 27dbb9e4bd6c..2e796ffad911 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -880,7 +880,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) unsigned int i; int ret; - mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; + mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915)); phys_addr = pci_resource_start(pdev, mmio_bar); /* diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 4757ad0173da..53c6b7cf7879 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1295,9 +1295,7 @@ bool i915_pci_resource_valid(struct pci_dev *pdev, int bar) static bool intel_mmio_bar_valid(struct pci_dev *pdev, struct intel_device_info *intel_info) { - int gttmmaddr_bar = intel_info->__runtime.graphics.ip.ver == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; - - return i915_pci_resource_valid(pdev, gttmmaddr_bar); + return i915_pci_resource_valid(pdev, intel_mmio_bar(intel_info->__runtime.graphics.ip.ver)); } static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/gpu/drm/i915/intel_pci_config.h b/drivers/gpu/drm/i915/intel_pci_config.h index 4977a524ce6f..305f137d2ebd 100644 --- a/drivers/gpu/drm/i915/intel_pci_config.h +++ b/drivers/gpu/drm/i915/intel_pci_config.h @@ -13,6 +13,14 @@ #define GTT_APERTURE_BAR GFXMEM_BAR #define GEN12_LMEM_BAR GFXMEM_BAR +static inline int intel_mmio_bar(int graphics_ver) +{ + switch (graphics_ver) { + case 2: return GEN2_GTTMMADR_BAR; + default: return GTTMMADR_BAR; + } +} + /* BSM in include/drm/i915_drm.h */ #define MCHBAR_I915 0x44 -- cgit From cceb084905285dcf56912336c9f4f4e7ac334d9f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 26 Oct 2022 22:20:51 +0000 Subject: drm/i915/perf: Enable bytes per clock reporting in OA XEHPSDV and DG2 provide a way to configure bytes per clock vs commands per clock reporting. Enable bytes per clock setting on enabling OA. Bspec: 51762 Bspec: 52201 v2: - Fix commit msg (Ashutosh) - Fix checkpatch issues v3: - s/commands/bytes/ in code comment and commmit msg Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-6-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_perf_oa_regs.h | 4 ++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 29 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_pci.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 28ee5ac166db..34f0bcc36671 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -901,6 +901,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_OA_BPC_REPORTING(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) + /* * Set this flag, when platform requires 64K GTT page sizes or larger for * device local memory access. diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 53c6b7cf7879..7b751bf4be21 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1023,6 +1023,7 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslice_steering = 1, \ + .has_oa_bpc_reporting = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b71b5cf21176..d11cc949c9be 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2748,10 +2748,12 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream, struct i915_active *active) { + struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; + u32 sqcnt1; int ret; intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, @@ -2770,6 +2772,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) : 0); + /* + * Initialize Super Queue Internal Cnt Register + * Set PMON Enable in order to collect valid metrics. + * Enable byets per clock reporting in OA for XEHPSDV onward. + */ + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); + /* * Update all contexts prior writing the mux configurations as we need * to make sure all slices/subslices are ON before writing to NOA @@ -2819,6 +2831,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) static void gen12_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; + struct drm_i915_private *i915 = stream->perf->i915; + u32 sqcnt1; /* Reset all contexts' slices/subslices configurations. */ gen12_configure_all_contexts(stream, NULL, NULL); @@ -2829,6 +2843,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); + + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + /* Reset PMON Enable to save power. */ + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h index 0ef3562ff4aa..381d94101610 100644 --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h @@ -134,4 +134,8 @@ #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 +#define GEN12_SQCNT1 _MMIO(0x8718) +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) +#define GEN12_SQCNT1_OABPC REG_BIT(29) + #endif /* __INTEL_PERF_OA_REGS__ */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index da833267a782..d11546d87fe7 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -163,6 +163,7 @@ enum intel_ppgtt_type { func(has_logical_ring_elsq); \ func(has_media_ratio_mode); \ func(has_mslice_steering); \ + func(has_oa_bpc_reporting); \ func(has_one_eu_per_fuse_bit); \ func(has_pxp); \ func(has_rc6); \ -- cgit From 0fa9349dda030fa847b36f880a5eea25c3202b66 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 26 Oct 2022 22:21:01 +0000 Subject: drm/i915/perf: complete programming whitelisting for XEHPSDV We have an additional register to select which slices contribute to OAG/OAG counter increments. Signed-off-by: Lionel Landwerlin Signed-off-by: Matt Roper Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-16-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 13 +++++++++++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 17 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_pci.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34f0bcc36671..b1c4e924d883 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -903,6 +903,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_OA_BPC_REPORTING(dev_priv) \ (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) +#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits) /* * Set this flag, when platform requires 64K GTT page sizes or larger for diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 7b751bf4be21..6b22fb506aa9 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1024,6 +1024,7 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_elsq = 1, \ .has_mslice_steering = 1, \ .has_oa_bpc_reporting = 1, \ + .has_oa_slice_contrib_limits = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b2766485888f..2db74b5a54cd 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4261,6 +4261,11 @@ static const struct i915_range gen12_oa_b_counters[] = { {} }; +static const struct i915_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ +}; + static const struct i915_range gen7_oa_mux_regs[] = { { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */ { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */ @@ -4335,6 +4340,12 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) return reg_in_range_table(addr, gen12_oa_b_counters); } +static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return reg_in_range_table(addr, xehp_oa_b_counters) || + reg_in_range_table(addr, gen12_oa_b_counters); +} + static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return reg_in_range_table(addr, gen12_oa_mux_regs); @@ -4847,6 +4858,8 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; } else if (GRAPHICS_VER(i915) == 12) { perf->ops.is_valid_b_counter_reg = + HAS_OA_SLICE_CONTRIB_LIMITS(i915) ? + xehp_is_valid_b_counter_addr : gen12_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = gen12_is_valid_mux_addr; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d11546d87fe7..4ee6074955ef 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -164,6 +164,7 @@ enum intel_ppgtt_type { func(has_media_ratio_mode); \ func(has_mslice_steering); \ func(has_oa_bpc_reporting); \ + func(has_oa_slice_contrib_limits); \ func(has_one_eu_per_fuse_bit); \ func(has_pxp); \ func(has_rc6); \ -- cgit