From ce6b63336f79ec5f3996de65f452330e395f99ae Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 20 Jun 2024 11:20:26 +0100 Subject: drm/xe: fix error handling in xe_migrate_update_pgtables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't call drm_suballoc_free with sa_bo pointing to PTR_ERR. References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2120 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240620102025.127699-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 05f933787860..c9f5673353ee 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1358,7 +1358,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, GFP_KERNEL, true, 0); if (IS_ERR(sa_bo)) { err = PTR_ERR(sa_bo); - goto err; + goto err_bb; } ppgtt_ofs = NUM_KERNEL_PDE + @@ -1406,7 +1406,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); - goto err_bb; + goto err_sa; } /* Wait on BO move */ @@ -1458,10 +1458,10 @@ xe_migrate_update_pgtables(struct xe_migrate *m, err_job: xe_sched_job_put(job); +err_sa: + drm_suballoc_free(sa_bo, NULL); err_bb: xe_bb_free(bb, NULL); -err: - drm_suballoc_free(sa_bo, NULL); return ERR_PTR(err); } -- cgit From f4efd274d93b5000efc58bbfa14f8f863a57642d Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Wed, 26 Jun 2024 22:37:46 +0530 Subject: drm/xe/hwmon: Remove xe_hwmon_process_reg Remove xe_hwmon_process_reg as it is a umbrella function which can be avoided (Lucas). v2: Improve commit message. (Badal) v3: Add couple of comments. 
(Lucas) Signed-off-by: Karthik Poosa Suggested-by: Lucas De Marchi Cc: Badal Nilawar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240626170746.2926011-2-karthik.poosa@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hwmon.c | 89 +++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 222c651ee1f8..0c8ce09e5025 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -137,34 +137,6 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return XE_REG(0); } -static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, - enum xe_hwmon_reg_operation operation, u64 *value, - u32 clr, u32 set, int channel) -{ - struct xe_reg reg; - - reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel); - - if (!xe_reg_is_valid(reg)) - return; - - switch (operation) { - case REG_READ32: - *value = xe_mmio_read32(hwmon->gt, reg); - break; - case REG_RMW32: - *value = xe_mmio_rmw32(hwmon->gt, reg, clr, set); - break; - case REG_READ64: - *value = xe_mmio_read64_2x32(hwmon->gt, reg); - break; - default: - drm_warn(>_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n", - operation); - break; - } -} - #define PL1_DISABLE 0 /* @@ -176,10 +148,25 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val, min, max; + struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_reg rapl_limit, pkg_power_sku; + + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + + /* + * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. + * So not checking it again here. 
+ */ + if (!xe_reg_is_valid(pkg_power_sku)) { + drm_warn(&xe->drm, "pkg_power_sku invalid\n"); + *value = 0; + return; + } mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0, channel); + reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; @@ -189,7 +176,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, ®_val, 0, 0, channel); + reg_val = xe_mmio_read64_2x32(hwmon->gt, pkg_power_sku); min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); @@ -205,16 +192,16 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va { int ret = 0; u64 reg_val; + struct xe_reg rapl_limit; + + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); mutex_lock(&hwmon->hwmon_lock); /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, - PKG_PWR_LIM_1_EN, 0, channel); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, - PKG_PWR_LIM_1_EN, 0, channel); - + reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); + reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { ret = -EOPNOTSUPP; goto unlock; @@ -224,9 +211,8 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); + reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val, channel); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -234,9 +220,15 @@ unlock: static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { + struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, ®_val, 0, 0, channel); + /* + * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check + * for this register can be skipped. + * See xe_hwmon_power_is_visible. 
+ */ + reg_val = xe_mmio_read32(hwmon->gt, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -267,8 +259,8 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, - ®_val, 0, 0, channel); + reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); if (reg_val >= ei->reg_val_prev) ei->accum_energy += reg_val - ei->reg_val_prev; @@ -294,8 +286,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, - REG_READ32, &r, 0, 0, sensor_index); + r = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); mutex_unlock(&hwmon->hwmon_lock); @@ -383,8 +374,8 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r, - PKG_PWR_LIM_1_TIME, rxy, sensor_index); + r = xe_mmio_rmw32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), + PKG_PWR_LIM_1_TIME, rxy); mutex_unlock(&hwmon->hwmon_lock); @@ -499,8 +490,7 @@ static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *valu { u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, - REG_READ32, ®_val, 0, 0, channel); + reg_val = xe_mmio_read32(hwmon->gt, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); } @@ -784,14 +774,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) long energy; u64 val_sku_unit = 0; int channel; + struct xe_reg pkg_power_sku_unit; /* * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0))) { - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - REG_READ32, &val_sku_unit, 0, 0, 0); + pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); + if (xe_reg_is_valid(pkg_power_sku_unit)) { + val_sku_unit = xe_mmio_read32(hwmon->gt, pkg_power_sku_unit); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); -- cgit From e71cf19e3119de446cc37ddabb2e161ebbf7357c Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 25 Jun 2024 11:42:28 +0200 Subject: drm/xe/client: Check return value of xe_force_wake_get xe_force_wake_get() can return error so check it's return value before reading gpu_timestamp value. 
v2: set HWE to NULL instead of setting timestamp to 0(Lucas) Add a warn on for xe_force_wake_put(Himal) Fixes: 188ced1e0ff8 ("drm/xe/client: Print runtime to fdinfo") Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Nirmoy Das Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240625094228.5327-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_drm_client.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 4a19b771e3a0..4bdb909eabfc 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -264,9 +264,13 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) if (!hwe) continue; - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) { + hwe = NULL; + break; + } + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); break; } -- cgit From 0f54a19262154cb3965ba910c8b081417d97c6c8 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 27 Jun 2024 16:51:05 -0700 Subject: drm/xe: Get hwe domain specific FW to read RING_TIMESTAMP Per client engine utilization uses RING_TIMESTAMP to return drm-total-cycles to the user. Current code uses XE_FW_GT to read this register on the first available engine in a GT. When testing on DG2, it is observed that this value is 0 when running test on some engines. To resolve that, get the hwe domain specific FW for reading the engine timestamp. v2: - update commit message - use domain specific FW (Matt) v3: - Drop check for hwe in the helper (Matt, Michal) v4: - checkpatch fixes v5: Rebase Fixes: 188ced1e0ff8 ("drm/xe/client: Print runtime to fdinfo") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240627235105.2631135-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/xe/xe_drm_client.c | 7 +++++-- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +++++ drivers/gpu/drm/xe/xe_hw_engine.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 4bdb909eabfc..6a26923fa10e 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -260,17 +260,20 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Get the total GPU cycles */ for_each_gt(gt, xe, gt_id) { + enum xe_force_wake_domains fw; + hwe = xe_gt_any_hw_engine(gt); if (!hwe) continue; - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) { + fw = xe_hw_engine_to_fw_domain(hwe); + if (xe_force_wake_get(gt_to_fw(gt), fw)) { hwe = NULL; break; } gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw)); break; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 78b50d3a6501..07ed9fd28f19 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1130,3 +1130,8 @@ u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe) { return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base)); } + +enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) +{ + return engine_infos[hwe->engine_id].domain; +} diff --git 
a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 7f2d27c0ba1a..900c8c991430 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -69,5 +69,6 @@ static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); +enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); #endif -- cgit From 25b1f6cbd8c55216e02bd1c116a5f62f10f4d535 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 27 Jun 2024 13:37:42 -0700 Subject: drm/xe/mocs: Update MOCS assertions and remove redundant checks Rely more heavily on assertions to describe the MOCS programming invariants. CI checks these assertions and will ensure no violations sneak in due to programmer error, so we can remove some of the redundant WARN and silent return checks from non-debug builds. Also tweak/augment some of the existing assertions: there's no reason we'd ever want a platform not to have a MOCS 'ops' structure hooked up so ensure info->ops is non-NULL. Likewise, we should never have a case where the bspec-defined MOCS setting table is larger than the number of MOCS registers exposed by the hardware, so add an extra assert on those sizes as well. Cc: Michal Wajdeczko Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240627203741.2042752-3-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_mocs.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index de3f2d3f1b04..110ef89cd640 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -642,12 +642,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, */ xe_assert(xe, info->unused_entries_index != 0); - xe_assert(xe, !info->ops || info->ops->dump); - - if (XE_WARN_ON(info->size > info->n_entries)) { - info->table = NULL; - return 0; - } + xe_assert(xe, info->ops && info->ops->dump); + xe_assert(xe, info->size <= info->n_entries); if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20) flags |= HAS_GLOBAL_MOCS; @@ -675,9 +671,6 @@ static void __init_mocs_table(struct xe_gt *gt, unsigned int i; u32 mocs; - xe_gt_WARN_ONCE(gt, !info->unused_entries_index, - "Unused entries index should have been defined\n"); - mocs_dbg(gt, "mocs entries: %d\n", info->n_entries); for (i = 0; i < info->n_entries; i++) { @@ -779,9 +772,6 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) flags = get_mocs_settings(xe, &table); - if (!table.ops->dump) - return; - xe_pm_runtime_get_noresume(xe); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); -- cgit From 4279635e562953c08aa27d9737311a958ce9e753 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 27 Jun 2024 13:37:43 -0700 Subject: drm/xe/mocs: Clarify difference between hw and sw sizes It's not very obvious what the difference is between the 'size' and 'n_entries' fields of the MOCS structure. Rename both fields slightly and add some comments explaining that one is the documentation-defined table size, while the other is the number of entries that can be programmed into the hardware (and the documented table size can potentially be smaller than the number of hardware entries). 
Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240627203741.2042752-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/tests/xe_mocs.c | 8 ++--- drivers/gpu/drm/xe/xe_mocs.c | 62 +++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 1b8617075b37..67c65e88c384 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -31,9 +31,9 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) kunit_info(test, "gt %d", gt->info.id); kunit_info(test, "gt type %d", gt->info.type); - kunit_info(test, "table size %d", arg->table.size); + kunit_info(test, "table size %d", arg->table.table_size); kunit_info(test, "table uc_index %d", arg->table.uc_index); - kunit_info(test, "table n_entries %d", arg->table.n_entries); + kunit_info(test, "table num_mocs_regs %d", arg->table.num_mocs_regs); return flags; } @@ -50,7 +50,7 @@ static void read_l3cc_table(struct xe_gt *gt, ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1)); @@ -90,7 +90,7 @@ static void read_mocs_table(struct xe_gt *gt, ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 110ef89cd640..7ff0ac5b799a 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -47,8 +47,16 @@ struct xe_mocs_ops { }; struct xe_mocs_info { - unsigned int size; - unsigned int n_entries; + /* + * Size of the spec's suggested MOCS programming table. The list of + * table entries from the spec can potentially be smaller than the + * number of hardware registers used to program the MOCS table; in such + * cases the registers for the remaining indices will be programmed to + * match unused_entries_index. 
+ */ + unsigned int table_size; + /* Number of MOCS entries supported by the hardware */ + unsigned int num_mocs_regs; const struct xe_mocs_entry *table; const struct xe_mocs_ops *ops; u8 uc_index; @@ -266,7 +274,7 @@ static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct d drm_printf(p, "LNCFCMOCS[idx] = [ESC, SCC, L3CC] (value)\n\n"); - for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + for (i = 0, j = 0; i < (info->num_mocs_regs + 1) / 2; i++, j++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else @@ -298,7 +306,7 @@ static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags, drm_printf(p, "Global mocs table configuration:\n"); drm_printf(p, "GLOB_MOCS[idx] = [LeCC, TC, LRUM, AOM, RSC, SCC, PFM, SCF, CoS, SSE] (value)\n\n"); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else @@ -371,7 +379,7 @@ static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags, drm_printf(p, "LNCFCMOCS[idx] = [UCL3LOOKUP, GLBGO, L3CC] (value)\n\n"); - for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + for (i = 0, j = 0; i < (info->num_mocs_regs + 1) / 2; i++, j++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else @@ -416,7 +424,7 @@ static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct drm_printf(p, "LNCFCMOCS[idx] = [ L3CC ] (value)\n\n"); - for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + for (i = 0, j = 0; i < (info->num_mocs_regs + 1) / 2; i++, j++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else @@ -498,7 +506,7 @@ static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags, drm_printf(p, "Global mocs table configuration:\n"); drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L4_CACHE_POLICY] (value)\n\n"); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else @@ -541,7 +549,7 @@ static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags, drm_printf(p, "Global mocs table configuration:\n"); drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L3_CLOS, L3_CACHE_POLICY, L4_CACHE_POLICY] (value)\n\n"); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else @@ -571,48 +579,48 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_LUNARLAKE: case XE_BATTLEMAGE: info->ops = &xe2_mocs_ops; - info->size = ARRAY_SIZE(xe2_mocs_table); + info->table_size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; - info->n_entries = XE2_NUM_MOCS_ENTRIES; + info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES; info->uc_index = 3; info->wb_index = 4; info->unused_entries_index = 4; break; case XE_PVC: info->ops = &pvc_mocs_ops; - info->size = ARRAY_SIZE(pvc_mocs_desc); + info->table_size = ARRAY_SIZE(pvc_mocs_desc); info->table = pvc_mocs_desc; - info->n_entries = PVC_NUM_MOCS_ENTRIES; + info->num_mocs_regs = PVC_NUM_MOCS_ENTRIES; info->uc_index = 1; info->wb_index = 2; info->unused_entries_index = 2; break; case XE_METEORLAKE: info->ops = &mtl_mocs_ops; - info->size = ARRAY_SIZE(mtl_mocs_desc); + info->table_size = ARRAY_SIZE(mtl_mocs_desc); info->table = mtl_mocs_desc; - info->n_entries = 
MTL_NUM_MOCS_ENTRIES; + info->num_mocs_regs = MTL_NUM_MOCS_ENTRIES; info->uc_index = 9; info->unused_entries_index = 1; break; case XE_DG2: info->ops = &xehp_mocs_ops; - info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table_size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; /* * Last entry is RO on hardware, don't bother with what was * written when checking later */ - info->n_entries = XELP_NUM_MOCS_ENTRIES - 1; + info->num_mocs_regs = XELP_NUM_MOCS_ENTRIES - 1; info->unused_entries_index = 3; break; case XE_DG1: info->ops = &xelp_mocs_ops; - info->size = ARRAY_SIZE(dg1_mocs_desc); + info->table_size = ARRAY_SIZE(dg1_mocs_desc); info->table = dg1_mocs_desc; info->uc_index = 1; - info->n_entries = XELP_NUM_MOCS_ENTRIES; + info->num_mocs_regs = XELP_NUM_MOCS_ENTRIES; info->unused_entries_index = 5; break; case XE_TIGERLAKE: @@ -621,9 +629,9 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ALDERLAKE_P: case XE_ALDERLAKE_N: info->ops = &xelp_mocs_ops; - info->size = ARRAY_SIZE(gen12_mocs_desc); + info->table_size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; - info->n_entries = XELP_NUM_MOCS_ENTRIES; + info->num_mocs_regs = XELP_NUM_MOCS_ENTRIES; info->uc_index = 3; info->unused_entries_index = 2; break; @@ -643,7 +651,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, xe_assert(xe, info->unused_entries_index != 0); xe_assert(xe, info->ops && info->ops->dump); - xe_assert(xe, info->size <= info->n_entries); + xe_assert(xe, info->table_size <= info->num_mocs_regs); if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20) flags |= HAS_GLOBAL_MOCS; @@ -660,7 +668,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, static u32 get_entry_control(const struct xe_mocs_info *info, unsigned int index) { - if (index < info->size && info->table[index].used) + if (index < info->table_size && info->table[index].used) return info->table[index].control_value; return info->table[info->unused_entries_index].control_value; } @@ -671,9 +679,9 @@ static void __init_mocs_table(struct xe_gt *gt, unsigned int i; u32 mocs; - mocs_dbg(gt, "mocs entries: %d\n", info->n_entries); + mocs_dbg(gt, "mocs entries: %d\n", info->num_mocs_regs); - for (i = 0; i < info->n_entries; i++) { + for (i = 0; i < info->num_mocs_regs; i++) { mocs = get_entry_control(info, i); mocs_dbg(gt, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, @@ -694,7 +702,7 @@ static void __init_mocs_table(struct xe_gt *gt, static u16 get_entry_l3cc(const struct xe_mocs_info *info, unsigned int index) { - if (index < info->size && info->table[index].used) + if (index < info->table_size && info->table[index].used) return info->table[index].l3cc_value; return info->table[info->unused_entries_index].l3cc_value; } @@ -710,9 +718,9 @@ static void init_l3cc_table(struct xe_gt *gt, unsigned int i; u32 l3cc; - mocs_dbg(gt, "l3cc entries: %d\n", info->n_entries); + mocs_dbg(gt, "l3cc entries: %d\n", info->num_mocs_regs); - for (i = 0; i < (info->n_entries + 1) / 2; i++) { + for (i = 0; i < (info->num_mocs_regs + 1) / 2; i++) { l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1)); -- cgit From 4f82ac6102788112e599a6074d2c1f2afce923df Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 26 Jun 2024 14:05:37 -0700 Subject: drm/xe/mcr: Avoid clobbering DSS steering A couple copy/paste mistakes in the code that selects steering targets for OADDRM and INSTANCE0 unintentionally clobbered the steering target for DSS ranges in some cases. 
The OADDRM/INSTANCE0 values were also not assigned as intended, although that mistake wound up being harmless since the desired values for those specific ranges were '0' which the kzalloc of the GT structure should have already taken care of implicitly. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240626210536.1620176-2-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_gt_mcr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 386ac3269909..6d948a469126 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt) else gt->steering[OADDRM].group_target = 1; - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[OADDRM].instance_target = 0; /* unused */ } static void init_steering_sqidi_psmi(struct xe_gt *gt) @@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) static void init_steering_inst0(struct xe_gt *gt) { - gt->steering[DSS].group_target = 0; /* unused */ - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[INSTANCE0].group_target = 0; /* unused */ + gt->steering[INSTANCE0].instance_target = 0; /* unused */ } static const struct { -- cgit From eb523ec38269889cef15494635a2bc4b608602a3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 28 Jun 2024 10:58:45 +0200 Subject: drm/xe/guc: Configure TLB timeout based on CT buffer size GuC TLB invalidation depends on GuC to process the request from the CT queue and then the real time to invalidate TLB. Add a function to return overestimated possible time a TLB inval H2G might take which can be used as timeout value for TLB invalidation wait time. v4: Make sure CTB is in 4K blocks(Michal) and other doc fixes v3: Pass CT to xe_guc_ct_queue_proc_time_jiffies() (Michal) Add tlb_timeout_jiffies() that replaces TLB_TIMEOUT(Michal) v2: Address reviews from Michal. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1622 Cc: Matthew Brost Cc: Michal Wajdeczko Suggested-by: Daniele Ceraolo Spurio Acked-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240628085845.2369-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 30 +++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_guc_ct.c | 17 ++++++++++++++++ drivers/gpu/drm/xe/xe_guc_ct.h | 2 ++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index e1f1ccb01143..d9359976ab8b 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -17,7 +17,22 @@ #include "xe_trace.h" #include "regs/xe_guc_regs.h" -#define TLB_TIMEOUT (HZ / 4) +/* + * TLB inval depends on pending commands in the CT queue and then the real + * invalidation time. Double up the time to process full CT queue + * just to be on the safe side. 
+ */ +static long tlb_timeout_jiffies(struct xe_gt *gt) +{ + /* this reflects what HW/GuC needs to process TLB inv request */ + const long hw_tlb_timeout = HZ / 4; + + /* this estimates actual delay caused by the CTB transport */ + long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct); + + return hw_tlb_timeout + 2 * delay; +} + static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -32,7 +47,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) s64 since_inval_ms = ktime_ms_delta(ktime_get(), fence->invalidation_time); - if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT) + if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) break; trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); @@ -47,7 +62,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, >->tlb_invalidation.fence_tdr, - TLB_TIMEOUT); + tlb_timeout_jiffies(gt)); spin_unlock_irq(>->tlb_invalidation.pending_lock); } @@ -183,7 +198,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, if (list_is_singular(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, >->tlb_invalidation.fence_tdr, - TLB_TIMEOUT); + tlb_timeout_jiffies(gt)); } spin_unlock_irq(>->tlb_invalidation.pending_lock); } else if (ret < 0 && fence) { @@ -390,8 +405,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, * @gt: graphics tile * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation * - * Wait for 200ms for a TLB invalidation to complete, in practice we always - * should receive the TLB invalidation within 200ms. + * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. * * Return: 0 on success, -ETIME on TLB invalidation timeout */ @@ -410,7 +424,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) */ ret = wait_event_timeout(guc->ct.wq, tlb_invalidation_seqno_past(gt, seqno), - TLB_TIMEOUT); + tlb_timeout_jiffies(gt)); if (!ret) { struct drm_printer p = xe_gt_err_printer(gt); @@ -486,7 +500,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (!list_empty(>->tlb_invalidation.pending_fences)) mod_delayed_work(system_wq, >->tlb_invalidation.fence_tdr, - TLB_TIMEOUT); + tlb_timeout_jiffies(gt)); else cancel_delayed_work(>->tlb_invalidation.fence_tdr); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 873d1bcbedd7..7d2e937da1d8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -112,6 +112,23 @@ ct_to_xe(struct xe_guc_ct *ct) #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +/** + * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full + * CT command queue + * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future. + * + * Observation is that a 4KiB buffer full of commands takes a little over a + * second to process. Use that to calculate maximum time to process a full CT + * command queue. + * + * Return: Maximum time to process a full CT queue in jiffies. 
+ */ +long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) +{ + BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4)); + return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ; +} + static size_t guc_ct_size(void) { return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 105bb8e99a8d..190202fce2d0 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -64,4 +64,6 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL); } +long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct); + #endif -- cgit From d2d5409786fda857e44478e5fa27c9441a128219 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 27 Jun 2024 12:43:05 +0200 Subject: drm/xe/pf: Remove inlined #ifdef CONFIG_PCI_IOV We can remove #ifdef CONFIG_PCI_IOV in .c files if we provide dummy replacement of the xe_pci_sriov_configure() function. Suggested-by: Jani Nikula Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240627104305.1477-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 4 ---- drivers/gpu/drm/xe/xe_pci_sriov.h | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f5d5a368e595..732ee0d02124 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -748,10 +748,8 @@ static void xe_pci_remove(struct pci_dev *pdev) if (!xe) /* driver load aborted, nothing to cleanup */ return; -#ifdef CONFIG_PCI_IOV if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); -#endif xe_device_remove(xe); xe_pm_runtime_fini(xe); @@ -1009,9 +1007,7 @@ static struct pci_driver xe_pci_driver = { .probe = xe_pci_probe, .remove = xe_pci_remove, .shutdown = xe_pci_shutdown, -#ifdef CONFIG_PCI_IOV .sriov_configure = xe_pci_sriov_configure, -#endif #ifdef CONFIG_PM_SLEEP .driver.pm = &xe_pm_ops, #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h index 3b8bfbf7e1d9..c76dd0d90495 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.h +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -8,6 +8,13 @@ struct pci_dev; +#ifdef CONFIG_PCI_IOV int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); +#else +static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + return 0; +} +#endif #endif -- cgit From 234670cea9a2995554dad6e084e5e18b0bcd151d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 1 Jul 2024 12:27:36 +0200 Subject: drm/xe/pf: Skip fair VFs provisioning if already provisioned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our debugfs allows to view and change VFs' provisioning configs. If we attempt to experiment with VFs provisioning before enabling them, this early config will affect fair provisioning calculations, and will also be overwritten, which is undesirable behavior. To improve this, check if the VFs configs are empty (unprovisioned) before starting the fair provisioning procedure. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240701102738.934-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 48 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 2 ++ drivers/gpu/drm/xe/xe_pci_sriov.c | 13 ++++++++ 3 files changed, 63 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 694671497f6e..810b579a0025 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1922,6 +1922,54 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh return err; } +static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; + struct xe_device *xe = gt_to_xe(gt); + bool valid_ggtt, valid_ctxs, valid_dbs; + bool valid_any, valid_all; + + valid_ggtt = pf_get_vf_config_ggtt(primary_gt, vfid); + valid_ctxs = pf_get_vf_config_ctxs(gt, vfid); + valid_dbs = pf_get_vf_config_dbs(gt, vfid); + + /* note that GuC doorbells are optional */ + valid_any = valid_ggtt || valid_ctxs || valid_dbs; + valid_all = valid_ggtt && valid_ctxs; + + if (IS_DGFX(xe)) { + bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); + + valid_any = valid_any || valid_lmem; + valid_all = valid_all && valid_lmem; + } + + return valid_all ? 1 : valid_any ? -ENOKEY : -ENODATA; +} + +/** + * xe_gt_sriov_pf_config_is_empty - Check VF's configuration. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * + * This function can only be called on PF. + * + * Return: true if VF mandatory configuration (GGTT, LMEM, ...) is empty. + */ +bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid) +{ + bool empty; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + empty = pf_validate_vf_config(gt, vfid) == -ENODATA; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return empty; +} + /** * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations. 
* @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index e8238c1ad06a..58c8f879d7ab 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -53,6 +53,8 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); +bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); + int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 74c8fadc9365..aaceee748287 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -13,6 +13,17 @@ #include "xe_sriov_pf_helpers.h" #include "xe_sriov_printk.h" +static int pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) +{ + unsigned int n; + + for (n = 1; n <= num_vfs; n++) + if (!xe_gt_sriov_pf_config_is_empty(gt, n)) + return false; + + return true; +} + static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) { struct xe_gt *gt; @@ -20,6 +31,8 @@ static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) int result = 0, err; for_each_gt(gt, xe, id) { + if (!pf_needs_provisioning(gt, num_vfs)) + continue; err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); result = result ?: err; } -- cgit From 411220808ceeaf91b11a9fa9bc992d59568a7ee0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 1 Jul 2024 12:27:37 +0200 Subject: drm/xe/pf: Restart VFs provisioning after GT reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Any prior configurations pushed to the GuC are lost when the GT is reset. Push again all non-empty VF configurations to the GuC as part of the GuC reset procedure. This will also help restore early manual provisioning, when the PF was in the meantime suspended and then resumed. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240701102738.934-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 3 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5 +++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 2 ++ 5 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 759634cff1d8..29e8ea94d05e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -683,6 +683,9 @@ static int do_gt_restart(struct xe_gt *gt) /* Restore GT freq to expected values */ xe_gt_sanitize_freq(gt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_restart(gt); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 7decf71c2b7d..9dbba9ab7a9a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -8,6 +8,7 @@ #include "regs/xe_sriov_regs.h" #include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_service.h" #include "xe_mmio.h" @@ -82,3 +83,14 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) xe_gt_sriov_pf_service_update(gt); } + +/** + * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. + * @gt: the &xe_gt + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_restart(struct xe_gt *gt) +{ + xe_gt_sriov_pf_config_restart(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 37d7d6c3df03..f0cb726a6919 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,6 +11,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); +void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) { @@ -20,6 +21,10 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } + +static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 810b579a0025..db6c213da847 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1970,6 +1970,36 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid) return empty; } +/** + * xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset. + * @gt: the &xe_gt + * + * Any prior configurations pushed to GuC are lost when the GT is reset. + * Push again all non-empty VF configurations to the GuC. + * + * This function can only be called on PF. 
+ */ +void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + unsigned int fail = 0, skip = 0; + + for (n = 1; n <= total_vfs; n++) { + if (xe_gt_sriov_pf_config_is_empty(gt, n)) + skip++; + else if (xe_gt_sriov_pf_config_push(gt, n, false)) + fail++; + } + + if (fail) + xe_gt_sriov_notice(gt, "Failed to push %u of %u VF%s configurations\n", + fail, total_vfs - skip, str_plural(total_vfs)); + + if (fail != total_vfs) + xe_gt_sriov_dbg(gt, "pushed %u skip %u of %u VF%s configurations\n", + total_vfs - skip - fail, skip, total_vfs, str_plural(total_vfs)); +} + /** * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations. * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 58c8f879d7ab..c0e6e4743dc2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -55,6 +55,8 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_config_restart(struct xe_gt *gt); + int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); -- cgit From 7dc10eff223b0e9fed5d48159820556a576be592 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 28 Jun 2024 09:17:26 -0700 Subject: drm/xe/rtp: Fix out-of-bounds array access Increment the counter before checking for number of rules, otherwise when there's no XE_RTP_MATCH_OR an out-of-bounds access is done, as reported by kasan: BUG: KASAN: global-out-of-bounds in rule_matches+0xb6d/0x11c0 [xe] Read of size 1 at addr ffffffffa0a50b70 by task systemd-udevd/243 Fixes: dc72c52a42e0 ("drm/xe/rtp: Allow to OR rules") Cc: Mika Kuoppala Reviewed-by: Rodrigo Vivi Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240628161726.836734-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_rtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 5b27f7c45ea3..02e28274282f 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -121,7 +121,7 @@ static bool rule_matches(const struct xe_device *xe, * Advance rules until we find XE_RTP_MATCH_OR to check * if there's another set of conditions to check */ - while (i < n_rules && rules[++i].match_type != XE_RTP_MATCH_OR) + while (++i < n_rules && rules[i].match_type != XE_RTP_MATCH_OR) ; if (i >= n_rules) -- cgit From 2d46ecc958c9cb7b0f116aa52cf9296b6c71d784 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 27 Jun 2024 22:21:25 -0700 Subject: drm/xe/oa: Destroy the stream_lock mutex The mutex allocated in xe_oa_stream_init() was never previously destroyed. Do so now. 
Fixes: e936f885f1e9 ("drm/xe/oa/uapi: Expose OA stream fd") Cc: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240628052125.1847989-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6cc3f0217341..4188516a7816 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -824,6 +824,8 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) WRITE_ONCE(u->exclusive_stream, NULL); + mutex_destroy(&stream->stream_lock); + xe_oa_disable_metric_set(stream); xe_exec_queue_put(stream->k_exec_q); -- cgit From b0b2b50cdbd132cb78ed25c27081849a405b75e3 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 1 Jul 2024 16:15:28 -0700 Subject: drm/xe/guc: Prevent use of uninitialized mutex When skip_guc_pc is set and/or this is for a VF. Fixes: 3b1592fb7835 ("drm/xe/lnl: Apply Wa_22019338487") Signed-off-by: Vinay Belgaumkar Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240701231529.2582452-1-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d88f5e960fbd..f7b468930697 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -26,6 +26,7 @@ #include "xe_mmio.h" #include "xe_pcode.h" #include "xe_pm.h" +#include "xe_sriov.h" #include "xe_wa.h" #define MCHBAR_MIRROR_BASE_SNB 0x140000 @@ -825,6 +826,9 @@ int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) { int ret = 0; + if (IS_SRIOV_VF(pc_to_xe(pc)) || pc_to_xe(pc)->info.skip_guc_pc) + return 0; + mutex_lock(&pc->freq_lock); ret = pc_set_max_freq(pc, pc->stashed_max_freq); if (!ret) -- cgit From aaa08078e7251131f045ba248a68671db7f7bdf7 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 1 Jul 2024 16:15:29 -0700 Subject: drm/xe/bmg: Apply Wa_22019338487 Extend this WA to BMG GT as well. In this case media GT is not affected. The cap frequencies and max allowed ggtt writes are different as well. On BMG, we need to do a flush after 1100 GGTT writes, and we need to limit the GT frequency request to 2133 Mhz during driver load and leave it at that value after driver unloads. v3: Fix checkpatch issue Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240701231529.2582452-2-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 11 ++++++++--- drivers/gpu/drm/xe/xe_gt.c | 3 +-- drivers/gpu/drm/xe/xe_guc_pc.c | 11 ++++++++--- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 883cfc7f98a8..0cdbc1296e88 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -74,6 +74,9 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { + struct xe_gt *gt = XE_WA(ggtt->tile->primary_gt, 22019338487) ? ggtt->tile->primary_gt : + ggtt->tile->media_gt; + u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit * to wait for completion of prior GTT writes before letting this through. 
@@ -81,8 +84,8 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt) */ lockdep_assert_held(&ggtt->lock); - if ((++ggtt->access_count % 63) == 0) { - xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0); + if ((++ggtt->access_count % max_gtt_writes) == 0) { + xe_mmio_write32(gt, GMD_ID, 0x0); ggtt->access_count = 0; } } @@ -218,7 +221,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->size = GUC_GGTT_TOP; if (GRAPHICS_VERx100(xe) >= 1270) - ggtt->pt_ops = ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487) ? + ggtt->pt_ops = (ggtt->tile->media_gt && + XE_WA(ggtt->tile->media_gt, 22019338487)) || + XE_WA(ggtt->tile->primary_gt, 22019338487) ? &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 29e8ea94d05e..0ba2e2d0289b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -821,8 +821,7 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int ret = 0; if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && - XE_WA(gt, 22019338487)) + xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f7b468930697..32e93a8127d4 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -46,6 +46,7 @@ #define GT_FREQUENCY_SCALER 3 #define LNL_MERT_FREQ_CAP 800 +#define BMG_MERT_FREQ_CAP 2133 /** * DOC: GuC Power Conservation (PC) @@ -704,10 +705,14 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(gt, 22019338487)) - return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); - else + if (XE_WA(gt, 22019338487)) { + if (xe_gt_is_media_type(gt)) + return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); + else + return min(BMG_MERT_FREQ_CAP, pc->rp0_freq); + } else { return pc->rp0_freq; + } } /** diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index a6b897030fde..26066beb4f6f 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -28,3 +28,4 @@ GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) + GRAPHICS_VERSION(2001) -- cgit
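As a quick illustration of the frequency-cap selection added by the last patch above, here is a minimal user-space sketch of the pc_max_freq_cap() logic. It only mirrors the min()-against-rp0 behaviour and the LNL_MERT_FREQ_CAP/BMG_MERT_FREQ_CAP values visible in the diff; the boolean parameters, the min_u32() helper and the main() harness are stand-ins for the real XE_WA() and xe_gt_is_media_type() driver helpers and are not part of the xe driver.

/*
 * Standalone sketch of the Wa_22019338487 frequency-cap selection from
 * xe_guc_pc.c above. The booleans model XE_WA(gt, 22019338487) and
 * xe_gt_is_media_type(gt); rp0_freq is the hardware RP0 limit in MHz.
 */
#include <stdbool.h>
#include <stdio.h>

#define LNL_MERT_FREQ_CAP  800   /* MHz, cap applied on the LNL media GT */
#define BMG_MERT_FREQ_CAP  2133  /* MHz, cap applied on the BMG primary GT */

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Mirrors pc_max_freq_cap(): cap RP0 while the workaround is active. */
static unsigned int pc_max_freq_cap(bool wa_22019338487, bool is_media_gt,
				    unsigned int rp0_freq)
{
	if (!wa_22019338487)
		return rp0_freq;

	return is_media_gt ? min_u32(LNL_MERT_FREQ_CAP, rp0_freq)
			   : min_u32(BMG_MERT_FREQ_CAP, rp0_freq);
}

int main(void)
{
	/* LNL media GT, RP0 = 1950 MHz -> request capped to 800 MHz */
	printf("LNL media cap: %u MHz\n", pc_max_freq_cap(true, true, 1950));
	/* BMG primary GT, RP0 = 2400 MHz -> request capped to 2133 MHz */
	printf("BMG primary cap: %u MHz\n", pc_max_freq_cap(true, false, 2400));
	return 0;
}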