aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h7
-rw-r--r--drivers/gpu/drm/xe/xe_device.c78
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c2
4 files changed, 90 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index e4408473e802..7202084198bd 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -70,8 +70,15 @@
#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084)
+#define GU_CNTL_PROTECTED XE_REG(0x10100C)
+#define DRIVERINT_FLR_DIS REG_BIT(31)
+
#define GU_CNTL XE_REG(0x101010)
#define LMEM_INIT REG_BIT(7)
+#define DRIVERFLR REG_BIT(31)
+
+#define GU_DEBUG XE_REG(0x101018)
+#define DRIVERFLR_STATUS REG_BIT(31)
#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014)
#define SGSI_SIDECLK_DIS REG_BIT(17)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ae0b7349c3e3..5869ba7e0cdc 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -5,6 +5,8 @@
#include "xe_device.h"
+#include <linux/units.h>
+
#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_ttm_helper.h>
@@ -252,6 +254,78 @@ err_put:
return ERR_PTR(err);
}
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+ const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ int ret;
+
+ if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+ /*
+ * Make sure any pending FLR requests have cleared by waiting for the
+ * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+ * to make sure it's not still set from a prior attempt (it's a write to
+ * clear bit).
+ * Note that we should never be in a situation where a previous attempt
+ * is still pending (unless the HW is totally dead), but better to be
+ * safe in case something unexpected happens
+ */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+ return;
+ }
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+ /* Trigger the actual Driver-FLR */
+ xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+ /* Wait for hardware teardown to complete */
+ ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Wait for hardware/firmware re-init to complete */
+ ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ flr_timeout, NULL, false);
+ if (ret) {
+ drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+ return;
+ }
+
+ /* Clear sticky completion status */
+ xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (xe->needs_flr_on_fini)
+ xe_driver_flr(xe);
+}
+
static void xe_device_sanitize(struct drm_device *drm, void *arg)
{
struct xe_device *xe = arg;
@@ -283,6 +357,10 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
+ err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
for_each_gt(gt, xe, id) {
err = xe_pcode_probe(gt);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4bc668ff8615..4425c2484a02 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -388,6 +388,9 @@ struct xe_device {
/** @heci_gsc: graphics security controller */
struct xe_heci_gsc heci_gsc;
+ /** @needs_flr_on_fini: requests function-reset on fini */
+ bool needs_flr_on_fini;
+
/* For pcode */
struct mutex sb_lock;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d380f67b3365..73c090762771 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -626,6 +626,8 @@ err_fail:
xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev),
gt_to_tile(gt)->id, gt->info.id);
+ gt_to_xe(gt)->needs_flr_on_fini = true;
+
return err;
}