85 files changed, 2165 insertions, 953 deletions
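The bulk of this series converts the GTT binding callbacks from taking a struct i915_vma to taking a struct i915_vma_resource, so that the data needed for unbinding lives on past the vma itself and unbinds can run asynchronously. What follows is a minimal, self-contained sketch of what such a resource-based bind callback looks like; it uses simplified stand-in types rather than the driver's real headers, but the field names (start, node_size, bound_flags, bi.pages, bi.readonly, bi.lmem, page_sizes_gtt) follow the ones visible in the hunks below.

/* Illustrative sketch only -- simplified stand-ins, not the kernel's types. */
#include <stdbool.h>
#include <stdint.h>

struct sg_table;                        /* opaque here */

struct vma_res_bi {                     /* bind metadata copied out of the object */
        struct sg_table *pages;
        bool readonly;
        bool lmem;
};

struct vma_resource_sketch {
        uint64_t start;                 /* GTT offset of the binding */
        uint64_t node_size;             /* size of the allocated node */
        unsigned int bound_flags;
        unsigned int page_sizes_gtt;
        struct vma_res_bi bi;
};

#define PTE_READ_ONLY_SKETCH   (1u << 0)
#define PTE_LM_SKETCH          (1u << 1)
#define GTT_PAGE_SIZE_SKETCH   4096u

struct address_space_sketch {
        bool has_read_only;
        void (*insert_entries)(struct address_space_sketch *vm,
                               struct vma_resource_sketch *vma_res,
                               int cache_level, uint32_t pte_flags);
};

/*
 * The new-style bind callback: everything it needs comes from the
 * vma_resource, so it never dereferences the vma or the GEM object.
 */
static void bind_vma_sketch(struct address_space_sketch *vm,
                            struct vma_resource_sketch *vma_res,
                            int cache_level, uint32_t flags)
{
        uint32_t pte_flags = 0;

        if (vma_res->bound_flags)       /* already bound */
                return;

        if (vm->has_read_only && vma_res->bi.readonly)
                pte_flags |= PTE_READ_ONLY_SKETCH;
        if (vma_res->bi.lmem)
                pte_flags |= PTE_LM_SKETCH;

        vm->insert_entries(vm, vma_res, cache_level, pte_flags);
        vma_res->page_sizes_gtt = GTT_PAGE_SIZE_SKETCH;
        vma_res->bound_flags = flags;
}

The same shift shows up throughout the patch: error capture lists now hold capture->vma_res instead of a vma snapshot, requests track rq->batch_res, and the address space gains a pending_unbind tree that is synced in __i915_vm_release before cleanup.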
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index b7d801993bfa..bcaefc952764 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -539,6 +539,7 @@ GuC ABI .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h HuC --- diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1b62b9f65196..aa86ac33effc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -174,7 +174,7 @@ i915-y += \ i915_trace_points.o \ i915_ttm_buddy_manager.o \ i915_vma.o \ - i915_vma_snapshot.o \ + i915_vma_resource.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 8f674745e7e0..63a83d5f85a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -48,7 +48,7 @@ static void dpt_insert_page(struct i915_address_space *vm, } static void dpt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -64,8 +64,8 @@ static void dpt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. */ - i = vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) + i = vma_res->start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma_res->bi.pages) gen8_set_pte(&base[i++], pte_encode | addr); } @@ -76,35 +76,38 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { - struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; + if (vma_res->bound_flags) + return; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj)) + if (vm->has_read_only && vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. 
*/ - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); + vma_res->bound_flags = I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } -static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +static void dpt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - vm->clear_range(vm, vma->node.start, vma->size); + vm->clear_range(vm, vma_res->start, vma_res->vma_size); } static void dpt_cleanup(struct i915_address_space *vm) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 160fd2bdafe5..c0a973eeb405 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -595,7 +595,7 @@ static void ivb_fbc_activate(struct intel_fbc *fbc) else if (DISPLAY_VER(i915) == 9) skl_fbc_program_cfb_stride(fbc); - if (i915->ggtt.num_fences) + if (to_gt(i915)->ggtt->num_fences) snb_fbc_program_fence(fbc); intel_de_write(i915, ILK_DPFC_CONTROL, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index adc3a81be9f7..41d279db2be6 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -180,7 +180,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt; const struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_NORMAL, }; diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 01ce1d72297f..e4186a0b8edb 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -94,7 +94,7 @@ initial_plane_vma(struct drm_i915_private *i915, goto err_obj; } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL); if (IS_ERR(vma)) goto err_obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cad3f0b2be9e..ebbac2ea0833 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -564,16 +564,13 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, container_of_user(base, typeof(*ext), base); const struct set_proto_ctx_engines *set = data; struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance prev_engine; u64 flags; int err = 0, n, i, j; u16 slot, width, num_siblings; struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +580,13 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc) && + num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); @@ -629,7 +633,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, /* Create contexts / 
engines */ for (i = 0; i < width; ++i) { intel_engine_mask_t current_mask = 0; - struct i915_engine_class_instance prev_engine; for (j = 0; j < num_siblings; ++j) { struct i915_engine_class_instance ci; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index babfecb17ad1..e5b0f66ea1fe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -174,7 +174,7 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) vm = ctx->vm; if (!vm) - vm = &ctx->i915->ggtt.vm; + vm = &to_gt(ctx->i915)->ggtt->vm; vm = i915_vm_get(vm); return vm; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e7f548a22970..2065e5e44fac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -29,7 +29,6 @@ #include "i915_gem_ioctls.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_vma_snapshot.h" struct eb_vma { struct i915_vma *vma; @@ -1095,7 +1094,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) { struct drm_i915_private *i915 = container_of(cache, struct i915_execbuffer, reloc_cache)->i915; - return &i915->ggtt; + return to_gt(i915)->ggtt; } static void reloc_cache_unmap(struct reloc_cache *cache) @@ -1411,7 +1410,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, target->vma->obj->cache_level, - PIN_GLOBAL, NULL); + PIN_GLOBAL, NULL, NULL); mutex_unlock(&vma->vm->mutex); reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); if (err) @@ -1941,7 +1940,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; unsigned int i = count, j; - struct i915_vma_snapshot *vsnap; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1951,11 +1949,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb) if (!(flags & EXEC_OBJECT_CAPTURE)) continue; - vsnap = i915_vma_snapshot_alloc(GFP_KERNEL); - if (!vsnap) - continue; - - i915_vma_snapshot_init(vsnap, vma, "user"); for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; @@ -1964,10 +1957,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb) continue; capture->next = eb->capture_lists[j]; - capture->vma_snapshot = i915_vma_snapshot_get(vsnap); + capture->vma_res = i915_vma_resource_get(vma->resource); eb->capture_lists[j] = capture; } - i915_vma_snapshot_put(vsnap); } } @@ -3150,7 +3142,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno, + eb->context->parallel.seqno++, false); if (!fence_array) { kfree(fences); @@ -3270,9 +3262,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, * _onstack interface. 
*/ if (eb->batches[i]->vma) - i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot, - eb->batches[i]->vma, - "batch"); + eb->requests[i]->batch_res = + i915_vma_resource_get(eb->batches[i]->vma->resource); if (eb->batch_pool) { GEM_BUG_ON(intel_context_is_parallel(eb->context)); intel_gt_buffer_pool_mark_active(eb->batch_pool, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 1478c02a82cb..5ac2506f4ee8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -295,7 +295,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct drm_device *dev = obj->base.dev; struct drm_i915_private *i915 = to_i915(dev); struct intel_runtime_pm *rpm = &i915->runtime_pm; - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; intel_wakeref_t wakeref; @@ -388,16 +388,16 @@ retry: assert_rpm_wakelock_held(rpm); /* Mark as being mmapped into userspace for later revocation */ - mutex_lock(&i915->ggtt.vm.mutex); + mutex_lock(&to_gt(i915)->ggtt->vm.mutex); if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) - list_add(&obj->userfault_link, &i915->ggtt.userfault_list); - mutex_unlock(&i915->ggtt.vm.mutex); + list_add(&obj->userfault_link, &to_gt(i915)->ggtt->userfault_list); + mutex_unlock(&to_gt(i915)->ggtt->vm.mutex); /* Track the mmo associated with the fenced vma */ vma->mmo = mmo; if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) - intel_wakeref_auto(&i915->ggtt.userfault_wakeref, + intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); if (write) { @@ -512,7 +512,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) * wakeref. 
*/ wakeref = intel_runtime_pm_get(&i915->runtime_pm); - mutex_lock(&i915->ggtt.vm.mutex); + mutex_lock(&to_gt(i915)->ggtt->vm.mutex); if (!obj->userfault_count) goto out; @@ -530,7 +530,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) wmb(); out: - mutex_unlock(&i915->ggtt.vm.mutex); + mutex_unlock(&to_gt(i915)->ggtt->vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); } @@ -736,13 +736,14 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, u32 handle, u64 *offset) { + struct drm_i915_private *i915 = to_i915(dev); enum i915_mmap_type mmap_type; if (HAS_LMEM(to_i915(dev))) mmap_type = I915_MMAP_TYPE_FIXED; else if (pat_enabled()) mmap_type = I915_MMAP_TYPE_WC; - else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) + else if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return -ENODEV; else mmap_type = I915_MMAP_TYPE_GTT; @@ -790,7 +791,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, switch (args->flags) { case I915_MMAP_OFFSET_GTT: - if (!i915_ggtt_has_aperture(&i915->ggtt)) + if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return -ENODEV; type = I915_MMAP_TYPE_GTT; break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..1a9e1f940a7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -756,6 +756,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) return dma_fence_get(i915_gem_to_ttm(obj)->moving); } +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; + + if (*moving == fence) + return; + + dma_fence_put(*moving); + *moving = dma_fence_get(fence); +} + /** * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any * @obj: The object whose moving fence to wait for. 
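The i915_gem_object.c hunk above adds i915_gem_object_set_moving_fence() as a counterpart to the existing getter. A minimal sketch of that get/set reference-counting pattern, using a toy refcounted fence rather than the real dma_fence API and assuming only the semantics shown in the hunk:

/* Toy refcounted fence, for illustration of the set/get pattern above. */
#include <stddef.h>

struct toy_fence {
        int refcount;
};

static struct toy_fence *toy_fence_get(struct toy_fence *f)
{
        if (f)
                f->refcount++;
        return f;
}

static void toy_fence_put(struct toy_fence *f)
{
        if (!f)
                return;
        if (--f->refcount == 0) {
                /* last reference dropped; the real code would free the fence */
        }
}

struct toy_object {
        struct toy_fence *moving;       /* fence gating access to the backing store */
};

/* Mirrors i915_gem_object_get_moving_fence(): return a new reference. */
static struct toy_fence *toy_object_get_moving_fence(struct toy_object *obj)
{
        return toy_fence_get(obj->moving);
}

/* Mirrors i915_gem_object_set_moving_fence(): drop the old, keep the new. */
static void toy_object_set_moving_fence(struct toy_object *obj,
                                        struct toy_fence *fence)
{
        if (obj->moving == fence)
                return;

        toy_fence_put(obj->moving);
        obj->moving = toy_fence_get(fence);
}

Dropping the old reference before taking the new one is safe here only because the early equality check has already ruled out the two pointers aliasing, which appears to be the same reasoning as in the hunk above.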
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f66d46882ea7..02c37fe4a535 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -459,7 +459,6 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); int i915_gem_object_truncate(struct drm_i915_gem_object *obj); -void i915_gem_object_writeback(struct drm_i915_gem_object *obj); /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object @@ -524,6 +523,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) struct dma_fence * i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 4b4829eb16c2..71e778ecaeb8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -15,6 +15,7 @@ #include "i915_active.h" #include "i915_selftest.h" +#include "i915_vma_resource.h" struct drm_i915_gem_object; struct intel_fronbuffer; @@ -57,10 +58,26 @@ struct drm_i915_gem_object_ops { void (*put_pages)(struct drm_i915_gem_object *obj, struct sg_table *pages); int (*truncate)(struct drm_i915_gem_object *obj); - void (*writeback)(struct drm_i915_gem_object *obj); - int (*shrinker_release_pages)(struct drm_i915_gem_object *obj, - bool no_gpu_wait, - bool should_writeback); + /** + * shrink - Perform further backend specific actions to facilate + * shrinking. + * @obj: The gem object + * @flags: Extra flags to control shrinking behaviour in the backend + * + * Possible values for @flags: + * + * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the + * backing pages, if supported. + * + * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to + * idle. Active objects can be considered later. The TTM backend for + * example might have aync migrations going on, which don't use any + * i915_vma to track the active GTT binding, and hence having an unbound + * object might not be enough. + */ +#define I915_GEM_OBJECT_SHRINK_WRITEBACK BIT(0) +#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1) + int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags); int (*pread)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *arg); @@ -550,31 +567,7 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; - struct i915_page_sizes { - /** - * The sg mask of the pages sg_table. i.e the mask of - * of the lengths for each sg entry. - */ - unsigned int phys; - - /** - * The gtt page sizes we are allowed to use given the - * sg mask and the supported page sizes. This will - * express the smallest unit we can use for the whole - * object, as well as the larger sizes we may be able - * to use opportunistically. - */ - unsigned int sg; - - /** - * The actual gtt page size usage. Since we can have - * multiple vma associated with this object we need to - * prevent any trampling of state, hence a copy of this - * struct also lives in each vma, therefore the gtt - * value here should only be read/write through the vma. 
- */ - unsigned int gtt; - } page_sizes; + struct i915_page_sizes page_sizes; I915_SELFTEST_DECLARE(unsigned int page_mask); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 9f429ed6e78a..7d2211fbe548 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -167,16 +167,6 @@ int i915_gem_object_truncate(struct drm_i915_gem_object *obj) return 0; } -/* Try to discard unwanted pages */ -void i915_gem_object_writeback(struct drm_i915_gem_object *obj) -{ - assert_object_held_shared(obj); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - - if (obj->ops->writeback) - obj->ops->writeback(obj); -} - static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) { struct radix_tree_iter iter; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index ac56124760e1..6da68b38f00f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -23,7 +23,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("%s\n", dev_name(i915->drm.dev)); - intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); + intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, 0); flush_workqueue(i915->wq); /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index cc9fe258fba7..6c57b0a79c8a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -331,6 +331,21 @@ shmem_writeback(struct drm_i915_gem_object *obj) __shmem_writeback(obj->base.size, obj->base.filp->f_mapping); } +static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags) +{ + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + return i915_gem_object_truncate(obj); + case __I915_MADV_PURGED: + return 0; + } + + if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK) + shmem_writeback(obj); + + return 0; +} + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -503,7 +518,7 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .get_pages = shmem_get_pages, .put_pages = shmem_put_pages, .truncate = shmem_truncate, - .writeback = shmem_writeback, + .shrink = shmem_shrink, .pwrite = shmem_pwrite, .pread = shmem_pread, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index cc927e49d21f..6a6ff98a8746 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -57,21 +57,17 @@ static int drop_pages(struct drm_i915_gem_object *obj, static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags) { - if (obj->ops->shrinker_release_pages) - return obj->ops->shrinker_release_pages(obj, - !(flags & I915_SHRINK_ACTIVE), - flags & I915_SHRINK_WRITEBACK); - - switch (obj->mm.madv) { - case I915_MADV_DONTNEED: - i915_gem_object_truncate(obj); - return 0; - case __I915_MADV_PURGED: - return 0; - } + if (obj->ops->shrink) { + unsigned int shrink_flags = 0; + + if (!(flags & I915_SHRINK_ACTIVE)) + shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT; - if (flags & I915_SHRINK_WRITEBACK) - i915_gem_object_writeback(obj); + if (flags & I915_SHRINK_WRITEBACK) + shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK; + + return obj->ops->shrink(obj, shrink_flags); + } return 0; } @@ -401,9 +397,9 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr I915_SHRINK_VMAPS); /* We 
also want to clear any cached iomaps as they wrap vmap */ - mutex_lock(&i915->ggtt.vm.mutex); + mutex_lock(&to_gt(i915)->ggtt->vm.mutex); list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.bound_list, vm_link) { + &to_gt(i915)->ggtt->vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; struct drm_i915_gem_object *obj = vma->obj; @@ -418,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr i915_gem_object_unlock(obj); } - mutex_unlock(&i915->ggtt.vm.mutex); + mutex_unlock(&to_gt(i915)->ggtt->vm.mutex); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 7df50fd6cc7b..26975d857776 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -71,7 +71,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915, static int i915_adjust_stolen(struct drm_i915_private *i915, struct resource *dsm) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct resource *r; @@ -582,6 +582,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages = i915_pages_create_for_stolen(obj->base.dev, obj->stolen->start, @@ -589,7 +590,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - dbg_poison(&to_i915(obj->base.dev)->ggtt, + dbg_poison(to_gt(i915)->ggtt, sg_dma_address(pages->sgl), sg_dma_len(pages->sgl), POISON_INUSE); @@ -602,9 +603,10 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, struct sg_table *pages) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); /* Should only be called from i915_gem_object_release_stolen() */ - dbg_poison(&to_i915(obj->base.dev)->ggtt, + dbg_poison(to_gt(i915)->ggtt, sg_dma_address(pages->sgl), sg_dma_len(pages->sgl), POISON_FREE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ef4d0f7dc118..c3d432e314c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -181,7 +181,8 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { - struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct i915_vma *vma, *vn; LIST_HEAD(unbind); int ret = 0; @@ -336,7 +337,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err; - if (!dev_priv->ggtt.num_fences) + if (!to_gt(dev_priv)->ggtt->num_fences) return -EOPNOTSUPP; obj = i915_gem_object_lookup(file, args->handle); @@ -362,9 +363,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x; + args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x; else - args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y; + args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. 
This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -419,7 +420,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; - if (!dev_priv->ggtt.num_fences) + if (!to_gt(dev_priv)->ggtt->num_fences) return -EOPNOTSUPP; rcu_read_lock(); @@ -435,10 +436,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, switch (args->tiling_mode) { case I915_TILING_X: - args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x; + args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x; break; case I915_TILING_Y: - args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y; + args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y; break; default: case I915_TILING_NONE: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index de3fe79b665a..84cae740b4a5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -424,16 +424,14 @@ int i915_ttm_purge(struct drm_i915_gem_object *obj) return 0; } -static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj, - bool no_wait_gpu, - bool should_writeback) +static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct i915_ttm_tt *i915_tt = container_of(bo->ttm, typeof(*i915_tt), ttm); struct ttm_operation_ctx ctx = { .interruptible = true, - .no_wait_gpu = no_wait_gpu, + .no_wait_gpu = flags & I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT, }; struct ttm_placement place = {}; int ret; @@ -467,7 +465,7 @@ static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj, return ret; } - if (should_writeback) + if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK) __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping); return 0; @@ -977,7 +975,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, .truncate = i915_ttm_truncate, - .shrinker_release_pages = i915_ttm_shrinker_release_pages, + .shrink = i915_ttm_shrink, .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index ee9612a3ee5e..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* + * Note: The async unbinding here will actually transform the + * blocking wait for unbind into a wait before finally submitting + * evict / migration blit and thus stall the migration timeline + * which may not be good for overall throughput. We should make + * sure we await the unbind fences *after* the migration blit + * instead of *before* as we currently do. 
+ */ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; @@ -525,7 +534,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return ret; } - migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm, + migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm, dst_rsgt, true, &deps); i915_deps_fini(&deps); } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 11f0aa65f8a3..26f997c376a2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -370,9 +370,9 @@ static int igt_check_page_sizes(struct i915_vma *vma) err = -EINVAL; } - if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) { pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", - vma->page_sizes.gtt & ~supported, supported); + vma->resource->page_sizes_gtt & ~supported, supported); err = -EINVAL; } @@ -403,15 +403,9 @@ static int igt_check_page_sizes(struct i915_vma *vma) if (i915_gem_object_is_lmem(obj) && IS_ALIGNED(vma->node.start, SZ_2M) && vma->page_sizes.sg & SZ_2M && - vma->page_sizes.gtt < SZ_2M) { + vma->resource->page_sizes_gtt < SZ_2M) { pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n", - vma->page_sizes.sg, vma->page_sizes.gtt); - err = -EINVAL; - } - - if (obj->mm.page_sizes.gtt) { - pr_err("obj->page_sizes.gtt(%u) should never be set\n", - obj->mm.page_sizes.gtt); + vma->page_sizes.sg, vma->resource->page_sizes_gtt); err = -EINVAL; } @@ -547,9 +541,9 @@ static int igt_mock_memory_region_huge_pages(void *arg) goto out_unpin; } - if (vma->page_sizes.gtt != page_size) { + if (vma->resource->page_sizes_gtt != page_size) { pr_err("%s page_sizes.gtt=%u, expected=%u\n", - __func__, vma->page_sizes.gtt, + __func__, vma->resource->page_sizes_gtt, page_size); err = -EINVAL; goto out_unpin; @@ -630,9 +624,9 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) err = igt_check_page_sizes(vma); - if (vma->page_sizes.gtt != page_size) { + if (vma->resource->page_sizes_gtt != page_size) { pr_err("page_sizes.gtt=%u, expected %u\n", - vma->page_sizes.gtt, page_size); + vma->resource->page_sizes_gtt, page_size); err = -EINVAL; } @@ -657,9 +651,10 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) err = igt_check_page_sizes(vma); - if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) { pr_err("page_sizes.gtt=%u, expected %llu\n", - vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + vma->resource->page_sizes_gtt, + I915_GTT_PAGE_SIZE_4K); err = -EINVAL; } @@ -805,9 +800,9 @@ static int igt_mock_ppgtt_huge_fill(void *arg) } } - if (vma->page_sizes.gtt != expected_gtt) { + if (vma->resource->page_sizes_gtt != expected_gtt) { pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", - vma->page_sizes.gtt, expected_gtt, + vma->resource->page_sizes_gtt, expected_gtt, obj->base.size, yesno(!!single)); err = -EINVAL; break; @@ -961,10 +956,10 @@ static int igt_mock_ppgtt_64K(void *arg) } } - if (vma->page_sizes.gtt != expected_gtt) { + if (vma->resource->page_sizes_gtt != expected_gtt) { pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", - vma->page_sizes.gtt, expected_gtt, i, - yesno(!!single)); + vma->resource->page_sizes_gtt, + expected_gtt, i, yesno(!!single)); err = -EINVAL; goto out_vma_unpin; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 75947e9dada2..c08f766e6e15 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -543,7 +543,7 @@ static bool has_bit17_swizzle(int sw) static bool bad_swizzling(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) return true; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3f41fe5ec9d4..80d99b9c694f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1374,7 +1374,7 @@ static int igt_ctx_readonly(void *arg) goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_file; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ecb691c81d1e..d534141b2cf7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,8 +4,13 @@ */ #include "gt/intel_migrate.h" +#include "gt/intel_gpu_commands.h" #include "gem/i915_gem_ttm_move.h" +#include "i915_deps.h" + +#include "selftests/igt_spinner.h" + static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) { @@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg) } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + struct i915_vma *vma) { int err; @@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (err) return err; + if (vma) { + err = i915_vma_pin_ww(vma, ww, obj->base.size, 0, + 0UL | PIN_OFFSET_FIXED | + PIN_USER); + if (err) { + if (err != -EINTR && err != ERESTARTSYS && + err != -EDEADLK) + pr_err("Failed to pin vma.\n"); + return err; + } + + i915_vma_unpin(vma); + } + + /* + * Migration will implicitly unbind (asynchronously) any bound + * vmas. + */ if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { @@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; } -static int igt_lmem_pages_migrate(void *arg) +static int __igt_lmem_pages_migrate(struct intel_gt *gt, + struct i915_address_space *vm, + struct i915_deps *deps, + struct igt_spinner *spin, + struct dma_fence *spin_fence) { - struct intel_gt *gt = arg; struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; + struct i915_vma *vma = NULL; struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; @@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + if (vm) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + } + /* Initial GPU fill, sync, CPU initialization. 
*/ for_i915_gem_ww(&ww, err, true) { err = i915_gem_object_lock(obj, &ww); @@ -175,25 +211,23 @@ static int igt_lmem_pages_migrate(void *arg) if (err) continue; - err = intel_migrate_clear(>->migrate, &ww, NULL, + err = intel_migrate_clear(>->migrate, &ww, deps, obj->mm.pages->sgl, obj->cache_level, i915_gem_object_is_lmem(obj), 0xdeadbeaf, &rq); if (rq) { dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } if (err) continue; - err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, - 5 * HZ); - if (err) - continue; - - err = igt_fill_check_buffer(obj, true); - if (err) - continue; + if (!vma) { + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + } } if (err) goto out_put; @@ -204,7 +238,7 @@ static int igt_lmem_pages_migrate(void *arg) */ for (i = 1; i <= 5; ++i) { for_i915_gem_ww(&ww, err, true) - err = lmem_pages_migrate_one(&ww, obj); + err = lmem_pages_migrate_one(&ww, obj, vma); if (err) goto out_put; } @@ -213,12 +247,27 @@ static int igt_lmem_pages_migrate(void *arg) if (err) goto out_put; + if (spin) { + if (dma_fence_is_signaled(spin_fence)) { + pr_err("Spinner was terminated by hangcheck.\n"); + err = -EBUSY; + goto out_unlock; + } + igt_spinner_end(spin); + } + /* Finally sync migration and check content. */ err = i915_gem_object_wait_migration(obj, true); if (err) goto out_unlock; - err = igt_fill_check_buffer(obj, false); + if (vma) { + err = i915_vma_wait_for_bind(vma); + if (err) + goto out_unlock; + } else { + err = igt_fill_check_buffer(obj, false); + } out_unlock: i915_gem_object_unlock(obj); @@ -231,6 +280,7 @@ out_put: static int igt_lmem_pages_failsafe_migrate(void *arg) { int fail_gpu, fail_alloc, ret; + struct intel_gt *gt = arg; for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { @@ -238,7 +288,118 @@ static int igt_lmem_pages_failsafe_migrate(void *arg) fail_gpu, fail_alloc); i915_ttm_migrate_set_failure_modes(fail_gpu, fail_alloc); - ret = igt_lmem_pages_migrate(arg); + ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL); + if (ret) + goto out_err; + } + } + +out_err: + i915_ttm_migrate_set_failure_modes(false, false); + return ret; +} + +/* + * This subtest tests that unbinding at migration is indeed performed + * async. We launch a spinner and a number of migrations depending on + * that spinner to have terminated. Before each migration we bind a + * vma, which should then be async unbound by the migration operation. + * If we are able to schedule migrations without blocking while the + * spinner is still running, those unbinds are indeed async and non- + * blocking. + * + * Note that each async bind operation is awaiting the previous migration + * due to the moving fence resulting from the migration. + */ +static int igt_async_migrate(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_ppgtt *ppgtt; + struct igt_spinner spin; + int err; + + ppgtt = i915_ppgtt_create(gt, 0); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (igt_spinner_init(&spin, gt)) { + err = -ENOMEM; + goto out_spin; + } + + for_each_engine(engine, gt, id) { + struct ttm_operation_ctx ctx = { + .interruptible = true + }; + struct dma_fence *spin_fence; + struct intel_context *ce; + struct i915_request *rq; + struct i915_deps deps; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_ce; + } + + /* + * Use MI_NOOP, making the spinner non-preemptible. 
If there + * is a code path where we fail async operation due to the + * running spinner, we will block and fail to end the + * spinner resulting in a deadlock. But with a non- + * preemptible spinner, hangcheck will terminate the spinner + * for us, and we will later detect that and fail the test. + */ + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + i915_deps_init(&deps, GFP_KERNEL); + err = i915_deps_add_dependency(&deps, &rq->fence, &ctx); + spin_fence = dma_fence_get(&rq->fence); + i915_request_add(rq); + if (err) + goto out_ce; + + err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin, + spin_fence); + i915_deps_fini(&deps); + dma_fence_put(spin_fence); + if (err) + goto out_ce; + } + +out_ce: + igt_spinner_fini(&spin); +out_spin: + i915_vm_put(&ppgtt->vm); + + return err; +} + +/* + * Setting ASYNC_FAIL_ALLOC to 2 will simulate memory allocation failure while + * arming the migration error check and block async migration. This + * will cause us to deadlock and hangcheck will terminate the spinner + * causing the test to fail. + */ +#define ASYNC_FAIL_ALLOC 1 +static int igt_lmem_async_migrate(void *arg) +{ + int fail_gpu, fail_alloc, ret; + struct intel_gt *gt = arg; + + for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { + for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) { + pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", + fail_gpu, fail_alloc); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = igt_async_migrate(gt); if (ret) goto out_err; } @@ -256,6 +417,7 @@ int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_lmem_create_migrate), SUBTEST(igt_same_create_migrate), SUBTEST(igt_lmem_pages_failsafe_migrate), + SUBTEST(igt_lmem_async_migrate), }; if (!HAS_LMEM(i915)) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index c6291429b00c..f61356b72b1c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -307,7 +307,7 @@ static int igt_partial_tiling(void *arg) int tiling; int err; - if (!i915_ggtt_has_aperture(&i915->ggtt)) + if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return 0; /* We want to check the page mapping and fencing of a large object @@ -320,7 +320,7 @@ static int igt_partial_tiling(void *arg) obj = huge_gem_object(i915, nreal << PAGE_SHIFT, - (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -366,10 +366,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->ggtt.bit_6_swizzle_x; + tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->ggtt.bit_6_swizzle_y; + tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y; break; } @@ -440,7 +440,7 @@ static int igt_smoke_tiling(void *arg) IGT_TIMEOUT(end); int err; - if (!i915_ggtt_has_aperture(&i915->ggtt)) + if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return 0; /* @@ -457,7 +457,7 @@ static int igt_smoke_tiling(void *arg) obj = huge_gem_object(i915, nreal << PAGE_SHIFT, - (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); if (IS_ERR(obj)) return 
PTR_ERR(obj); @@ -486,10 +486,10 @@ static int igt_smoke_tiling(void *arg) break; case I915_TILING_X: - tile.swizzle = i915->ggtt.bit_6_swizzle_x; + tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->ggtt.bit_6_swizzle_y; + tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y; break; } @@ -856,6 +856,7 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; if (obj->ops->mmap_offset) @@ -864,7 +865,7 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) return false; if (type == I915_MMAP_TYPE_GTT && - !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + !i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return false; i915_gem_object_lock(obj, NULL); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index 740ee8086a27..fe0a890775e2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -43,7 +43,7 @@ static int igt_gem_huge(void *arg) obj = huge_gem_object(i915, nreal * PAGE_SIZE, - i915->ggtt.vm.total + PAGE_SIZE); + to_gt(i915)->ggtt->vm.total + PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 6e9292918bfc..d657ffd6c86a 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -104,17 +104,17 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory * const pd = ppgtt->pd; - unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE; + unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE; unsigned int act_pt = first_entry / GEN6_PTES; unsigned int act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter = sgt_dma(vma); + struct sgt_dma iter = sgt_dma(vma_res); gen6_pte_t *vaddr; GEM_BUG_ON(!pd->entry[act_pt]); @@ -140,7 +140,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) @@ -271,13 +271,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static void pd_vma_bind(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct gen6_ppgtt *ppgtt = vma->private; - u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; + struct gen6_ppgtt *ppgtt = vma_res->private; + u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE; ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10; ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; @@ -285,9 +285,10 @@ static void pd_vma_bind(struct i915_address_space *vm, gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); } -static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) +static void pd_vma_unbind(struct 
i915_address_space *vm, + struct i915_vma_resource *vma_res) { - struct gen6_ppgtt *ppgtt = vma->private; + struct gen6_ppgtt *ppgtt = vma_res->private; struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt; unsigned int pde; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index b012c50f7ce7..c43e724afa9f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -453,20 +453,21 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, return idx; } -static void gen8_ppgtt_insert_huge(struct i915_vma *vma, +static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, struct sgt_dma *iter, enum i915_cache_level cache_level, u32 flags) { const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); - u64 start = vma->node.start; + u64 start = vma_res->start; - GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); + GEM_BUG_ON(!i915_vm_is_4lvl(vm)); do { struct i915_page_directory * const pdp = - gen8_pdp_for_page_address(vma->vm, start); + gen8_pdp_for_page_address(vm, start); struct i915_page_directory * const pd = i915_pd_entry(pdp, __gen8_pte_index(start, 2)); gen8_pte_t encode = pte_encode; @@ -475,7 +476,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, gen8_pte_t *vaddr; u16 index; - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && rem >= I915_GTT_PAGE_SIZE_2M && !__gen8_pte_index(start, 0)) { @@ -492,7 +493,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, page_size = I915_GTT_PAGE_SIZE; if (!index && - vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) @@ -541,9 +542,9 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, */ if (maybe_64K != -1 && (index == I915_PDES || - (i915_vm_has_scratch_64K(vma->vm) && - !iter->sg && IS_ALIGNED(vma->node.start + - vma->node.size, + (i915_vm_has_scratch_64K(vm) && + !iter->sg && IS_ALIGNED(vma_res->start + + vma_res->node_size, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; @@ -559,10 +560,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, * instead - which we detect as missing results during * selftests. 
*/ - if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + if (I915_SELFTEST_ONLY(vm->scrub_64K)) { u16 i; - encode = vma->vm->scratch[0]->encode; + encode = vm->scratch[0]->encode; vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) @@ -572,22 +573,22 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, } } - vma->page_sizes.gtt |= page_size; + vma_res->page_sizes_gtt |= page_size; } while (iter->sg && sg_dma_len(iter->sg)); } static void gen8_ppgtt_insert(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = sgt_dma(vma); + struct sgt_dma iter = sgt_dma(vma_res); - if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); + if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); } else { - u64 idx = vma->node.start >> GEN8_PTE_SHIFT; + u64 idx = vma_res->start >> GEN8_PTE_SHIFT; do { struct i915_page_directory * const pdp = @@ -597,7 +598,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, cache_level, flags); } while (idx); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ba083d800a08..5d0ec7c49b6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 352254e001b4..d1daa4cc2895 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1708,18 +1708,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { - struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; + struct i915_vma_resource *vma_res = rq->batch_res; void *ring; int size; - if (!i915_vma_snapshot_present(vsnap)) - vsnap = NULL; - drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u, - vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u); + vma_res ? upper_32_bits(vma_res->start) : ~0u, + vma_res ? 
lower_32_bits(vma_res->start) : ~0u); size = rq->tail - rq->head; if (rq->tail < rq->head) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index a69df5e9e77a..be56d0b41892 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2599,6 +2599,43 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (IS_ERR(ce)) { + err = ce; + goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + for_each_child(parent, ce) + intel_context_set_nopreempt(ce); + + return parent; + +unwind: + if (parent) + intel_context_put(parent); + return err; +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, @@ -2617,6 +2654,7 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + .create_parallel = execlists_create_parallel, .create_virtual = execlists_create_virtual, }; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 5263dda7f8d5..a1b2761bc16e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -86,7 +86,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) * beyond the end of the batch buffer, across the page boundary, * and beyond the end of the GTT if we do not provide a guard. */ - ret = ggtt_init_hw(&i915->ggtt); + ret = ggtt_init_hw(to_gt(i915)->ggtt); if (ret) return ret; @@ -142,7 +142,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } @@ -235,7 +235,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, } static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -252,10 +252,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ gte = (gen8_pte_t __iomem *)ggtt->gsm; - gte += vma->node.start / I915_GTT_PAGE_SIZE; - end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) gen8_set_pte(gte++, pte_encode | addr); GEM_BUG_ON(gte > end); @@ -292,7 +292,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, * through the GMADR mapped BAR (i915->mm.gtt->gtt). 
*/ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -303,10 +303,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; gte = (gen6_pte_t __iomem *)ggtt->gsm; - gte += vma->node.start / I915_GTT_PAGE_SIZE; - end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) iowrite32(vm->pte_encode(addr, level, flags), gte++); GEM_BUG_ON(gte > end); @@ -389,7 +389,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, struct insert_entries { struct i915_address_space *vm; - struct i915_vma *vma; + struct i915_vma_resource *vma_res; enum i915_cache_level level; u32 flags; }; @@ -398,18 +398,18 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) { struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); + gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); bxt_vtd_ggtt_wa(arg->vm); return 0; } static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { - struct insert_entries arg = { vm, vma, level, flags }; + struct insert_entries arg = { vm, vma_res, level, flags }; stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } @@ -448,14 +448,14 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, } static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, + intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, flags); } @@ -467,30 +467,32 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, static void ggtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { - struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) + if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK)) return; + vma_res->bound_flags |= flags; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (i915_gem_object_is_readonly(obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma, cache_level, pte_flags); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } -static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +static void ggtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - vm->clear_range(vm, vma->node.start, vma->size); + vm->clear_range(vm, vma_res->start, vma_res->vma_size); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -623,7 +625,7 @@ err: static void aliasing_gtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { @@ -631,25 +633,27 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm, /* Currently applicable only to VLV */ pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; if (flags & I915_VMA_LOCAL_BIND) ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, - stash, vma, cache_level, flags); + stash, vma_res, cache_level, flags); if (flags & I915_VMA_GLOBAL_BIND) - vm->insert_entries(vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); + + vma_res->bound_flags |= flags; } static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma *vma) + struct i915_vma_resource *vma_res) { - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - vm->clear_range(vm, vma->node.start, vma->size); + if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND) + vm->clear_range(vm, vma_res->start, vma_res->vma_size); - if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) - ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma); + if (vma_res->bound_flags & I915_VMA_LOCAL_BIND) + ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res); } static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) @@ -722,14 +726,14 @@ int i915_init_ggtt(struct drm_i915_private *i915) { int ret; - ret = init_ggtt(&i915->ggtt); + ret = init_ggtt(to_gt(i915)->ggtt); if (ret) return ret; if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { - ret = init_aliasing_ppgtt(&i915->ggtt); + ret = init_aliasing_ppgtt(to_gt(i915)->ggtt); if (ret) - cleanup_init_ggtt(&i915->ggtt); + cleanup_init_ggtt(to_gt(i915)->ggtt); } return 0; @@ -772,7 +776,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) */ void i915_ggtt_driver_release(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct 
i915_ggtt *ggtt = to_gt(i915)->ggtt; fini_aliasing_ppgtt(ggtt); @@ -787,7 +791,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) */ void i915_ggtt_driver_late_release(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1); dma_resv_fini(&ggtt->vm._resv); @@ -1208,7 +1212,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { int ret; - ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915)); + ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); if (ret) return ret; @@ -1280,7 +1284,7 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - vma->ops->bind_vma(vm, NULL, vma, + vma->ops->bind_vma(vm, NULL, vma->resource, obj ? obj->cache_level : 0, was_bound); if (obj) { /* only used during resume => exclusive access */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index f8948de72036..beabf3bc9b75 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -728,8 +728,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } - i915->ggtt.bit_6_swizzle_x = swizzle_x; - i915->ggtt.bit_6_swizzle_y = swizzle_y; + to_gt(i915)->ggtt->bit_6_swizzle_x = swizzle_x; + to_gt(i915)->ggtt->bit_6_swizzle_y = swizzle_y; } /* @@ -896,7 +896,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; if (GRAPHICS_VER(i915) < 5 || - i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f98f0fb21efb..298ff32c8d0c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <drm/drm_managed.h> #include <drm/intel-gtt.h> #include "intel_gt_debugfs.h" @@ -85,9 +86,11 @@ int intel_gt_probe_lmem(struct intel_gt *gt) return 0; } -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) +int intel_gt_assign_ggtt(struct intel_gt *gt) { - gt->ggtt = ggtt; + gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); + + return gt->ggtt ?
0 : -ENOMEM; } static const struct intel_mmio_range icl_l3bank_steering_table[] = { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 3ace129eb2af..94e1bac8c0cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -36,7 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index a94be0306464..46be4197b93f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); + vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 177b42b935a1..8073438b67c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -27,6 +27,7 @@ #include "gt/intel_reset.h" #include "i915_selftest.h" +#include "i915_vma_resource.h" #include "i915_vma_types.h" #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) @@ -200,7 +201,7 @@ struct i915_vma_ops { /* Map an object into an address space with the given cache flags. */ void (*bind_vma)(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); /* @@ -208,7 +209,8 @@ struct i915_vma_ops { * setting the valid PTE entries to a reserved scratch page. 
*/ void (*unbind_vma)(struct i915_address_space *vm, - struct i915_vma *vma); + struct i915_vma_resource *vma_res); + }; struct i915_address_space { @@ -263,6 +265,9 @@ struct i915_address_space { /* Flags used when creating page-table objects for this vm */ unsigned long lmem_pt_obj_flags; + /* Interval tree for pending unbind vma resources */ + struct rb_root_cached pending_unbind; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); struct drm_i915_gem_object * @@ -285,7 +290,7 @@ struct i915_address_space { enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); void (*cleanup)(struct i915_address_space *vm); @@ -600,11 +605,11 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); void ppgtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma *vma); + struct i915_vma_resource *vma_res); void gtt_write_workarounds(struct intel_gt *gt); @@ -627,8 +632,8 @@ __vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -} sgt_dma(struct i915_vma *vma) { - struct scatterlist *sg = vma->pages->sgl; +} sgt_dma(struct i915_vma_resource *vma_res) { + struct scatterlist *sg = vma_res->bi.pages->sgl; dma_addr_t addr = sg_dma_address(sg); return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b3489599e4de..84456ffeb4cd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1065,6 +1065,10 @@ lrc_pin(struct intel_context *ce, void lrc_unpin(struct intel_context *ce) { + if (unlikely(ce->parallel.last_rq)) { + i915_request_put(ce->parallel.last_rq); + ce->parallel.last_rq = NULL; + } check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 083b3090c69c..48e6e2f87700 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -179,32 +179,34 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { u32 pte_flags; - if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - vm->allocate_va_range(vm, stash, vma->node.start, vma->size); - set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + if (!vma_res->allocated) { + vm->allocate_va_range(vm, stash, vma_res->start, + vma_res->vma_size); + vma_res->allocated = true; } /* Applicable to VLV, and gen8+ */ pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(vma->obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); wmb(); } -void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +void ppgtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - if 
(test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) - vm->clear_range(vm, vma->node.start, vma->size); + if (vma_res->allocated) + vm->clear_range(vm, vma_res->start, vma_res->vma_size); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index fde2dcb59809..21215a080088 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -15,7 +15,7 @@ static int init_fake_lmem_bar(struct intel_memory_region *mem) { struct drm_i915_private *i915 = mem->i915; - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; unsigned long n; int ret; @@ -131,7 +131,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) if (!i915->params.fake_lmem_start) return ERR_PTR(-ENODEV); - GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); + GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt)); /* Your mappable aperture belongs to me now! */ mappable_end = pci_resource_len(pdev, 2); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 8a873f6bda7f..37c38bdd5f47 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -19,7 +19,7 @@ __igt_reset_stolen(struct intel_gt *gt, intel_engine_mask_t mask, const char *msg) { - struct i915_ggtt *ggtt = &gt->i915->ggtt; + struct i915_ggtt *ggtt = gt->ggtt; const struct resource *dsm = &gt->i915->dsm; resource_size_t num_pages, page; struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index fe5d7d261797..7afdadc7656f 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -7,9 +7,9 @@ #define _ABI_GUC_ACTIONS_ABI_H /** - * DOC: HOST2GUC_REGISTER_CTB + * DOC: HOST2GUC_SELF_CFG * - * This message is used as part of the `CTB based communication`_ setup. + * This message is used by the Host KMD to set up the `GuC Self Config KLVs`_. * * This message must be sent as `MMIO HXG Message`_.
* @@ -22,20 +22,18 @@ * | +-------+--------------------------------------------------------------+ * | | 27:16 | DATA0 = MBZ | * | +-------+--------------------------------------------------------------+ - * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 | + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 | * +---+-------+--------------------------------------------------------------+ - * | 1 | 31:12 | RESERVED = MBZ | + * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ | * | +-------+--------------------------------------------------------------+ - * | | 11:8 | **TYPE** - type for the `CT Buffer`_ | + * | | 15:0 | **KLV_LEN** - KLV length | * | | | | - * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 | - * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 | - * | +-------+--------------------------------------------------------------+ - * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 | + * | | | - 32 bit KLV = 1 | + * | | | - 64 bit KLV = 2 | * +---+-------+--------------------------------------------------------------+ - * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ | + * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value | * +---+-------+--------------------------------------------------------------+ - * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ | + * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) | * +---+-------+--------------------------------------------------------------+ * * +---+-------+--------------------------------------------------------------+ @@ -45,28 +43,25 @@ * | +-------+--------------------------------------------------------------+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | * | +-------+--------------------------------------------------------------+ - * | | 27:0 | DATA0 = MBZ | + * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized | * +---+-------+--------------------------------------------------------------+ */ -#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505 +#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508 -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) -#define GUC_CTB_TYPE_HOST2GUC 0u -#define GUC_CTB_TYPE_GUC2HOST 1u -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn -#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN -#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0 /** - * DOC: HOST2GUC_DEREGISTER_CTB + * DOC: HOST2GUC_CONTROL_CTB * - * This message is used as part of the `CTB based 
communication`_ teardown. + * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_. * * This message must be sent as `MMIO HXG Message`_. * @@ -79,15 +74,12 @@ * | +-------+--------------------------------------------------------------+ * | | 27:16 | DATA0 = MBZ | * | +-------+--------------------------------------------------------------+ - * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 | + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 | * +---+-------+--------------------------------------------------------------+ - * | 1 | 31:12 | RESERVED = MBZ | - * | +-------+--------------------------------------------------------------+ - * | | 11:8 | **TYPE** - type of the `CT Buffer`_ | + * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ | * | | | | - * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ | - * | +-------+--------------------------------------------------------------+ - * | | 7:0 | RESERVED = MBZ | + * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 | + * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 | * +---+-------+--------------------------------------------------------------+ * * +---+-------+--------------------------------------------------------------+ @@ -100,16 +92,16 @@ * | | 27:0 | DATA0 = MBZ | * +---+-------+--------------------------------------------------------------+ */ -#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506 +#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509 -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn +#define GUC_CTB_CONTROL_DISABLE 0u +#define GUC_CTB_CONTROL_ENABLE 1u -#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN -#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /* legacy definitions */ @@ -143,8 +135,12 @@ enum intel_guc_action { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, - INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, + INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, + INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, + INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, + INTEL_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 488b6061ee89..c20658ee85a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -11,4 +11,27 @@ enum intel_guc_response_status { INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; +enum intel_guc_load_status { + INTEL_GUC_LOAD_STATUS_DEFAULT = 0x00, + INTEL_GUC_LOAD_STATUS_START = 0x01, + 
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, + INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, + INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10, + INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20, + INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, + INTEL_GUC_LOAD_STATUS_GUCINT_DONE = 0x40, + INTEL_GUC_LOAD_STATUS_DPC_READY = 0x50, + INTEL_GUC_LOAD_STATUS_DPC_ERROR = 0x60, + INTEL_GUC_LOAD_STATUS_EXCEPTION = 0x70, + INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71, + INTEL_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72, + INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, + INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, + INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, + + INTEL_GUC_LOAD_STATUS_READY = 0xF0, +}; + #endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h new file mode 100644 index 000000000000..f0814a57c191 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _ABI_GUC_KLVS_ABI_H +#define _ABI_GUC_KLVS_ABI_H + +/** + * DOC: GuC KLV + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **KEY** - KLV key identifier | + * | | | - `GuC Self Config KLVs`_ | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_KLV_LEN_MIN 1u +#define GUC_KLV_0_KEY (0xffff << 16) +#define GUC_KLV_0_LEN (0xffff << 0) +#define GUC_KLV_n_VALUE (0xffffffff << 0) + +/** + * DOC: GuC Self Config KLVs + * + * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902 + * Refers to 64 bit Global Gfx address of H2G `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903 + * Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904 + * Refers to size of H2G `CT Buffer`_ in bytes. + * Should be a multiple of 4K. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905 + * Refers to 64 bit Global Gfx address of G2H `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906 + * Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907 + * Refers to size of G2H `CT Buffer`_ in bytes. + * Should be a multiple of 4K. 
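 + *
 + * For illustration only, a 64 bit KLV such as `GUC_KLV_SELF_CFG_H2G_CTB_ADDR`_
 + * would therefore occupy three dwords; the value ordering below is assumed to
 + * mirror the VALUE32/VALUE64 split used by HOST2GUC_SELF_CFG_::
 + *
 + *	dw[0] = FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY) |
 + *		FIELD_PREP(GUC_KLV_0_LEN, GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN);
 + *	dw[1] = lower_32_bits(addr);
 + *	dw[2] = upper_32_bits(addr);
 + *
 + * where ``dw`` is the raw dword stream and ``addr`` is the GGTT address being
 + * published to the GuC.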
+ */ + +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY 0x0902 +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY 0x0903 +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY 0x0904 +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN 1u + +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY 0x0905 +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY 0x0906 +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u + +#endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 6e228343e8cb..cbec51f4dec3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -182,6 +182,9 @@ void intel_guc_init_early(struct intel_guc *guc) guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN; BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT); } + + intel_guc_enable_msg(guc, INTEL_GUC_RECV_MSG_EXCEPTION | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); } void intel_guc_init_late(struct intel_guc *guc) @@ -222,32 +225,48 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) u32 flags; #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) - #define UNIT SZ_1M - #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE + #define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS #else - #define UNIT SZ_4K - #define FLAG 0 + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 #endif BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - FLAG | - ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); - #undef UNIT - #undef FLAG + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG return flags; } @@ -260,6 +279,26 @@ static u32 guc_ctl_ads_flags(struct intel_guc *guc) return flags; } +static u32 guc_ctl_wa_flags(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 flags = 0; + + /* Wa_22012773006:gen11,gen12 < XeHP */ + if (GRAPHICS_VER(gt->i915) >= 11 && + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) + flags |= GUC_WA_POLLCS; + + return flags; +} + +static u32 
guc_ctl_devid(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915); +} + /* * Initialise the GuC parameter block before starting the firmware * transfer. These parameters are read by the firmware on startup @@ -276,6 +315,8 @@ static void guc_init_params(struct intel_guc *guc) params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]); @@ -513,9 +554,10 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, /* Make sure to handle only enabled messages */ msg = payload[0] & guc->msg_enabled_mask; - if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)) - intel_guc_log_handle_flush_event(&guc->log); + if (msg & INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED) + drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC crash dump notification!\n"); + if (msg & INTEL_GUC_RECV_MSG_EXCEPTION) + drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC exception notification!\n"); return 0; } @@ -549,7 +591,7 @@ int intel_guc_suspend(struct intel_guc *guc) { int ret; u32 action[] = { - INTEL_GUC_ACTION_RESET_CLIENT, + INTEL_GUC_ACTION_CLIENT_SOFT_RESET, }; if (!intel_guc_is_ready(guc)) @@ -713,6 +755,56 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, return 0; } +static int __guc_action_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value) +{ + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_SELF_CFG), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, lower_32_bits(value)), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, upper_32_bits(value)), + }; + int ret; + + GEM_BUG_ON(len > 2); + GEM_BUG_ON(len == 1 && upper_32_bits(value)); + + /* Self config must go over MMIO */ + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + if (unlikely(ret < 0)) + return ret; + if (unlikely(ret > 1)) + return -EPROTO; + if (unlikely(!ret)) + return -ENOKEY; + + return 0; +} + +static int __guc_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int err = __guc_action_self_cfg(guc, key, len, value); + + if (unlikely(err)) + i915_probe_error(i915, "Unsuccessful self-config (%pe) key %#hx value %#llx\n", + ERR_PTR(err), key, value); + return err; +} + +int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value) +{ + return __guc_self_cfg(guc, key, 1, value); +} + +int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value) +{ + return __guc_self_cfg(guc, key, 2, value); +} + /** * intel_guc_load_status - dump information about GuC load status * @guc: the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa69..9d26a86fe557 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -328,6 +328,8 @@ int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct 
intel_guc *guc, u32 size); int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, struct i915_vma **out_vma, void **out_vaddr); +int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value); +int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value); static inline bool intel_guc_is_supported(struct intel_guc *guc) { @@ -404,6 +406,8 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_find_hung_context(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 1a1edae67e4e..aa767540ca2a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -40,6 +40,10 @@ * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | private data | * +---------------------------------------+ * | padding | @@ -65,6 +69,12 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_capture_size(struct intel_guc *guc) +{ + /* FIXME: Allocate a proper capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); @@ -85,7 +95,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_private_data_offset(struct intel_guc *guc) +static u32 guc_ads_capture_offset(struct intel_guc *guc) { u32 offset; @@ -95,6 +105,16 @@ static u32 guc_ads_private_data_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_private_data_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_capture_offset(guc) + + guc_ads_capture_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_blob_size(struct intel_guc *guc) { return guc_ads_private_data_offset(guc) + @@ -499,6 +519,26 @@ static void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } +static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob) +{ + int i, j; + u32 addr_ggtt, offset; + + offset = guc_ads_capture_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + + /* FIXME: Populate a proper capture list */ + + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + blob->ads.capture_instance[i][j] = addr_ggtt; + blob->ads.capture_class[i][j] = addr_ggtt; + } + + blob->ads.capture_global[i] = addr_ggtt; + } +} + static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -532,6 +572,9 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); + /* Capture list for hang debug */ + guc_capture_list_init(guc, blob); + /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index aa6dd6415202..de89d40abd38 100644 --- 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -112,18 +112,6 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) init_waitqueue_head(&ct->wq); } -static inline const char *guc_ct_buffer_type_to_str(u32 type) -{ - switch (type) { - case GUC_CTB_TYPE_HOST2GUC: - return "SEND"; - case GUC_CTB_TYPE_GUC2HOST: - return "RECV"; - default: - return "<invalid>"; - } -} - static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) { memset(desc, 0, sizeof(*desc)); @@ -156,71 +144,65 @@ static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, guc_ct_buffer_reset(ctb); } -static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, - u32 desc_addr, u32 buff_addr, u32 size) +static int guc_action_control_ctb(struct intel_guc *guc, u32 control) { - u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = { + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) | - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_CONTROL_CTB), + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, control), }; int ret; - GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); - GEM_BUG_ON(size % SZ_4K); + GEM_BUG_ON(control != GUC_CTB_CONTROL_DISABLE && control != GUC_CTB_CONTROL_ENABLE); - /* CT registration must go over MMIO */ + /* CT control must go over MMIO */ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); return ret > 0 ? -EPROTO : ret; } -static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, - u32 desc_addr, u32 buff_addr, u32 size) +static int ct_control_enable(struct intel_guc_ct *ct, bool enable) { int err; - err = i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO); + err = guc_action_control_ctb(ct_to_guc(ct), enable ? + GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE); if (unlikely(err)) - return err; + CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n", + enabledisable(enable), ERR_PTR(err)); - err = guc_action_register_ct_buffer(ct_to_guc(ct), type, - desc_addr, buff_addr, size); - if (unlikely(err)) - CT_ERROR(ct, "Failed to register %s buffer (%pe)\n", - guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } -static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) +static int ct_register_buffer(struct intel_guc_ct *ct, bool send, + u32 desc_addr, u32 buff_addr, u32 size) { - u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = { - FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), - FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), - }; - int ret; - - GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); - - /* CT deregistration must go over MMIO */ - ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + int err; - return ret > 0 ? -EPROTO : ret; -} + err = intel_guc_self_cfg64(ct_to_guc(ct), send ? 
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (unlikely(err)) + goto failed; -static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) -{ - int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type); + err = intel_guc_self_cfg64(ct_to_guc(ct), send ? + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, + buff_addr); + if (unlikely(err)) + goto failed; + err = intel_guc_self_cfg32(ct_to_guc(ct), send ? + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, + size); if (unlikely(err)) - CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n", - guc_ct_buffer_type_to_str(type), ERR_PTR(err)); +failed: + CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n", + send ? "SEND" : "RECV", ERR_PTR(err)); + return err; } @@ -308,7 +290,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, desc, cmds; + u32 base, desc, cmds, size; void *blob; int err; @@ -333,27 +315,27 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) */ desc = base + ptrdiff(ct->ctbs.recv.desc, blob); cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); - err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST, - desc, cmds, ct->ctbs.recv.size * 4); - + size = ct->ctbs.recv.size * 4; + err = ct_register_buffer(ct, false, desc, cmds, size); if (unlikely(err)) goto err_out; desc = base + ptrdiff(ct->ctbs.send.desc, blob); cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); - err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC, - desc, cmds, ct->ctbs.send.size * 4); + size = ct->ctbs.send.size * 4; + err = ct_register_buffer(ct, true, desc, cmds, size); + if (unlikely(err)) + goto err_out; + err = ct_control_enable(ct, true); if (unlikely(err)) - goto err_deregister; + goto err_out; ct->enabled = true; ct->stall_time = KTIME_MAX; return 0; -err_deregister: - ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; @@ -372,8 +354,7 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; if (intel_guc_is_fw_running(guc)) { - ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC); - ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); + ct_control_enable(ct, false); } } @@ -662,6 +643,7 @@ static int ct_send(struct intel_guc_ct *ct, struct ct_request request; unsigned long flags; unsigned int sleep_period_ms = 1; + bool send_again; u32 fence; int err; @@ -671,6 +653,9 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(!response_buf && response_buf_size); might_sleep(); +resend: + send_again = false; + /* * We use a lazy spin wait loop here as we believe that if the CT * buffers are sized correctly the flow control condition should be @@ -725,6 +710,13 @@ retry: goto unlink; } + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + CT_DEBUG(ct, "retrying request %#x (%u)\n", *action, + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status)); + send_again = true; + goto unlink; + } + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; goto unlink; @@ -747,6 +739,9 @@ unlink: list_del(&request.link); spin_unlock_irqrestore(&ct->requests.lock, flags); + if (unlikely(send_again)) + goto resend; + return err; } @@ -918,6 +913,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN); 
GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC); GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS && + FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY && FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE); CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]); @@ -990,9 +986,27 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: ret = intel_guc_context_reset_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + ret = intel_guc_error_capture_process_msg(guc, payload, len); + if (unlikely(ret)) + CT_ERROR(ct, "error capture notification failed %x %*ph\n", + action, 4 * len, payload); + break; case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: ret = intel_guc_engine_failure_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: + intel_guc_log_handle_flush_event(&guc->log); + ret = 0; + break; + case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: + CT_ERROR(ct, "Received GuC crash dump notification!\n"); + ret = 0; + break; + case INTEL_GUC_ACTION_NOTIFY_EXCEPTION: + CT_ERROR(ct, "Received GuC exception notification!\n"); + ret = 0; + break; default: ret = -EOPNOTSUPP; break; @@ -1098,6 +1112,7 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) break; case GUC_HXG_TYPE_RESPONSE_SUCCESS: case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: err = ct_handle_response(ct, msg); break; default: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 31420ce1ce6b..f773e7f35bc1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -90,11 +90,10 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) { u32 val = intel_uncore_read(uncore, GUC_STATUS); - u32 uk_val = val & GS_UKERNEL_MASK; + u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); *status = val; - return (uk_val == GS_UKERNEL_READY) || - ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); + return uk_val == INTEL_GUC_LOAD_STATUS_READY; } static int guc_wait_ucode(struct intel_uncore *uncore) @@ -105,17 +104,26 @@ static int guc_wait_ucode(struct intel_uncore *uncore) /* * Wait for the GuC to start up. * NB: Docs recommend not using the interrupt for completion. - * Measurements indicate this should take no more than 20ms, so a + * Measurements indicate this should take no more than 20ms + * (assuming the GT clock is at maximum frequency). So, a * timeout here indicates that the GuC has failed and is unusable. * (Higher levels of the driver may decide to reset the GuC and * attempt the ucode load again if this happens.) + * + * FIXME: There is a known (but exceedingly unlikely) race condition + * where the asynchronous frequency management code could reduce + * the GT clock while a GuC reload is in progress (during a full + * GT reset). A fix is in progress but there are complex locking + * issues to be resolved. In the meantime bump the timeout to + * 200ms. Even at slowest clock, this should be sufficient. And + * in the working case, a larger timeout makes no difference. 
*/ - ret = wait_for(guc_ready(uncore, &status), 100); + ret = wait_for(guc_ready(uncore, &status), 200); if (ret) { struct drm_device *drm = &uncore->i915->drm; - drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status); - drm_dbg(drm, "GuC load failed: status: Reset = %d, " + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); + drm_info(drm, "GuC load failed: status: Reset = %d, " "BootROM = 0x%02X, UKernel = 0x%02X, " "MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), @@ -125,13 +133,13 @@ static int guc_wait_ucode(struct intel_uncore *uncore) REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - drm_dbg(drm, "GuC firmware signature verification failed\n"); + drm_info(drm, "GuC firmware signature verification failed\n"); ret = -ENOEXEC; } - if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { - drm_dbg(drm, "GuC firmware exception. EIP: %#x\n", - intel_uncore_read(uncore, SOFT_SCRATCH(13))); + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) { + drm_info(drm, "GuC firmware exception. EIP: %#x\n", + intel_uncore_read(uncore, SOFT_SCRATCH(13))); ret = -ENXIO; } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 7072e30e99f4..6a4612a852e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -16,6 +16,7 @@ #include "abi/guc_errors_abi.h" #include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_klvs_abi.h" #include "abi/guc_messages_abi.h" /* Payload length only i.e. don't include G2H header length */ @@ -84,19 +85,24 @@ #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) #define GUC_CTL_LOG_PARAMS 0 -#define GUC_LOG_VALID (1 << 0) -#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) -#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) +#define GUC_LOG_VALID BIT(0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) +#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) +#define GUC_LOG_LOG_ALLOC_UNITS BIT(3) #define GUC_LOG_CRASH_SHIFT 4 #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) #define GUC_LOG_DEBUG_SHIFT 6 #define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 +#define GUC_WA_POLLCS BIT(18) + #define GUC_CTL_FEATURE 2 -#define GUC_CTL_DISABLE_SCHEDULER (1 << 14) #define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 @@ -116,6 +122,8 @@ #define GUC_ADS_ADDR_SHIFT 1 #define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) +#define GUC_CTL_DEVID 5 + #define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */ /* Generic GT SysInfo data types */ @@ -263,7 +271,10 @@ struct guc_mmio_reg { u32 offset; u32 value; u32 flags; -#define GUC_REGSET_MASKED (1 << 0) + u32 mask; +#define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_MASKED_WITH_VALUE BIT(2) +#define GUC_REGSET_RESTORE_ONLY BIT(3) } __packed; /* GuC register sets */ @@ -280,6 +291,12 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; +enum { + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; @@ -291,7 +308,11 @@ 
struct guc_ads { u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; u32 private_data; - u32 reserved[15]; + u32 reserved2; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[14]; } __packed; /* Engine usage stats */ @@ -312,6 +333,7 @@ struct guc_engine_usage { enum guc_log_buffer_type { GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, + GUC_CAPTURE_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; @@ -342,6 +364,7 @@ struct guc_log_buffer_state { u32 write_ptr; u32 size; u32 sampled_write_ptr; + u32 wrap_offset; union { struct { u32 flush_to_file:1; @@ -382,7 +405,7 @@ struct guc_shared_ctx_data { /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3) + INTEL_GUC_RECV_MSG_EXCEPTION = BIT(30), }; #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index ac0931f0374b..be35f0570396 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -55,20 +55,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static void guc_log_enable_flush_events(struct intel_guc_log *log) -{ - intel_guc_enable_msg(log_to_guc(log), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); -} - -static void guc_log_disable_flush_events(struct intel_guc_log *log) -{ - intel_guc_disable_msg(log_to_guc(log), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); -} - /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. @@ -201,6 +187,8 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; + case GUC_CAPTURE_LOG_BUFFER: + return CAPTURE_BUFFER_SIZE; default: MISSING_CASE(type); } @@ -463,14 +451,19 @@ int intel_guc_log_create(struct intel_guc_log *log) * +-------------------------------+ 32B * | Debug state header | * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | * +===============================+ + CRASH_SIZE * | Debug logs | * +===============================+ + DEBUG_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -592,8 +585,6 @@ int intel_guc_log_relay_start(struct intel_guc_log *log) if (log->relay.started) return -EEXIST; - guc_log_enable_flush_events(log); - /* * When GuC is logging without us relaying to userspace, we're ignoring * the flush notification. 
This means that we need to unconditionally @@ -640,7 +631,6 @@ static void guc_log_relay_stop(struct intel_guc_log *log) if (!log->relay.started) return; - guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); @@ -661,7 +651,8 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) void intel_guc_log_handle_flush_event(struct intel_guc_log *log) { - queue_work(system_highpri_wq, &log->relay.flush_work); + if (log->relay.started) + queue_work(system_highpri_wq, &log->relay.flush_work); } static const char * @@ -672,6 +663,8 @@ stringify_guc_log_type(enum guc_log_buffer_type type) return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; + case GUC_CAPTURE_LOG_BUFFER: + return "CAPTURE"; default: MISSING_CASE(type); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index fe6ab7550a14..d7e1b6471fed 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -18,12 +18,15 @@ struct intel_guc; #if defined(CONFIG_DRM_I915_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_2M #define DEBUG_BUFFER_SIZE SZ_16M +#define CAPTURE_BUFFER_SIZE SZ_4M #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_2M +#define CAPTURE_BUFFER_SIZE SZ_1M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index b37fc2ffaef2..e6bd66d6ce5a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -22,10 +22,6 @@ #define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) #define GS_UKERNEL_SHIFT 8 #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT) #define GS_MIA_SHIFT 16 #define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) #define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e7517206af82..c13f123ae16e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1349,7 +1349,8 @@ submission_disabled(struct intel_guc *guc) struct i915_sched_engine * const sched_engine = guc->sched_engine; return unlikely(!sched_engine || - !__tasklet_is_enabled(&sched_engine->tasklet)); + !__tasklet_is_enabled(&sched_engine->tasklet) || + intel_gt_is_wedged(guc_to_gt(guc))); } static void disable_submission(struct intel_guc *guc) @@ -1725,7 +1726,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) { /* Reset called during driver load or during wedge? 
*/ if (unlikely(!guc_submission_initialized(guc) || - test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { + intel_gt_is_wedged(guc_to_gt(guc)))) { return; } @@ -3248,8 +3249,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) GEM_BUG_ON(!intel_context_is_parent(ce)); GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); - if (ce->parallel.last_rq) - i915_request_put(ce->parallel.last_rq); unpin_guc_id(guc, ce); lrc_unpin(ce); } @@ -3979,6 +3978,11 @@ static void guc_handle_context_reset(struct intel_guc *guc, !context_blocked(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); + } else { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid GuC engine reset notification for 0x%04X on %s: banned = %d, blocked = %d", + ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce), + context_blocked(ce)); + } } @@ -4017,6 +4021,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, return 0; } +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + int status; + + if (unlikely(len != 1)) { + drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + status = msg[0]; + drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); + + /* FIXME: Do something with the capture */ + + return 0; +} + static struct intel_engine_cs * guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d10b227ac4aa..556829de9c17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -124,6 +124,7 @@ int intel_huc_auth(struct intel_huc *huc) } intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + drm_info(&gt->i915->drm, "HuC authenticated\n"); return 0; fail: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 09ed29df67bc..da199aa6989f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -432,6 +432,15 @@ static int __uc_check_hw(struct intel_uc *uc) return 0; } +static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + drm_info(&i915->drm, "%s firmware %s version %u.%u\n", + intel_uc_fw_type_repr(fw->type), fw->path, + fw->major_ver_found, fw->minor_ver_found); +} + static int __uc_init_hw(struct intel_uc *uc) { struct drm_i915_private *i915 = uc_to_gt(uc)->i915; @@ -442,6 +451,11 @@ static int __uc_init_hw(struct intel_uc *uc) GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); + print_fw_ver(uc, &guc->fw); + + if (intel_uc_uses_huc(uc)) + print_fw_ver(uc, &huc->fw); + if (!intel_uc_fw_is_loadable(&guc->fw)) { ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || @@ -507,24 +521,11 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } - drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", - intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, - guc->fw.major_ver_found, guc->fw.minor_ver_found, - "submission", + drm_info(&i915->drm, "GuC submission %s\n", enableddisabled(intel_uc_uses_guc_submission(uc))); - - drm_info(&i915->drm, "GuC SLPC: %s\n", + drm_info(&i915->drm, "GuC SLPC %s\n", enableddisabled(intel_uc_uses_guc_slpc(uc))); - if (intel_uc_uses_huc(uc)) { - drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", - intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), -
huc->fw.path, - huc->fw.major_ver_found, huc->fw.minor_ver_found, - "authenticated", - yesno(intel_huc_is_authenticated(huc))); - } - return 0; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index a5af05bde6f2..62db14d41042 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -49,21 +49,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0)) + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \ + fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3)) #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ @@ -448,20 +448,19 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma *dummy = &uc_fw->dummy; + struct i915_vma_resource *dummy = &uc_fw->dummy; u32 pte_flags = 0; - dummy->node.start = uc_fw_ggtt_offset(uc_fw); - dummy->node.size = obj->base.size; - dummy->pages = obj->mm.pages; - dummy->vm = &ggtt->vm; + dummy->start = uc_fw_ggtt_offset(uc_fw); + dummy->node_size = obj->base.size; + dummy->bi.pages = obj->mm.pages; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size); + GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(dummy->pages); + drm_clflush_sg(dummy->bi.pages); if (i915_gem_object_is_lmem(obj)) pte_flags |= PTE_LM; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index d9d1dc0b4cbb..3229018877d3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -85,7 +85,7 @@ struct intel_uc_fw { * threaded as it done during driver load (inherently single threaded) * or during a GT reset (mutex guarantees single threaded). 
*/ - struct i915_vma dummy; + struct i915_vma_resource dummy; struct i915_vma *rsa_data; /* diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 8e65cd8258b9..94c3eb1586b0 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -84,7 +84,7 @@ static int vgpu_gem_get_pages( kfree(st); return ret; } - gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + gtt_entries = (gen8_pte_t __iomem *)to_gt(dev_priv)->ggtt->gsm + (fb_info->start >> PAGE_SHIFT); for_each_sg(st->sgl, sg, page_num, i) { dma_addr_t dma_addr = diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e0e052cdf8b8..f3141b58d912 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -170,7 +170,8 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s offset: %08llx, size: %08llx, pages: %s", stringify_vma_type(vma), vma->node.start, vma->node.size, - stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); + stringify_page_sizes(vma->resource->page_sizes_gtt, + NULL, 0)); if (i915_vma_is_ggtt(vma) || i915_vma_is_dpt(vma)) { switch (vma->ggtt_view.type) { case I915_GGTT_VIEW_NORMAL: @@ -390,9 +391,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data) intel_wakeref_t wakeref; seq_printf(m, "bit6 swizzle for X-tiling = %s\n", - swizzle_string(dev_priv->ggtt.bit_6_swizzle_x)); + swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_x)); seq_printf(m, "bit6 swizzle for Y-tiling = %s\n", - swizzle_string(dev_priv->ggtt.bit_6_swizzle_y)); + swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_y)); if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) seq_puts(m, "L-shaped memory detected\n"); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 95174938b160..5f2343389b5e 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -571,6 +571,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) i915_perf_init(dev_priv); + ret = intel_gt_assign_ggtt(to_gt(dev_priv)); + if (ret) + goto err_perf; + ret = i915_ggtt_probe_hw(dev_priv); if (ret) goto err_perf; @@ -587,8 +591,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt); - ret = intel_gt_probe_lmem(to_gt(dev_priv)); if (ret) goto err_mem_regions; @@ -1146,7 +1148,7 @@ static int i915_drm_suspend(struct drm_device *dev) /* Must be called before GGTT is suspended. */ intel_dpt_suspend(dev_priv); - i915_ggtt_suspend(&dev_priv->ggtt); + i915_ggtt_suspend(to_gt(dev_priv)->ggtt); i915_save_display(dev_priv); @@ -1270,7 +1272,7 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) drm_err(&dev_priv->drm, "failed to re-enable GGTT\n"); - i915_ggtt_resume(&dev_priv->ggtt); + i915_ggtt_resume(to_gt(dev_priv)->ggtt); /* Must be called after GGTT is resumed. 
*/ intel_dpt_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0c70ab08fc0c..b61a1c785739 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -838,8 +838,6 @@ struct drm_i915_private { struct drm_atomic_state *modeset_restore_state; struct drm_modeset_acquire_ctx reset_ctx; - struct i915_ggtt ggtt; /* VM representing the global address space */ - struct i915_gem_mm mm; /* Kernel Modesetting */ @@ -1681,6 +1679,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1) #define I915_GEM_OBJECT_UNBIND_TEST BIT(2) #define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3) +#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4) void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); @@ -1759,7 +1758,7 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec { struct drm_i915_private *i915 = to_i915(obj->base.dev); - return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + return to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && i915_gem_object_is_tiled(obj); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 915bf431f320..3d6c00f845a3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -88,7 +88,8 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; + struct drm_i915_private *i915 = to_i915(dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; @@ -155,10 +156,16 @@ try_again: spin_unlock(&obj->vma.lock); if (vma) { + bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); ret = -EBUSY; - if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma)) { - if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) { + if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { + assert_object_held(vma->obj); + ret = i915_vma_unbind_async(vma, vm_trylock); + } + + if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma))) { + if (vm_trylock) { if (mutex_trylock(&vma->vm->mutex)) { ret = __i915_vma_unbind(vma); mutex_unlock(&vma->vm->mutex); @@ -289,7 +296,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, bool write) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct i915_vma *vma; struct i915_gem_ww_ctx ww; int ret; @@ -350,7 +357,7 @@ static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj, struct i915_vma *vma) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; i915_gem_object_unpin_pages(obj); if (drm_mm_node_allocated(node)) { @@ -366,7 +373,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *args) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; intel_wakeref_t wakeref; struct drm_mm_node node; void __user *user_data; @@ -522,7 +529,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *args) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct 
intel_runtime_pm *rpm = &i915->runtime_pm; intel_wakeref_t wakeref; struct drm_mm_node node; @@ -823,7 +830,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) */ list_for_each_entry_safe(obj, on, - &i915->ggtt.userfault_list, userfault_link) + &to_gt(i915)->ggtt->userfault_list, userfault_link) __i915_gem_object_release_mmap_gtt(obj); /* @@ -831,8 +838,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) * in use by hardware (i.e. they are pinned), we should not be powering * down! All other fences will be reacquired by the user upon waking. */ - for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i]; /* * Ideally we want to assert that the fence register is not @@ -873,7 +880,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, u64 size, u64 alignment, u64 flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct i915_vma *vma; int ret; @@ -1123,7 +1130,7 @@ err_unlock: /* Minimal basic recovery for KMS */ ret = i915_ggtt_enable_hw(dev_priv); - i915_ggtt_resume(&dev_priv->ggtt); + i915_ggtt_resume(to_gt(dev_priv)->ggtt); intel_init_clock_gating(dev_priv); } @@ -1146,7 +1153,7 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915) void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); + intel_wakeref_auto_fini(&to_gt(dev_priv)->ggtt->userfault_wakeref); i915_gem_suspend_late(dev_priv); intel_gt_driver_remove(to_gt(dev_priv)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cd5f2348a187..2f2ba7a2955d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -56,7 +56,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; /* XXX This does not prevent more requests being submitted! 
*/ if (unlikely(ggtt->do_idle_maps)) @@ -103,7 +103,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(range_overflows(offset, size, vm->total)); - GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); + GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); node->size = size; @@ -201,7 +201,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, GEM_BUG_ON(start >= end); GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); + GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); if (unlikely(range_overflows(start, size, end))) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 7f80ad247bc8..5b8a2157d797 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -31,7 +31,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = pdev->revision; break; case I915_PARAM_NUM_FENCES_AVAIL: - value = i915->ggtt.num_fences; + value = to_gt(i915)->ggtt->num_fences; break; case I915_PARAM_HAS_OVERLAY: value = !!i915->overlay; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5ae812d60abe..f8c4336cba89 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -48,7 +48,6 @@ #include "i915_gpu_error.h" #include "i915_memcpy.h" #include "i915_scatterlist.h" -#include "i915_vma_snapshot.h" #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) @@ -1013,8 +1012,10 @@ void __i915_gpu_coredump_free(struct kref *error_ref) static struct i915_vma_coredump * i915_vma_coredump_create(const struct intel_gt *gt, - const struct i915_vma_snapshot *vsnap, - struct i915_vma_compress *compress) + const struct i915_vma_resource *vma_res, + struct i915_vma_compress *compress, + const char *name) + { struct i915_ggtt *ggtt = gt->ggtt; const u64 slot = ggtt->error_capture.start; @@ -1024,7 +1025,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, might_sleep(); - if (!vsnap || !vsnap->pages || !compress) + if (!vma_res || !vma_res->bi.pages || !compress) return NULL; dst = kmalloc(sizeof(*dst), ALLOW_FAIL); @@ -1037,12 +1038,12 @@ i915_vma_coredump_create(const struct intel_gt *gt, } INIT_LIST_HEAD(&dst->page_list); - strcpy(dst->name, vsnap->name); + strcpy(dst->name, name); dst->next = NULL; - dst->gtt_offset = vsnap->gtt_offset; - dst->gtt_size = vsnap->gtt_size; - dst->gtt_page_sizes = vsnap->page_sizes; + dst->gtt_offset = vma_res->start; + dst->gtt_size = vma_res->node_size; + dst->gtt_page_sizes = vma_res->page_sizes_gtt; dst->unused = 0; ret = -EINVAL; @@ -1050,7 +1051,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, void __iomem *s; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vsnap->pages) { + for_each_sgt_daddr(dma, iter, vma_res->bi.pages) { mutex_lock(&ggtt->error_mutex); ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); @@ -1068,11 +1069,11 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) { - struct intel_memory_region *mem = vsnap->mr; + } else if (vma_res->bi.lmem) { + struct intel_memory_region *mem = 
vma_res->mr; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vsnap->pages) { + for_each_sgt_daddr(dma, iter, vma_res->bi.pages) { void __iomem *s; s = io_mapping_map_wc(&mem->iomap, @@ -1088,7 +1089,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, } else { struct page *page; - for_each_sgt_page(page, iter, vsnap->pages) { + for_each_sgt_page(page, iter, vma_res->bi.pages) { void *s; drm_clflush_pages(&page, 1); @@ -1324,33 +1325,32 @@ static bool record_context(struct i915_gem_context_coredump *e, struct intel_engine_capture_vma { struct intel_engine_capture_vma *next; - struct i915_vma_snapshot *vsnap; + struct i915_vma_resource *vma_res; char name[16]; bool lockdep_cookie; }; static struct intel_engine_capture_vma * capture_vma_snapshot(struct intel_engine_capture_vma *next, - struct i915_vma_snapshot *vsnap, - gfp_t gfp) + struct i915_vma_resource *vma_res, + gfp_t gfp, const char *name) { struct intel_engine_capture_vma *c; - if (!i915_vma_snapshot_present(vsnap)) + if (!vma_res) return next; c = kmalloc(sizeof(*c), gfp); if (!c) return next; - if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) { + if (!i915_vma_resource_hold(vma_res, &c->lockdep_cookie)) { kfree(c); return next; } - strcpy(c->name, vsnap->name); - c->vsnap = vsnap; - i915_vma_snapshot_get(vsnap); + strcpy(c->name, name); + c->vma_res = i915_vma_resource_get(vma_res); c->next = next; return c; @@ -1362,8 +1362,6 @@ capture_vma(struct intel_engine_capture_vma *next, const char *name, gfp_t gfp) { - struct i915_vma_snapshot *vsnap; - if (!vma) return next; @@ -1372,19 +1370,10 @@ capture_vma(struct intel_engine_capture_vma *next, * to a struct i915_vma_snapshot at command submission time. * Not here. */ - GEM_WARN_ON(!i915_vma_is_pinned(vma)); - if (!i915_vma_is_pinned(vma)) - return next; - - vsnap = i915_vma_snapshot_alloc(gfp); - if (!vsnap) + if (GEM_WARN_ON(!i915_vma_is_pinned(vma))) return next; - i915_vma_snapshot_init(vsnap, vma, name); - next = capture_vma_snapshot(next, vsnap, gfp); - - /* FIXME: Replace on async unbind. */ - i915_vma_snapshot_put(vsnap); + next = capture_vma_snapshot(next, vma->resource, gfp, name); return next; } @@ -1397,7 +1386,8 @@ capture_user(struct intel_engine_capture_vma *capture, struct i915_capture_list *c; for (c = rq->capture_list; c; c = c->next) - capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp); + capture = capture_vma_snapshot(capture, c->vma_res, gfp, + "user"); return capture; } @@ -1415,16 +1405,19 @@ static struct i915_vma_coredump * create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma, const char *name, struct i915_vma_compress *compress) { - struct i915_vma_coredump *ret; - struct i915_vma_snapshot tmp; + struct i915_vma_coredump *ret = NULL; + struct i915_vma_resource *vma_res; + bool lockdep_cookie; if (!vma) return NULL; - GEM_WARN_ON(!i915_vma_is_pinned(vma)); - i915_vma_snapshot_init_onstack(&tmp, vma, name); - ret = i915_vma_coredump_create(gt, &tmp, compress); - i915_vma_snapshot_put_onstack(&tmp); + vma_res = vma->resource; + + if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) { + ret = i915_vma_coredump_create(gt, vma_res, compress, name); + i915_vma_resource_unhold(vma_res, lockdep_cookie); + } return ret; } @@ -1471,7 +1464,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee, * as the simplest method to avoid being overwritten * by userspace. 
*/ - vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp); + vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch"); vma = capture_user(vma, rq, gfp); vma = capture_vma(vma, rq->ring->vma, "ring", gfp); vma = capture_vma(vma, rq->context->state, "HW context", gfp); @@ -1492,14 +1485,14 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, while (capture) { struct intel_engine_capture_vma *this = capture; - struct i915_vma_snapshot *vsnap = this->vsnap; + struct i915_vma_resource *vma_res = this->vma_res; add_vma(ee, - i915_vma_coredump_create(engine->gt, - vsnap, compress)); + i915_vma_coredump_create(engine->gt, vma_res, + compress, this->name)); - i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie); - i915_vma_snapshot_put(vsnap); + i915_vma_resource_unhold(vma_res, this->lockdep_cookie); + i915_vma_resource_put(vma_res); capture = this->next; kfree(this); diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index f6bcd2f89257..a8f175960b34 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -17,6 +17,7 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_vma.h" +#include "i915_vma_resource.h" static int i915_check_nomodeset(void) { @@ -64,6 +65,8 @@ static const struct { .exit = i915_scheduler_module_exit }, { .init = i915_vma_module_init, .exit = i915_vma_module_exit }, + { .init = i915_vma_resource_module_init, + .exit = i915_vma_resource_module_exit }, { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 170bba913c30..f9faf7697453 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1630,8 +1630,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - - intel_gt_ns_to_clock_interval(stream->perf->i915->ggtt.vm.gt, - atomic64_read(&stream->perf->noa_programming_delay)); + intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915), + atomic64_read(&stream->perf->noa_programming_delay)); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ -2114,7 +2114,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ - i915_reg_t flex_regs[] = { + static const i915_reg_t flex_regs[] = { EU_PERF_CNTL0, EU_PERF_CNTL1, EU_PERF_CNTL2, @@ -3542,7 +3542,7 @@ err: static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return intel_gt_clock_interval_to_ns(perf->i915->ggtt.vm.gt, + return intel_gt_clock_interval_to_ns(to_gt(perf->i915), 2ULL << exponent); } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 76cf5ac91e94..ba3a70b2cc57 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -116,8 +116,10 @@ static void i915_fence_release(struct dma_fence *fence) rq->guc_prio != GUC_PRIO_FINI); i915_request_free_capture_list(fetch_and_zero(&rq->capture_list)); - if (i915_vma_snapshot_present(&rq->batch_snapshot)) - i915_vma_snapshot_put_onstack(&rq->batch_snapshot); + if (rq->batch_res) { + i915_vma_resource_put(rq->batch_res); + rq->batch_res = NULL; + } /* * The request is put onto a RCU 
freelist (i.e. the address @@ -308,7 +310,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture) while (capture) { struct i915_capture_list *next = capture->next; - i915_vma_snapshot_put(capture->vma_snapshot); + i915_vma_resource_put(capture->vma_res); kfree(capture); capture = next; } @@ -854,7 +856,7 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->semaphore, semaphore_notify); clear_capture_list(rq); - rq->batch_snapshot.present = false; + rq->batch_res = NULL; init_llist_head(&rq->execute_cb); } @@ -960,7 +962,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) __rq_init_watchdog(rq); assert_capture_list_is_null(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); - GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot)); + GEM_BUG_ON(rq->batch_res); /* * Reserve space in the ring buffer for all the commands required to diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 170ee78c2858..28b1f9db5487 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -40,7 +40,7 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" -#include "i915_vma_snapshot.h" +#include "i915_vma_resource.h" #include <uapi/drm/i915_drm.h> @@ -52,7 +52,7 @@ struct i915_request; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct i915_capture_list { - struct i915_vma_snapshot *vma_snapshot; + struct i915_vma_resource *vma_res; struct i915_capture_list *next; }; @@ -300,7 +300,7 @@ struct i915_request { /** Batch buffer pointer for selftest internal use. */ I915_SELFTEST_DECLARE(struct i915_vma *batch); - struct i915_vma_snapshot batch_snapshot; + struct i915_vma_resource *batch_res; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) /** diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 29a858c53bdd..9d859b0a3fbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -37,6 +37,7 @@ #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" +#include "i915_vma_resource.h" static struct kmem_cache *slab_vmas; @@ -284,7 +285,7 @@ struct i915_vma_work { struct dma_fence_work base; struct i915_address_space *vm; struct i915_vm_pt_stash stash; - struct i915_vma *vma; + struct i915_vma_resource *vma_res; struct drm_i915_gem_object *pinned; struct i915_sw_dma_fence_cb cb; enum i915_cache_level cache_level; @@ -294,23 +295,24 @@ struct i915_vma_work { static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - struct i915_vma *vma = vw->vma; + struct i915_vma_resource *vma_res = vw->vma_res; + + vma_res->ops->bind_vma(vma_res->vm, &vw->stash, + vma_res, vw->cache_level, vw->flags); - vma->ops->bind_vma(vw->vm, &vw->stash, - vma, vw->cache_level, vw->flags); } static void __vma_release(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - if (vw->pinned) { - __i915_gem_object_unpin_pages(vw->pinned); + if (vw->pinned) i915_gem_object_put(vw->pinned); - } i915_vm_free_pt_stash(vw->vm, &vw->stash); i915_vm_put(vw->vm); + if (vw->vma_res) + i915_vma_resource_put(vw->vma_res); } static const struct dma_fence_work_ops bind_ops = { @@ -374,12 +376,27 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma) #define i915_vma_verify_bind_complete(_vma) 0 #endif +I915_SELFTEST_EXPORT void +i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, + struct i915_vma *vma) +{ + 
struct drm_i915_gem_object *obj = vma->obj; + + i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes, + obj->mm.rsgt, i915_gem_object_is_readonly(obj), + i915_gem_object_is_lmem(obj), obj->mm.region, + vma->ops, vma->private, vma->node.start, + vma->node.size, vma->size); +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map * @cache_level: mapping cache level * @flags: flags like global or local mapping * @work: preallocated worker for allocating and binding the PTE + * @vma_res: pointer to a preallocated vma resource. The resource is either + * consumed or freed. * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. @@ -388,10 +405,12 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma) int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags, - struct i915_vma_work *work) + struct i915_vma_work *work, + struct i915_vma_resource *vma_res) { u32 bind_flags; u32 vma_flags; + int ret; lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -399,11 +418,15 @@ int i915_vma_bind(struct i915_vma *vma, if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, vma->node.size, - vma->vm->total))) + vma->vm->total))) { + i915_vma_resource_free(vma_res); return -ENODEV; + } - if (GEM_DEBUG_WARN_ON(!flags)) + if (GEM_DEBUG_WARN_ON(!flags)) { + i915_vma_resource_free(vma_res); return -EINVAL; + } bind_flags = flags; bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; @@ -412,16 +435,44 @@ int i915_vma_bind(struct i915_vma *vma, vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; bind_flags &= ~vma_flags; - if (bind_flags == 0) + if (bind_flags == 0) { + i915_vma_resource_free(vma_res); return 0; + } GEM_BUG_ON(!atomic_read(&vma->pages_count)); + /* Wait for or await async unbinds touching our range */ + if (work && bind_flags & vma->vm->bind_async_flags) + ret = i915_vma_resource_bind_dep_await(vma->vm, + &work->base.chain, + vma->node.start, + vma->node.size, + true, + GFP_NOWAIT | + __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); + else + ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start, + vma->node.size, true); + if (ret) { + i915_vma_resource_free(vma_res); + return ret; + } + + if (vma->resource || !vma_res) { + /* Rebinding with an additional I915_VMA_*_BIND */ + GEM_WARN_ON(!vma_flags); + kfree(vma_res); + } else { + i915_vma_resource_init_from_vma(vma_res, vma); + vma->resource = vma_res; + } trace_i915_vma_bind(vma, bind_flags); if (work && bind_flags & vma->vm->bind_async_flags) { struct dma_fence *prev; - work->vma = vma; + work->vma_res = i915_vma_resource_get(vma->resource); work->cache_level = cache_level; work->flags = bind_flags; @@ -444,17 +495,27 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ - __i915_gem_object_pin_pages(vma->obj); - work->pinned = i915_gem_object_get(vma->obj); + /* + * If we don't have the refcounted pages list, keep a reference + * on the object to avoid waiting for the async bind to + * complete in the object destruction path. 
+ */ + if (!work->vma_res->bi.pages_rsgt) + work->pinned = i915_gem_object_get(vma->obj); } else { if (vma->obj) { int ret; ret = i915_gem_object_wait_moving_fence(vma->obj, true); - if (ret) + if (ret) { + i915_vma_resource_free(vma->resource); + vma->resource = NULL; + return ret; + } } - vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); + vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, + bind_flags); } atomic_or(bind_flags, &vma->flags); @@ -1224,6 +1285,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, { struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; + struct i915_vma_resource *vma_res = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; @@ -1278,6 +1340,12 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } } + vma_res = i915_vma_resource_alloc(); + if (IS_ERR(vma_res)) { + err = PTR_ERR(vma_res); + goto err_fence; + } + /* * Differentiate between user/kernel vma inside the aliasing-ppgtt. * @@ -1298,7 +1366,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, err = mutex_lock_interruptible_nested(&vma->vm->mutex, !(flags & PIN_GLOBAL)); if (err) - goto err_fence; + goto err_vma_res; /* No more allocations allowed now we hold vm->mutex */ @@ -1339,7 +1407,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, vma->obj->cache_level, - flags, work); + flags, work, vma_res); + vma_res = NULL; if (err) goto err_remove; @@ -1362,6 +1431,8 @@ err_active: i915_active_release(&vma->active); err_unlock: mutex_unlock(&vma->vm->mutex); +err_vma_res: + kfree(vma_res); err_fence: if (work) dma_fence_work_commit_imm(&work->base); @@ -1512,6 +1583,7 @@ void i915_vma_release(struct kref *ref) i915_vm_put(vma->vm); i915_active_fini(&vma->active); + GEM_WARN_ON(vma->resource); i915_vma_free(vma); } @@ -1658,8 +1730,11 @@ int _i915_vma_move_to_active(struct i915_vma *vma, return 0; } -void __i915_vma_evict(struct i915_vma *vma) +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) { + struct i915_vma_resource *vma_res = vma->resource; + struct dma_fence *unbind_fence; + GEM_BUG_ON(i915_vma_is_pinned(vma)); if (i915_vma_is_map_and_fenceable(vma)) { @@ -1690,15 +1765,36 @@ void __i915_vma_evict(struct i915_vma *vma) GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(atomic_read(&vma->vm->open))) { - trace_i915_vma_unbind(vma); - vma->ops->unbind_vma(vma->vm, vma); - } + /* Object backend must be async capable. */ + GEM_WARN_ON(async && !vma->resource->bi.pages_rsgt); + + /* If vm is not open, unbind is a nop. */ + vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) && + atomic_read(&vma->vm->open); + trace_i915_vma_unbind(vma); + + unbind_fence = i915_vma_resource_unbind(vma_res); + vma->resource = NULL; + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), &vma->flags); i915_vma_detach(vma); + + if (!async && unbind_fence) { + dma_fence_wait(unbind_fence, false); + dma_fence_put(unbind_fence); + unbind_fence = NULL; + } + + /* + * Binding itself may not have completed until the unbind fence signals, + * so don't drop the pages until that happens, unless the resource is + * async_capable. 
+ */ + vma_unbind_pages(vma); + return unbind_fence; } int __i915_vma_unbind(struct i915_vma *vma) @@ -1725,12 +1821,47 @@ int __i915_vma_unbind(struct i915_vma *vma) return ret; GEM_BUG_ON(i915_vma_is_active(vma)); - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; } +static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma) +{ + struct dma_fence *fence; + + lockdep_assert_held(&vma->vm->mutex); + + if (!drm_mm_node_allocated(&vma->node)) + return NULL; + + if (i915_vma_is_pinned(vma) || + &vma->obj->mm.rsgt->table != vma->resource->bi.pages) + return ERR_PTR(-EAGAIN); + + /* + * We probably need to replace this with awaiting the fences of the + * object's dma_resv when the vma active goes away. When doing that + * we need to be careful to not add the vma_resource unbind fence + * immediately to the object's dma_resv, because then unbinding + * the next vma from the object, in case there are many, will + * actually await the unbinding of the previous vmas, which is + * undesirable. + */ + if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active, + I915_ACTIVE_AWAIT_EXCL | + I915_ACTIVE_AWAIT_ACTIVE) < 0) { + return ERR_PTR(-EBUSY); + } + + fence = __i915_vma_evict(vma, true); + + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ + + return fence; +} + int i915_vma_unbind(struct i915_vma *vma) { struct i915_address_space *vm = vma->vm; @@ -1767,6 +1898,68 @@ out_rpm: return err; } +int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref = 0; + struct dma_fence *fence; + int err; + + /* + * We need the dma-resv lock since we add the + * unbind fence to the dma-resv object. + */ + assert_object_held(obj); + + if (!drm_mm_node_allocated(&vma->node)) + return 0; + + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + if (!obj->mm.rsgt) + return -EBUSY; + + err = dma_resv_reserve_shared(obj->base.resv, 1); + if (err) + return -EBUSY; + + /* + * It would be great if we could grab this wakeref from the + * async unbind work if needed, but we can't because it uses + * kmalloc and it's in the dma-fence signalling critical path. 
+ */ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + if (trylock_vm && !mutex_trylock(&vm->mutex)) { + err = -EBUSY; + goto out_rpm; + } else if (!trylock_vm) { + err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref); + if (err) + goto out_rpm; + } + + fence = __i915_vma_unbind_async(vma); + mutex_unlock(&vm->mutex); + if (IS_ERR_OR_NULL(fence)) { + err = PTR_ERR_OR_ZERO(fence); + goto out_rpm; + } + + dma_resv_add_shared_fence(obj->base.resv, fence); + dma_fence_put(fence); + +out_rpm: + if (wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 32719431b3df..a560bae04e7e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -37,6 +37,7 @@ #include "i915_active.h" #include "i915_request.h" +#include "i915_vma_resource.h" #include "i915_vma_types.h" struct i915_vma * @@ -204,16 +205,18 @@ struct i915_vma_work *i915_vma_work(void); int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags, - struct i915_vma_work *work); + struct i915_vma_work *work, + struct i915_vma_resource *vma_res); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); -void __i915_vma_evict(struct i915_vma *vma); +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); +int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); @@ -337,12 +340,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); */ void i915_vma_unpin_iomap(struct i915_vma *vma); -static inline struct page *i915_vma_first_page(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - return sg_page(vma->pages->sgl); -} - /** * i915_vma_pin_fence - pin fencing state * @vma: vma to pin fencing for @@ -428,6 +425,26 @@ static inline int i915_vma_sync(struct i915_vma *vma) return i915_active_wait(&vma->active); } +/** + * i915_vma_get_current_resource - Get the current resource of the vma + * @vma: The vma to get the current resource from. + * + * It's illegal to call this function if the vma is not bound. + * + * Return: A refcounted pointer to the current vma resource + * of the vma, assuming the vma is bound. 
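A hedged usage sketch of i915_vma_get_current_resource() (illustrative, not part of this commit); the surrounding caller and inspect_binding() are assumptions, and the caller must keep the vma bound for the duration of the call:

	struct i915_vma_resource *vma_res;

	/* Caller keeps @vma bound, e.g. via a pin or by holding vm->mutex. */
	vma_res = i915_vma_get_current_resource(vma);

	/* The resource snapshots the binding, so it survives a later async unbind. */
	inspect_binding(vma_res->start, vma_res->node_size);

	i915_vma_resource_put(vma_res);
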
+ */ +static inline struct i915_vma_resource * +i915_vma_get_current_resource(struct i915_vma *vma) +{ + return i915_vma_resource_get(vma->resource); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +void i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, + struct i915_vma *vma); +#endif + void i915_vma_module_exit(void); int i915_vma_module_init(void); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c new file mode 100644 index 000000000000..1f41c0c699eb --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/interval_tree_generic.h> +#include <linux/slab.h> + +#include "i915_sw_fence.h" +#include "i915_vma_resource.h" +#include "i915_drv.h" +#include "intel_memory_region.h" + +#include "gt/intel_gtt.h" + +static struct kmem_cache *slab_vma_resources; + +/** + * DOC: + * We use a per-vm interval tree to keep track of vma_resources + * scheduled for unbind but not yet unbound. The tree is protected by + * the vm mutex, and nodes are removed just after the unbind fence signals. + * The removal takes the vm mutex from a kernel thread which we need to + * keep in mind so that we don't grab the mutex and try to wait for all + * pending unbinds to complete, because that will temporaryily block many + * of the workqueue threads, and people will get angry. + * + * We should consider using a single ordered fence per VM instead but that + * requires ordering the unbinds and might introduce unnecessary waiting + * for unrelated unbinds. Amount of code will probably be roughly the same + * due to the simplicity of using the interval tree interface. + * + * Another drawback of this interval tree is that the complexity of insertion + * and removal of fences increases as O(ln(pending_unbinds)) instead of + * O(1) for a single fence without interval tree. + */ +#define VMA_RES_START(_node) ((_node)->start) +#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) +INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, + u64, __subtree_last, + VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); + +/* Callbacks for the unbind dma-fence. */ + +/** + * i915_vma_resource_alloc - Allocate a vma resource + * + * Return: A pointer to a cleared struct i915_vma_resource or + * a -ENOMEM error pointer if allocation fails. + */ +struct i915_vma_resource *i915_vma_resource_alloc(void) +{ + struct i915_vma_resource *vma_res = + kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); + + return vma_res ? vma_res : ERR_PTR(-ENOMEM); +} + +/** + * i915_vma_resource_free - Free a vma resource + * @vma_res: The vma resource to free. 
+ */ +void i915_vma_resource_free(struct i915_vma_resource *vma_res) +{ + kmem_cache_free(slab_vma_resources, vma_res); +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "vma unbind fence"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void unbind_fence_free_rcu(struct rcu_head *head) +{ + struct i915_vma_resource *vma_res = + container_of(head, typeof(*vma_res), unbind_fence.rcu); + + i915_vma_resource_free(vma_res); +} + +static void unbind_fence_release(struct dma_fence *fence) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), unbind_fence); + + i915_sw_fence_fini(&vma_res->chain); + + call_rcu(&fence->rcu, unbind_fence_free_rcu); +} + +static struct dma_fence_ops unbind_fence_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, + .release = unbind_fence_release, +}; + +static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) +{ + struct i915_address_space *vm; + + if (!refcount_dec_and_test(&vma_res->hold_count)) + return; + + dma_fence_signal(&vma_res->unbind_fence); + + vm = vma_res->vm; + if (vma_res->wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); + + vma_res->vm = NULL; + if (!RB_EMPTY_NODE(&vma_res->rb)) { + mutex_lock(&vm->mutex); + vma_res_itree_remove(vma_res, &vm->pending_unbind); + mutex_unlock(&vm->mutex); + } + + if (vma_res->bi.pages_rsgt) + i915_refct_sgt_put(vma_res->bi.pages_rsgt); +} + +/** + * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind + * fence. + * @vma_res: The vma resource. + * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. + * + * The function may leave a dma_fence critical section. + */ +void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, + bool lockdep_cookie) +{ + dma_fence_end_signalling(lockdep_cookie); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + unsigned long irq_flags; + + /* Inefficient open-coded might_lock_irqsave() */ + spin_lock_irqsave(&vma_res->lock, irq_flags); + spin_unlock_irqrestore(&vma_res->lock, irq_flags); + } + + __i915_vma_resource_unhold(vma_res); +} + +/** + * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. + * @vma_res: The vma resource. + * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should + * be given as an argument to the pairing i915_vma_resource_unhold. + * + * If returning true, the function enters a dma_fence signalling critical + * section if not in one already. + * + * Return: true if holding successful, false if not. 
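Reduced to its essentials, the hold/unhold pairing looks like the create_vma_coredump() hunk earlier in this diff (a sketch; gt, compress, name and dump stand in for that caller's locals):

	bool lockdep_cookie;

	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
		/* The unbind fence cannot signal here, so vma_res->bi.pages stays valid. */
		dump = i915_vma_coredump_create(gt, vma_res, compress, name);
		i915_vma_resource_unhold(vma_res, lockdep_cookie);
	}
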
+ */ +bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, + bool *lockdep_cookie) +{ + bool held = refcount_inc_not_zero(&vma_res->hold_count); + + if (held) + *lockdep_cookie = dma_fence_begin_signalling(); + + return held; +} + +static void i915_vma_resource_unbind_work(struct work_struct *work) +{ + struct i915_vma_resource *vma_res = + container_of(work, typeof(*vma_res), work); + struct i915_address_space *vm = vma_res->vm; + bool lockdep_cookie; + + lockdep_cookie = dma_fence_begin_signalling(); + if (likely(atomic_read(&vm->open))) + vma_res->ops->unbind_vma(vm, vma_res); + + dma_fence_end_signalling(lockdep_cookie); + __i915_vma_resource_unhold(vma_res); + i915_vma_resource_put(vma_res); +} + +static int +i915_vma_resource_fence_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), chain); + struct dma_fence *unbind_fence = + &vma_res->unbind_fence; + + switch (state) { + case FENCE_COMPLETE: + dma_fence_get(unbind_fence); + if (vma_res->immediate_unbind) { + i915_vma_resource_unbind_work(&vma_res->work); + } else { + INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); + queue_work(system_unbound_wq, &vma_res->work); + } + break; + case FENCE_FREE: + i915_vma_resource_put(vma_res); + break; + } + + return NOTIFY_DONE; +} + +/** + * i915_vma_resource_unbind - Unbind a vma resource + * @vma_res: The vma resource to unbind. + * + * At this point this function does little more than publish a fence that + * signals immediately unless signaling is held back. + * + * Return: A refcounted pointer to a dma-fence that signals when unbinding is + * complete. + */ +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) +{ + struct i915_address_space *vm = vma_res->vm; + + /* Reference for the sw fence */ + i915_vma_resource_get(vma_res); + + /* Caller must already have a wakeref in this case. */ + if (vma_res->needs_wakeref) + vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); + + if (atomic_read(&vma_res->chain.pending) <= 1) { + RB_CLEAR_NODE(&vma_res->rb); + vma_res->immediate_unbind = 1; + } else { + vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); + } + + i915_sw_fence_commit(&vma_res->chain); + + return &vma_res->unbind_fence; +} + +/** + * __i915_vma_resource_init - Initialize a vma resource. + * @vma_res: The vma resource to initialize + * + * Initializes the private members of a vma resource. + */ +void __i915_vma_resource_init(struct i915_vma_resource *vma_res) +{ + spin_lock_init(&vma_res->lock); + dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, + &vma_res->lock, 0, 0); + refcount_set(&vma_res->hold_count, 1); + i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); +} + +static void +i915_vma_resource_color_adjust_range(struct i915_address_space *vm, + u64 *start, + u64 *end) +{ + if (i915_vm_has_cache_coloring(vm)) { + if (*start) + *start -= I915_GTT_PAGE_SIZE; + *end += I915_GTT_PAGE_SIZE; + } +} + +/** + * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a + * certain vm range. + * @vm: The vm to look at. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interrubtible. + * + * The function needs to be called with the vm lock held. 
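i915_vma_bind() earlier in this diff is the synchronous caller; its essential shape, with node standing in for the vma's drm_mm node (illustrative only):

	int err;

	lockdep_assert_held(&vm->mutex);

	err = i915_vma_resource_bind_dep_sync(vm, node->start, node->size, true);
	if (err)	/* -ERESTARTSYS: the wait was interrupted */
		return err;
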
+ * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 offset, + u64 size, + bool intr) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret = dma_fence_wait(&node->unbind_fence, intr); + + if (ret) + return ret; + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +/** + * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, + * releasing the vm lock while waiting. + * @vm: The vm to look at. + * + * The function may not be called with the vm lock held. + * Typically this is called at vm destruction to finish any pending + * unbind operations. The vm mutex is released while waiting to avoid + * stalling kernel workqueues trying to grab the mutex. + */ +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) +{ + struct i915_vma_resource *node; + struct dma_fence *fence; + + do { + fence = NULL; + mutex_lock(&vm->mutex); + node = vma_res_itree_iter_first(&vm->pending_unbind, 0, + U64_MAX); + if (node) + fence = dma_fence_get_rcu(&node->unbind_fence); + mutex_unlock(&vm->mutex); + + if (fence) { + /* + * The wait makes sure the node eventually removes + * itself from the tree. + */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + } while (node); +} + +/** + * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all + * pending unbinds in a certain range of a vm. + * @vm: The vm to look at. + * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interrubtible. + * @gfp: Allocation mode for memory allocations. + * + * The function makes @sw_fence await all pending unbinds in a certain + * vm range before calling the complete notifier. To be able to await + * each individual unbind, the function needs to allocate memory using + * the @gpf allocation mode. If that fails, the function will instead + * wait for the unbind fence to signal, using @intr to judge whether to + * wait interruptible or not. Note that @gfp should ideally be selected so + * as to avoid any expensive memory allocation stalls and rather fail and + * synchronize itself. For now the vm mutex is required when calling this + * function with means that @gfp can't call into direct reclaim. In reality + * this means that during heavy memory pressure, we will sync in this + * function. 
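For reference, the async path in i915_vma_bind() earlier in this diff hooks the bind worker's fence chain up roughly like this (reduced; vm, work and node stand in for that caller's locals):

	err = i915_vma_resource_bind_dep_await(vm, &work->base.chain,
					       node->start, node->size, true,
					       GFP_NOWAIT |
					       __GFP_RETRY_MAYFAIL |
					       __GFP_NOWARN);
	if (err)	/* allocation failed and the fallback wait was interrupted */
		return err;
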
+ * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 offset, + u64 size, + bool intr, + gfp_t gfp) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_alloc(gfp); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret; + + ret = i915_sw_fence_await_dma_fence(sw_fence, + &node->unbind_fence, + 0, gfp); + if (ret < 0) { + ret = dma_fence_wait(&node->unbind_fence, intr); + if (ret) + return ret; + } + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +void i915_vma_resource_module_exit(void) +{ + kmem_cache_destroy(slab_vma_resources); +} + +int __init i915_vma_resource_module_init(void) +{ + slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); + if (!slab_vma_resources) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h new file mode 100644 index 000000000000..25913913baa6 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_VMA_RESOURCE_H__ +#define __I915_VMA_RESOURCE_H__ + +#include <linux/dma-fence.h> +#include <linux/refcount.h> + +#include "i915_gem.h" +#include "i915_scatterlist.h" +#include "i915_sw_fence.h" +#include "intel_runtime_pm.h" + +struct intel_memory_region; + +struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e the mask of + * the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; +}; + +/** + * struct i915_vma_resource - Snapshotted unbind information. + * @unbind_fence: Fence to mark unbinding complete. Note that this fence + * is not considered published until unbind is scheduled, and as such it + * is illegal to access this fence before scheduled unbind other than + * for refcounting. + * @lock: The @unbind_fence lock. + * @hold_count: Number of holders blocking the fence from finishing. + * The vma itself is keeping a hold, which is released when unbind + * is scheduled. + * @work: Work struct for deferred unbind work. + * @chain: Pointer to struct i915_sw_fence used to await dependencies. + * @rb: Rb node for the vm's pending unbind interval tree. + * @__subtree_last: Interval tree private member. + * @vm: non-refcounted pointer to the vm. This is for internal use only and + * this member is cleared after vm_resource unbind. + * @mr: The memory region of the object pointed to by the vma. + * @ops: Pointer to the backend i915_vma_ops. + * @private: Bind backend private info. + * @start: Offset into the address space of bind range start. + * @node_size: Size of the allocated range manager node. + * @vma_size: Bind size. + * @page_sizes_gtt: Resulting page sizes from the bind operation. + * @bound_flags: Flags indicating binding status. + * @allocated: Backend private data. TODO: Should move into @private. 
+ * @immediate_unbind: Unbind can be done immediately and doesn't need to be + * deferred to a work item awaiting unsignaled fences. This is a hack. + * (dma_fence_work uses a fence flag for this, but this seems slightly + * cleaner). + * + * The lifetime of a struct i915_vma_resource is from a binding request to + * the actual possible asynchronous unbind has completed. + */ +struct i915_vma_resource { + struct dma_fence unbind_fence; + /* See above for description of the lock. */ + spinlock_t lock; + refcount_t hold_count; + struct work_struct work; + struct i915_sw_fence chain; + struct rb_node rb; + u64 __subtree_last; + struct i915_address_space *vm; + intel_wakeref_t wakeref; + + /** + * struct i915_vma_bindinfo - Information needed for async bind + * only but that can be dropped after the bind has taken place. + * Consider making this a separate argument to the bind_vma + * op, coalescing with other arguments like vm, stash, cache_level + * and flags + * @pages: The pages sg-table. + * @page_sizes: Page sizes of the pages. + * @pages_rsgt: Refcounted sg-table when delayed object destruction + * is supported. May be NULL. + * @readonly: Whether the vma should be bound read-only. + * @lmem: Whether the vma points to lmem. + */ + struct i915_vma_bindinfo { + struct sg_table *pages; + struct i915_page_sizes page_sizes; + struct i915_refct_sgt *pages_rsgt; + bool readonly:1; + bool lmem:1; + } bi; + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + struct intel_memory_region *mr; +#endif + const struct i915_vma_ops *ops; + void *private; + u64 start; + u64 node_size; + u64 vma_size; + u32 page_sizes_gtt; + + u32 bound_flags; + bool allocated:1; + bool immediate_unbind:1; + bool needs_wakeref:1; +}; + +bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, + bool *lockdep_cookie); + +void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, + bool lockdep_cookie); + +struct i915_vma_resource *i915_vma_resource_alloc(void); + +void i915_vma_resource_free(struct i915_vma_resource *vma_res); + +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res); + +void __i915_vma_resource_init(struct i915_vma_resource *vma_res); + +/** + * i915_vma_resource_get - Take a reference on a vma resource + * @vma_res: The vma resource on which to take a reference. + * + * Return: The @vma_res pointer + */ +static inline struct i915_vma_resource +*i915_vma_resource_get(struct i915_vma_resource *vma_res) +{ + dma_fence_get(&vma_res->unbind_fence); + return vma_res; +} + +/** + * i915_vma_resource_put - Release a reference to a struct i915_vma_resource + * @vma_res: The resource + */ +static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) +{ + dma_fence_put(&vma_res->unbind_fence); +} + +/** + * i915_vma_resource_init - Initialize a vma resource. + * @vma_res: The vma resource to initialize + * @vm: Pointer to the vm. + * @pages: The pages sg-table. + * @page_sizes: Page sizes of the pages. + * @pages_rsgt: Pointer to a struct i915_refct_sgt of an object with + * delayed destruction. + * @readonly: Whether the vma should be bound read-only. + * @lmem: Whether the vma points to lmem. + * @mr: The memory region of the object the vma points to. + * @ops: The backend ops. + * @private: Bind backend private info. + * @start: Offset into the address space of bind range start. + * @node_size: Size of the allocated range manager node. + * @size: Bind size. + * + * Initializes a vma resource allocated using i915_vma_resource_alloc(). 
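A hedged sketch of the intended split around the vm lock (mirroring i915_vma_pin_ww() and i915_vma_bind() earlier in this diff; vm, pages, page_sizes, ops, node and size are assumed caller locals):

	struct i915_vma_resource *vma_res;

	vma_res = i915_vma_resource_alloc();	/* may sleep; called before vm->mutex */
	if (IS_ERR(vma_res))
		return PTR_ERR(vma_res);

	mutex_lock(&vm->mutex);
	/* No allocations allowed from here on. */
	i915_vma_resource_init(vma_res, vm, pages, &page_sizes,
			       NULL /* pages_rsgt */, false /* readonly */,
			       false /* lmem */, NULL /* mr */, ops,
			       NULL /* private */, node->start, node->size, size);
	mutex_unlock(&vm->mutex);
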
+ * The reason for having separate allocate and initialize function is that + * initialization may need to be performed from under a lock where + * allocation is not allowed. + */ +static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, + struct i915_address_space *vm, + struct sg_table *pages, + const struct i915_page_sizes *page_sizes, + struct i915_refct_sgt *pages_rsgt, + bool readonly, + bool lmem, + struct intel_memory_region *mr, + const struct i915_vma_ops *ops, + void *private, + u64 start, + u64 node_size, + u64 size) +{ + __i915_vma_resource_init(vma_res); + vma_res->vm = vm; + vma_res->bi.pages = pages; + vma_res->bi.page_sizes = *page_sizes; + if (pages_rsgt) + vma_res->bi.pages_rsgt = i915_refct_sgt_get(pages_rsgt); + vma_res->bi.readonly = readonly; + vma_res->bi.lmem = lmem; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + vma_res->mr = mr; +#endif + vma_res->ops = ops; + vma_res->private = private; + vma_res->start = start; + vma_res->node_size = node_size; + vma_res->vma_size = size; +} + +static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res) +{ + GEM_BUG_ON(refcount_read(&vma_res->hold_count) != 1); + if (vma_res->bi.pages_rsgt) + i915_refct_sgt_put(vma_res->bi.pages_rsgt); + i915_sw_fence_fini(&vma_res->chain); +} + +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 first, + u64 last, + bool intr); + +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 first, + u64 last, + bool intr, + gfp_t gfp); + +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm); + +void i915_vma_resource_module_exit(void); + +int i915_vma_resource_module_init(void); + +#endif diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c deleted file mode 100644 index 2949ceea9884..000000000000 --- a/drivers/gpu/drm/i915/i915_vma_snapshot.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "i915_vma_snapshot.h" -#include "i915_vma_types.h" -#include "i915_vma.h" - -/** - * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from - * a struct i915_vma. - * @vsnap: The i915_vma_snapshot to init. - * @vma: A struct i915_vma used to initialize @vsnap. - * @name: Name associated with the snapshot. The character pointer needs to - * stay alive over the lifitime of the shapsot - */ -void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, - struct i915_vma *vma, - const char *name) -{ - if (!i915_vma_is_pinned(vma)) - assert_object_held(vma->obj); - - vsnap->name = name; - vsnap->size = vma->size; - vsnap->obj_size = vma->obj->base.size; - vsnap->gtt_offset = vma->node.start; - vsnap->gtt_size = vma->node.size; - vsnap->page_sizes = vma->page_sizes.gtt; - vsnap->pages = vma->pages; - vsnap->pages_rsgt = NULL; - vsnap->mr = NULL; - if (vma->obj->mm.rsgt) - vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt); - vsnap->mr = vma->obj->mm.region; - kref_init(&vsnap->kref); - vsnap->vma_resource = &vma->active; - vsnap->onstack = false; - vsnap->present = true; -} - -/** - * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from - * a struct i915_vma, but avoid kfreeing it on last put. - * @vsnap: The i915_vma_snapshot to init. - * @vma: A struct i915_vma used to initialize @vsnap. - * @name: Name associated with the snapshot. 
The character pointer needs to - * stay alive over the lifitime of the shapsot - */ -void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, - struct i915_vma *vma, - const char *name) -{ - i915_vma_snapshot_init(vsnap, vma, name); - vsnap->onstack = true; -} - -static void vma_snapshot_release(struct kref *ref) -{ - struct i915_vma_snapshot *vsnap = - container_of(ref, typeof(*vsnap), kref); - - vsnap->present = false; - if (vsnap->pages_rsgt) - i915_refct_sgt_put(vsnap->pages_rsgt); - if (!vsnap->onstack) - kfree(vsnap); -} - -/** - * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference - * @vsnap: The pointer reference - */ -void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap) -{ - kref_put(&vsnap->kref, vma_snapshot_release); -} - -/** - * i915_vma_snapshot_put_onstack - Put an onstcak i915_vma_snapshot pointer - * reference and varify that the structure is released - * @vsnap: The pointer reference - * - * This function is intended to be paired with a i915_vma_init_onstack() - * and should be called before exiting the scope that declared or - * freeing the structure that embedded @vsnap to verify that all references - * have been released. - */ -void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap) -{ - if (!kref_put(&vsnap->kref, vma_snapshot_release)) - GEM_BUG_ON(1); -} - -/** - * i915_vma_snapshot_resource_pin - Temporarily block the memory the - * vma snapshot is pointing to from being released. - * @vsnap: The vma snapshot. - * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs - * to be passed to the paired i915_vma_snapshot_resource_unpin. - * - * This function will temporarily try to hold up a fence or similar structure - * and will therefore enter a fence signaling critical section. - * - * Return: true if we succeeded in blocking the memory from being released, - * false otherwise. - */ -bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, - bool *lockdep_cookie) -{ - bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource); - - if (pinned) - *lockdep_cookie = dma_fence_begin_signalling(); - - return pinned; -} - -/** - * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from - * being released. - * @vsnap: The vma snapshot. - * @lockdep_cookie: Cookie returned from matching i915_vma_resource_pin(). - * - * Might leave a fence signalling critical section and signal a fence. - */ -void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, - bool lockdep_cookie) -{ - dma_fence_end_signalling(lockdep_cookie); - - return i915_active_release(vsnap->vma_resource); -} diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h deleted file mode 100644 index 940581df4622..000000000000 --- a/drivers/gpu/drm/i915/i915_vma_snapshot.h +++ /dev/null @@ -1,112 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2021 Intel Corporation - */ -#ifndef _I915_VMA_SNAPSHOT_H_ -#define _I915_VMA_SNAPSHOT_H_ - -#include <linux/kref.h> -#include <linux/slab.h> -#include <linux/types.h> - -struct i915_active; -struct i915_refct_sgt; -struct i915_vma; -struct intel_memory_region; -struct sg_table; - -/** - * DOC: Simple utilities for snapshotting GPU vma metadata, later used for - * error capture. Vi use a separate header for this to avoid issues due to - * recursive header includes. - */ - -/** - * struct i915_vma_snapshot - Snapshot of vma metadata. - * @size: The vma size in bytes. 
- * @obj_size: The size of the underlying object in bytes. - * @gtt_offset: The gtt offset the vma is bound to. - * @gtt_size: The size in bytes allocated for the vma in the GTT. - * @pages: The struct sg_table pointing to the pages bound. - * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any. - * @mr: The memory region pointed for the pages bound. - * @kref: Reference for this structure. - * @vma_resource: FIXME: A means to keep the unbind fence from signaling. - * Temporarily while we have only sync unbinds, and still use the vma - * active, we use that. With async unbinding we need a signaling refcount - * for the unbind fence. - * @page_sizes: The vma GTT page sizes information. - * @onstack: Whether the structure shouldn't be freed on final put. - * @present: Whether the structure is present and initialized. - */ -struct i915_vma_snapshot { - const char *name; - size_t size; - size_t obj_size; - size_t gtt_offset; - size_t gtt_size; - struct sg_table *pages; - struct i915_refct_sgt *pages_rsgt; - struct intel_memory_region *mr; - struct kref kref; - struct i915_active *vma_resource; - u32 page_sizes; - bool onstack:1; - bool present:1; -}; - -void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, - struct i915_vma *vma, - const char *name); - -void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, - struct i915_vma *vma, - const char *name); - -void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap); - -void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap); - -bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, - bool *lockdep_cookie); - -void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, - bool lockdep_cookie); - -/** - * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot - * @gfp: Allocation mode. - * - * Return: A pointer to a struct i915_vma_snapshot if successful. - * NULL otherwise. - */ -static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp) -{ - return kmalloc(sizeof(struct i915_vma_snapshot), gfp); -} - -/** - * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot - * - * Return: A pointer to a struct i915_vma_snapshot. - */ -static inline struct i915_vma_snapshot * -i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap) -{ - kref_get(&vsnap->kref); - return vsnap; -} - -/** - * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is - * present and initialized. - * - * Return: true if present and initialized; false otherwise. 
- */ -static inline bool -i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap) -{ - return vsnap && vsnap->present; -} - -#endif diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index ca575e129ced..88370dadca82 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -95,6 +95,8 @@ enum i915_cache_level; * */ +struct i915_vma_resource; + struct intel_remapped_plane_info { /* in gtt pages */ u32 offset:31; @@ -247,22 +249,20 @@ struct i915_vma { #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) -#define I915_VMA_ALLOC_BIT 12 - -#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR_BIT 12 #define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) -#define I915_VMA_GGTT_BIT 14 -#define I915_VMA_CAN_FENCE_BIT 15 -#define I915_VMA_USERFAULT_BIT 16 -#define I915_VMA_GGTT_WRITE_BIT 17 +#define I915_VMA_GGTT_BIT 13 +#define I915_VMA_CAN_FENCE_BIT 14 +#define I915_VMA_USERFAULT_BIT 15 +#define I915_VMA_GGTT_WRITE_BIT 16 #define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) #define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) #define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) #define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) -#define I915_VMA_SCANOUT_BIT 18 +#define I915_VMA_SCANOUT_BIT 17 #define I915_VMA_SCANOUT ((int)BIT(I915_VMA_SCANOUT_BIT)) struct i915_active active; @@ -291,6 +291,9 @@ struct i915_vma { struct list_head evict_link; struct list_head closed_link; + + /** The async vma resource. Protected by the vm_mutex */ + struct i915_vma_resource *resource; }; #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index b5576888cd78..1628b81d0a35 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -41,7 +41,7 @@ static int switch_to_context(struct i915_gem_context *ctx) static void trash_stolen(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; const u64 slot = ggtt->error_capture.start; const resource_size_t size = resource_size(&i915->dsm); unsigned long page; @@ -99,7 +99,7 @@ static void igt_pm_suspend(struct drm_i915_private *i915) intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - i915_ggtt_suspend(&i915->ggtt); + i915_ggtt_suspend(to_gt(i915)->ggtt); i915_gem_suspend_late(i915); } } @@ -109,7 +109,7 @@ static void igt_pm_hibernate(struct drm_i915_private *i915) intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - i915_ggtt_suspend(&i915->ggtt); + i915_ggtt_suspend(to_gt(i915)->ggtt); i915_gem_freeze(i915); i915_gem_freeze_late(i915); @@ -125,7 +125,7 @@ static void igt_pm_resume(struct drm_i915_private *i915) * that runtime-pm just works. 
*/ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - i915_ggtt_resume(&i915->ggtt); + i915_ggtt_resume(to_gt(i915)->ggtt); i915_gem_resume(i915); } } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 575705c3bce9..076d860ce01a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -32,6 +32,7 @@ #include "i915_random.h" #include "i915_selftest.h" +#include "i915_vma_resource.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -238,11 +239,11 @@ static int lowlevel_hole(struct i915_address_space *vm, unsigned long end_time) { I915_RND_STATE(seed_prng); - struct i915_vma *mock_vma; + struct i915_vma_resource *mock_vma_res; unsigned int size; - mock_vma = kzalloc(sizeof(*mock_vma), GFP_KERNEL); - if (!mock_vma) + mock_vma_res = kzalloc(sizeof(*mock_vma_res), GFP_KERNEL); + if (!mock_vma_res) return -ENOMEM; /* Keep creating larger objects until one cannot fit into the hole */ @@ -268,7 +269,7 @@ static int lowlevel_hole(struct i915_address_space *vm, break; } while (count >>= 1); if (!count) { - kfree(mock_vma); + kfree(mock_vma_res); return -ENOMEM; } GEM_BUG_ON(!order); @@ -342,12 +343,12 @@ alloc_vm_end: break; } - mock_vma->pages = obj->mm.pages; - mock_vma->node.size = BIT_ULL(size); - mock_vma->node.start = addr; + mock_vma_res->bi.pages = obj->mm.pages; + mock_vma_res->node_size = BIT_ULL(size); + mock_vma_res->start = addr; with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) - vm->insert_entries(vm, mock_vma, + vm->insert_entries(vm, mock_vma_res, I915_CACHE_NONE, 0); } count = n; @@ -370,7 +371,7 @@ alloc_vm_end: cleanup_freed_objects(vm->i915); } - kfree(mock_vma); + kfree(mock_vma_res); return 0; } @@ -1122,7 +1123,7 @@ static int exercise_ggtt(struct drm_i915_private *i915, u64 hole_start, u64 hole_end, unsigned long end_time)) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; u64 hole_start, hole_end, last = 0; struct drm_mm_node *node; IGT_TIMEOUT(end_time); @@ -1182,7 +1183,7 @@ static int igt_ggtt_page(void *arg) const unsigned int count = PAGE_SIZE/sizeof(u32); I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct drm_i915_gem_object *obj; intel_wakeref_t wakeref; struct drm_mm_node tmp; @@ -1279,6 +1280,7 @@ static void track_vma_bind(struct i915_vma *vma) atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; + vma->resource->bi.pages = vma->pages; mutex_lock(&vma->vm->mutex); list_add_tail(&vma->vm_link, &vma->vm->bound_list); @@ -1336,6 +1338,33 @@ static int igt_mock_drunk(void *arg) return exercise_mock(ggtt->vm.i915, drunk_hole); } +static int reserve_gtt_with_resource(struct i915_vma *vma, u64 offset) +{ + struct i915_address_space *vm = vma->vm; + struct i915_vma_resource *vma_res; + struct drm_i915_gem_object *obj = vma->obj; + int err; + + vma_res = i915_vma_resource_alloc(); + if (IS_ERR(vma_res)) + return PTR_ERR(vma_res); + + mutex_lock(&vm->mutex); + err = i915_gem_gtt_reserve(vm, &vma->node, obj->base.size, + offset, + obj->cache_level, + 0); + if (!err) { + i915_vma_resource_init_from_vma(vma_res, vma); + vma->resource = vma_res; + } else { + kfree(vma_res); + } + mutex_unlock(&vm->mutex); + + return err; +} + static int igt_gtt_reserve(void *arg) { struct i915_ggtt *ggtt = arg; @@ -1370,20 +1399,13 @@ static int igt_gtt_reserve(void 
*arg) } list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, - obj->base.size, - total, - obj->cache_level, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = reserve_gtt_with_resource(vma, total); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1429,13 +1451,7 @@ static int igt_gtt_reserve(void *arg) goto out; } - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, - obj->base.size, - total, - obj->cache_level, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = reserve_gtt_with_resource(vma, total); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1476,13 +1492,7 @@ static int igt_gtt_reserve(void *arg) 2 * I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, - obj->base.size, - offset, - obj->cache_level, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = reserve_gtt_with_resource(vma, offset); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1509,6 +1519,31 @@ out: return err; } +static int insert_gtt_with_resource(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + struct i915_vma_resource *vma_res; + struct drm_i915_gem_object *obj = vma->obj; + int err; + + vma_res = i915_vma_resource_alloc(); + if (IS_ERR(vma_res)) + return PTR_ERR(vma_res); + + mutex_lock(&vm->mutex); + err = i915_gem_gtt_insert(vm, &vma->node, obj->base.size, 0, + obj->cache_level, 0, vm->total, 0); + if (!err) { + i915_vma_resource_init_from_vma(vma_res, vma); + vma->resource = vma_res; + } else { + kfree(vma_res); + } + mutex_unlock(&vm->mutex); + + return err; +} + static int igt_gtt_insert(void *arg) { struct i915_ggtt *ggtt = arg; @@ -1593,12 +1628,7 @@ static int igt_gtt_insert(void *arg) goto out; } - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, - obj->base.size, 0, obj->cache_level, - 0, ggtt->vm.total, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = insert_gtt_with_resource(vma); if (err == -ENOSPC) { /* maxed out the GGTT space */ i915_gem_object_put(obj); @@ -1653,12 +1683,7 @@ static int igt_gtt_insert(void *arg) goto out; } - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, - obj->base.size, 0, obj->cache_level, - 0, ggtt->vm.total, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = insert_gtt_with_resource(vma); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1702,12 +1727,7 @@ static int igt_gtt_insert(void *arg) goto out; } - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, - obj->base.size, 0, obj->cache_level, - 0, ggtt->vm.total, - 0); - mutex_unlock(&ggtt->vm.mutex); + err = insert_gtt_with_resource(vma); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1737,26 +1757,28 @@ int i915_gem_gtt_mock_selftests(void) SUBTEST(igt_gtt_insert), }; struct drm_i915_private *i915; - struct i915_ggtt *ggtt; + struct intel_gt *gt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; - ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL); - if (!ggtt) { - err = -ENOMEM; + /* allocate the ggtt */ + err = 
intel_gt_assign_ggtt(to_gt(i915)); + if (err) goto out_put; - } - mock_init_ggtt(i915, ggtt); - err = i915_subtests(tests, ggtt); + gt = to_gt(i915); + + mock_init_ggtt(gt); + + err = i915_subtests(tests, gt->ggtt); mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); - kfree(ggtt); + mock_fini_ggtt(gt->ggtt); + out_put: mock_destroy_device(i915); return err; @@ -1939,6 +1961,7 @@ static int igt_cs_tlb(void *arg) struct i915_vm_pt_stash stash = {}; struct i915_request *rq; struct i915_gem_ww_ctx ww; + struct i915_vma_resource *vma_res; u64 offset; offset = igt_random_offset(&prng, @@ -1959,6 +1982,13 @@ static int igt_cs_tlb(void *arg) if (err) goto end; + vma_res = i915_vma_resource_alloc(); + if (IS_ERR(vma_res)) { + i915_vma_put_pages(vma); + err = PTR_ERR(vma_res); + goto end; + } + i915_gem_ww_ctx_init(&ww, false); retry: err = i915_vm_lock_objects(vm, &ww); @@ -1980,33 +2010,41 @@ end_ww: goto retry; } i915_gem_ww_ctx_fini(&ww); - if (err) + if (err) { + kfree(vma_res); goto end; + } + i915_vma_resource_init_from_vma(vma_res, vma); /* Prime the TLB with the dummy pages */ for (i = 0; i < count; i++) { - vma->node.start = offset + i * PAGE_SIZE; - vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + vma_res->start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma_res, I915_CACHE_NONE, + 0); - rq = submit_batch(ce, vma->node.start); + rq = submit_batch(ce, vma_res->start); if (IS_ERR(rq)) { err = PTR_ERR(rq); + i915_vma_resource_fini(vma_res); + kfree(vma_res); goto end; } i915_request_put(rq); } - + i915_vma_resource_fini(vma_res); i915_vma_put_pages(vma); err = context_sync(ce); if (err) { pr_err("%s: dummy setup timed out\n", ce->engine->name); + kfree(vma_res); goto end; } vma = i915_vma_instance(act, vm, NULL); if (IS_ERR(vma)) { + kfree(vma_res); err = PTR_ERR(vma); goto end; } @@ -2014,19 +2052,22 @@ end_ww: i915_gem_object_lock(act, NULL); err = i915_vma_get_pages(vma); i915_gem_object_unlock(act); - if (err) + if (err) { + kfree(vma_res); goto end; + } + i915_vma_resource_init_from_vma(vma_res, vma); /* Replace the TLB with target batches */ for (i = 0; i < count; i++) { struct i915_request *rq; u32 *cs = batch + i * 64 / sizeof(*cs); u64 addr; - vma->node.start = offset + i * PAGE_SIZE; - vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + vma_res->start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0); - addr = vma->node.start + i * 64; + addr = vma_res->start + i * 64; cs[4] = MI_NOOP; cs[6] = lower_32_bits(addr); cs[7] = upper_32_bits(addr); @@ -2035,6 +2076,8 @@ end_ww: rq = submit_batch(ce, addr); if (IS_ERR(rq)) { err = PTR_ERR(rq); + i915_vma_resource_fini(vma_res); + kfree(vma_res); goto end; } @@ -2051,6 +2094,8 @@ end_ww: } end_spin(batch, count - 1); + i915_vma_resource_fini(vma_res); + kfree(vma_res); i915_vma_put_pages(vma); err = context_sync(ce); @@ -2114,7 +2159,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_cs_tlb), }; - GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total)); + GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total)); return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 92a859b34190..7f66f6d299b2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -843,7 +843,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) intel_gt_chipset_flush(to_gt(i915)); - vma = i915_vma_instance(obj, 
&i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 5c5809dfe9b2..de37cfa4c65f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -922,26 +922,28 @@ int i915_vma_mock_selftests(void) SUBTEST(igt_vma_partial), }; struct drm_i915_private *i915; - struct i915_ggtt *ggtt; + struct intel_gt *gt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; - ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL); - if (!ggtt) { - err = -ENOMEM; + /* allocate the ggtt */ + err = intel_gt_assign_ggtt(to_gt(i915)); + if (err) goto out_put; - } - mock_init_ggtt(i915, ggtt); - err = i915_subtests(tests, ggtt); + gt = to_gt(i915); + + mock_init_ggtt(gt); + + err = i915_subtests(tests, gt->ggtt); mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); - kfree(ggtt); + mock_fini_ggtt(gt->ggtt); + out_put: mock_destroy_device(i915); return err; @@ -982,7 +984,7 @@ static int igt_vma_remapped_gtt(void *arg) intel_wakeref_t wakeref; int err = 0; - if (!i915_ggtt_has_aperture(&i915->ggtt)) + if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt)) return 0; obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 8aa7b1d33865..28a0f054009a 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -69,7 +69,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_workqueue(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(&i915->ggtt); + mock_fini_ggtt(to_gt(i915)->ggtt); destroy_workqueue(i915->wq); intel_region_ttm_device_fini(i915); @@ -194,8 +194,13 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - mock_init_ggtt(i915, &i915->ggtt); - to_gt(i915)->vm = i915_vm_get(&i915->ggtt.vm); + /* allocate the ggtt */ + ret = intel_gt_assign_ggtt(to_gt(i915)); + if (ret) + goto err_unlock; + + mock_init_ggtt(to_gt(i915)); + to_gt(i915)->vm = i915_vm_get(&to_gt(i915)->ggtt->vm); mkwrite_device_info(i915)->platform_engine_mask = BIT(0); to_gt(i915)->info.engine_mask = BIT(0); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 1802baf80a17..568840e7ca66 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -33,23 +33,23 @@ static void mock_insert_page(struct i915_address_space *vm, } static void mock_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { } static void mock_bind_ppgtt(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); + vma_res->bound_flags |= flags; } static void mock_unbind_ppgtt(struct i915_address_space *vm, - struct i915_vma *vma) + struct i915_vma_resource *vma_res) { } @@ -93,23 +93,23 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) static void mock_bind_ggtt(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct 
i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { } static void mock_unbind_ggtt(struct i915_address_space *vm, - struct i915_vma *vma) + struct i915_vma_resource *vma_res) { } -void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) +void mock_init_ggtt(struct intel_gt *gt) { - memset(ggtt, 0, sizeof(*ggtt)); + struct i915_ggtt *ggtt = gt->ggtt; - ggtt->vm.gt = to_gt(i915); - ggtt->vm.i915 = i915; + ggtt->vm.gt = gt; + ggtt->vm.i915 = gt->i915; ggtt->vm.is_ggtt = true; ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); @@ -128,7 +128,6 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.vma_ops.unbind_vma = mock_unbind_ggtt; i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - to_gt(i915)->ggtt = ggtt; } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h index e3f224f43beb..d6eb90bd7f3f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.h +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -27,8 +27,9 @@ struct drm_i915_private; struct i915_ggtt; +struct intel_gt; -void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt); +void mock_init_ggtt(struct intel_gt *gt); void mock_fini_ggtt(struct i915_ggtt *ggtt); struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name); |
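
The i915_vma_resource header earlier in this diff notes that allocation and initialization are deliberately split, because initialization may have to happen under a lock (in practice the vm mutex) where allocation is not allowed. The sketch below pulls that lifecycle together in one place, using only calls visible in this patch (i915_vma_resource_alloc(), i915_vma_resource_init(), vm->insert_entries(), i915_vma_resource_fini()). igt_cs_tlb uses i915_vma_resource_init_from_vma(); the explicit i915_vma_resource_init() form is shown here only because its full argument list appears in the header hunk above. The function name and the NULL/false argument choices are illustrative, not taken from the driver, and the snippet assumes the usual i915 and kernel headers.

/*
 * Illustrative only: manual i915_vma_resource lifecycle, mirroring how the
 * updated selftests allocate, initialize, use and then finalize a resource
 * around vm->insert_entries(). Allocate outside any lock, initialize once
 * the backing state is known, and pair every init with
 * i915_vma_resource_fini() before freeing, since fini asserts the hold
 * count and drops any refcounted sgt reference.
 */
static int example_vma_resource_once(struct i915_address_space *vm,
                                     struct sg_table *pages,
                                     const struct i915_page_sizes *page_sizes,
                                     u64 start, u64 size)
{
        struct i915_vma_resource *vma_res;

        /* May allocate and sleep, so never call this under vm->mutex. */
        vma_res = i915_vma_resource_alloc();
        if (IS_ERR(vma_res))
                return PTR_ERR(vma_res);

        /*
         * No refcounted sgt, not readonly, not lmem, no mr/ops/private;
         * node size and vma size are kept identical for simplicity.
         */
        i915_vma_resource_init(vma_res, vm, pages, page_sizes, NULL,
                               false, false, NULL, NULL, NULL,
                               start, size, size);

        /* Real callers may also need a runtime-pm wakeref, as lowlevel_hole does. */
        vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0);

        i915_vma_resource_fini(vma_res);
        kfree(vma_res);
        return 0;
}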
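
A central interface change in this series is that vm->insert_entries() and the vma_ops bind/unbind callbacks now take a struct i915_vma_resource instead of a struct i915_vma, as the mock_gtt.c hunks above show. The sketch below is a minimal bind callback under the new prototype; it only demonstrates the prototype and the bound_flags bookkeeping used by the mock PPGTT path, not the PTE-flag or page-size handling a real implementation needs, and the function name is hypothetical.

/*
 * Sketch of a bind callback under the new vma_ops prototype. Everything the
 * callback needs (placement, pages, bind state) now comes from the vma
 * resource rather than from a struct i915_vma.
 */
static void example_bind_vma(struct i915_address_space *vm,
                             struct i915_vm_pt_stash *stash,
                             struct i915_vma_resource *vma_res,
                             enum i915_cache_level cache_level,
                             u32 flags)
{
        /* Program the PTEs for the range described by the resource. */
        vm->insert_entries(vm, vma_res, cache_level, 0);

        /* Record how the range is bound. */
        vma_res->bound_flags |= flags;
}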
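
For reference, the deleted i915_vma_snapshot code above documented a pin/unpin protocol around error capture: i915_vma_snapshot_resource_pin() enters a fence-signalling critical section and hands back a lockdep cookie that must be passed to the matching unpin. Against the pre-patch tree, a caller was expected to pair them as in the sketch below; capture_pages() is a hypothetical stand-in for whatever work is done while the memory is held.

/*
 * Pre-patch usage pattern for the removed snapshot helpers. While pinned we
 * are in a fence-signalling critical section, so no blocking allocations or
 * fence waits are allowed before the unpin.
 */
static void example_capture(struct i915_vma_snapshot *vsnap)
{
        bool lockdep_cookie;

        if (!i915_vma_snapshot_present(vsnap))
                return;

        if (i915_vma_snapshot_resource_pin(vsnap, &lockdep_cookie)) {
                capture_pages(vsnap->pages);    /* hypothetical */
                i915_vma_snapshot_resource_unpin(vsnap, lockdep_cookie);
        }
}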