Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_migrate.c')
-rw-r--r--   drivers/gpu/drm/i915/gt/intel_migrate.c | 688
1 file changed, 688 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
new file mode 100644
index 000000000000..1dac21aa7e5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_context.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+#include "intel_migrate.h"
+#include "intel_ring.h"
+
+struct insert_pte_data {
+	u64 offset;
+	bool is_lmem;
+};
+
+#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+
+static bool engine_supports_migration(struct intel_engine_cs *engine)
+{
+	if (!engine)
+		return false;
+
+	/*
+	 * We need the ability to prevent arbitration (MI_ARB_ON_OFF),
+	 * the ability to write PTEs using inline data (MI_STORE_DATA_IMM)
+	 * and of course the ability to do the block transfer (blits).
+	 */
+	GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS);
+
+	return true;
+}
+
+static void insert_pte(struct i915_address_space *vm,
+		       struct i915_page_table *pt,
+		       void *data)
+{
+	struct insert_pte_data *d = data;
+
+	vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
+			d->is_lmem ? PTE_LM : 0);
+	d->offset += PAGE_SIZE;
+}
+
+static struct i915_address_space *migrate_vm(struct intel_gt *gt)
+{
+	struct i915_vm_pt_stash stash = {};
+	struct i915_ppgtt *vm;
+	int err;
+	int i;
+
+	/*
+	 * We construct a very special VM for use by all migration contexts;
+	 * it is kept pinned so that it can be used at any time. As we need
+	 * to pre-allocate the page directories for the migration VM, this
+	 * limits us to only using a small number of prepared vma.
+	 *
+	 * To be able to pipeline and reschedule migration operations while
+	 * avoiding unnecessary contention on the vm itself, the PTE updates
+	 * are inline with the blits. All the blits use the same fixed
+	 * addresses, with the backing store redirection being updated on the
+	 * fly. Only 2 implicit vma are used for all migration operations.
+	 *
+	 * We lay the ppGTT out as:
+	 *
+	 *	[0, CHUNK_SZ) -> first object
+	 *	[CHUNK_SZ, 2 * CHUNK_SZ) -> second object
+	 *	[2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9) -> PTE
+	 *
+	 * (Each 4KiB page needs an 8-byte PTE, so mapping 2 * CHUNK_SZ of
+	 * chunk space takes 2 * CHUNK_SZ / 512 bytes of PTEs, hence the >> 9.)
+	 *
+	 * By exposing the dma addresses of the page directories themselves
+	 * within the ppGTT, we are then able to rewrite the PTE prior to use.
+	 * But the PTE update and subsequent migration operation must be atomic,
+	 * i.e. within the same non-preemptible window, so that we do not switch
+	 * to another migration context that overwrites the PTE.
+	 *
+	 * TODO: Add support for huge LMEM PTEs
+	 */
+
+	vm = i915_ppgtt_create(gt);
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	if (!vm->vm.allocate_va_range || !vm->vm.foreach) {
+		err = -ENODEV;
+		goto err_vm;
+	}
+
+	/*
+	 * Each engine instance is assigned its own chunk in the VM, so
+	 * that we can run multiple instances concurrently.
+	 */
+	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
+		struct intel_engine_cs *engine;
+		u64 base = (u64)i << 32;
+		struct insert_pte_data d = {};
+		struct i915_gem_ww_ctx ww;
+		u64 sz;
+
+		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
+		if (!engine_supports_migration(engine))
+			continue;
+
+		/*
+		 * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
+		 * 4x2 page directories for source/destination.
+		 */
+		sz = 2 * CHUNK_SZ;
+		d.offset = base + sz;
+
+		/*
+		 * We need another page directory setup so that we can write
+		 * the 8x512 PTEs in each chunk.
+		 */
+		sz += (sz >> 12) * sizeof(u64);
+
+		err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
+		if (err)
+			goto err_vm;
+
+		for_i915_gem_ww(&ww, err, true) {
+			err = i915_vm_lock_objects(&vm->vm, &ww);
+			if (err)
+				continue;
+			err = i915_vm_map_pt_stash(&vm->vm, &stash);
+			if (err)
+				continue;
+
+			vm->vm.allocate_va_range(&vm->vm, &stash, base, sz);
+		}
+		i915_vm_free_pt_stash(&vm->vm, &stash);
+		if (err)
+			goto err_vm;
+
+		/* Now allow the GPU to rewrite the PTE via its own ppGTT */
+		d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]);
+		vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d);
+	}
+
+	return &vm->vm;
+
+err_vm:
+	i915_vm_put(&vm->vm);
+	return ERR_PTR(err);
+}
+
+static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
+		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
+		if (engine_supports_migration(engine))
+			return engine;
+	}
+
+	return NULL;
+}
+
+static struct intel_context *pinned_context(struct intel_gt *gt)
+{
+	static struct lock_class_key key;
+	struct intel_engine_cs *engine;
+	struct i915_address_space *vm;
+	struct intel_context *ce;
+
+	engine = first_copy_engine(gt);
+	if (!engine)
+		return ERR_PTR(-ENODEV);
+
+	vm = migrate_vm(gt);
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	ce = intel_engine_create_pinned_context(engine, vm, SZ_512K,
+						I915_GEM_HWS_MIGRATE,
+						&key, "migrate");
+	i915_vm_put(vm);
+	return ce;
+}
+
+int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt)
+{
+	struct intel_context *ce;
+
+	memset(m, 0, sizeof(*m));
+
+	ce = pinned_context(gt);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	m->context = ce;
+	return 0;
+}
+
+static int random_index(unsigned int max)
+{
+	return upper_32_bits(mul_u32_u32(get_random_u32(), max));
+}
+
+static struct intel_context *__migrate_engines(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE];
+	struct intel_engine_cs *engine;
+	unsigned int count, i;
+
+	count = 0;
+	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
+		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
+		if (engine_supports_migration(engine))
+			engines[count++] = engine;
+	}
+
+	return intel_context_create(engines[random_index(count)]);
+}
+
+struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
+{
+	struct intel_context *ce;
+
+	/*
+	 * We randomly distribute contexts across the engines upon construction,
+	 * as they all share the same pinned vm, and so in order to allow
+	 * multiple blits to run in parallel, we must construct each blit
+	 * to use a different range of the vm for its GTT. This has to be
+	 * known at construction, so we cannot use the late greedy load
+	 * balancing of the virtual-engine.
+	 */ +	ce = __migrate_engines(m->context->engine->gt); +	if (IS_ERR(ce)) +		return ce; + +	ce->ring = NULL; +	ce->ring_size = SZ_256K; + +	i915_vm_put(ce->vm); +	ce->vm = i915_vm_get(m->context->vm); + +	return ce; +} + +static inline struct sgt_dma sg_sgt(struct scatterlist *sg) +{ +	dma_addr_t addr = sg_dma_address(sg); + +	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; +} + +static int emit_no_arbitration(struct i915_request *rq) +{ +	u32 *cs; + +	cs = intel_ring_begin(rq, 2); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	/* Explicitly disable preemption for this request. */ +	*cs++ = MI_ARB_ON_OFF; +	*cs++ = MI_NOOP; +	intel_ring_advance(rq, cs); + +	return 0; +} + +static int emit_pte(struct i915_request *rq, +		    struct sgt_dma *it, +		    enum i915_cache_level cache_level, +		    bool is_lmem, +		    u64 offset, +		    int length) +{ +	const u64 encode = rq->context->vm->pte_encode(0, cache_level, +						       is_lmem ? PTE_LM : 0); +	struct intel_ring *ring = rq->ring; +	int total = 0; +	u32 *hdr, *cs; +	int pkt; + +	GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + +	/* Compute the page directory offset for the target address range */ +	offset += (u64)rq->engine->instance << 32; +	offset >>= 12; +	offset *= sizeof(u64); +	offset += 2 * CHUNK_SZ; + +	cs = intel_ring_begin(rq, 6); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	/* Pack as many PTE updates as possible into a single MI command */ +	pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); +	pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + +	hdr = cs; +	*cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ +	*cs++ = lower_32_bits(offset); +	*cs++ = upper_32_bits(offset); + +	do { +		if (cs - hdr >= pkt) { +			*hdr += cs - hdr - 2; +			*cs++ = MI_NOOP; + +			ring->emit = (void *)cs - ring->vaddr; +			intel_ring_advance(rq, cs); +			intel_ring_update_space(ring); + +			cs = intel_ring_begin(rq, 6); +			if (IS_ERR(cs)) +				return PTR_ERR(cs); + +			pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); +			pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + +			hdr = cs; +			*cs++ = MI_STORE_DATA_IMM | REG_BIT(21); +			*cs++ = lower_32_bits(offset); +			*cs++ = upper_32_bits(offset); +		} + +		*cs++ = lower_32_bits(encode | it->dma); +		*cs++ = upper_32_bits(encode | it->dma); + +		offset += 8; +		total += I915_GTT_PAGE_SIZE; + +		it->dma += I915_GTT_PAGE_SIZE; +		if (it->dma >= it->max) { +			it->sg = __sg_next(it->sg); +			if (!it->sg || sg_dma_len(it->sg) == 0) +				break; + +			it->dma = sg_dma_address(it->sg); +			it->max = it->dma + sg_dma_len(it->sg); +		} +	} while (total < length); + +	*hdr += cs - hdr - 2; +	*cs++ = MI_NOOP; + +	ring->emit = (void *)cs - ring->vaddr; +	intel_ring_advance(rq, cs); +	intel_ring_update_space(ring); + +	return total; +} + +static bool wa_1209644611_applies(int ver, u32 size) +{ +	u32 height = size >> PAGE_SHIFT; + +	if (ver != 11) +		return false; + +	return height % 4 == 3 && height <= 8; +} + +static int emit_copy(struct i915_request *rq, int size) +{ +	const int ver = GRAPHICS_VER(rq->engine->i915); +	u32 instance = rq->engine->instance; +	u32 *cs; + +	cs = intel_ring_begin(rq, ver >= 8 ? 
10 : 6); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	if (ver >= 9 && !wa_1209644611_applies(ver, size)) { +		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); +		*cs++ = BLT_DEPTH_32 | PAGE_SIZE; +		*cs++ = 0; +		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; +		*cs++ = CHUNK_SZ; /* dst offset */ +		*cs++ = instance; +		*cs++ = 0; +		*cs++ = PAGE_SIZE; +		*cs++ = 0; /* src offset */ +		*cs++ = instance; +	} else if (ver >= 8) { +		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); +		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; +		*cs++ = 0; +		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; +		*cs++ = CHUNK_SZ; /* dst offset */ +		*cs++ = instance; +		*cs++ = 0; +		*cs++ = PAGE_SIZE; +		*cs++ = 0; /* src offset */ +		*cs++ = instance; +	} else { +		GEM_BUG_ON(instance); +		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); +		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; +		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; +		*cs++ = CHUNK_SZ; /* dst offset */ +		*cs++ = PAGE_SIZE; +		*cs++ = 0; /* src offset */ +	} + +	intel_ring_advance(rq, cs); +	return 0; +} + +int +intel_context_migrate_copy(struct intel_context *ce, +			   struct dma_fence *await, +			   struct scatterlist *src, +			   enum i915_cache_level src_cache_level, +			   bool src_is_lmem, +			   struct scatterlist *dst, +			   enum i915_cache_level dst_cache_level, +			   bool dst_is_lmem, +			   struct i915_request **out) +{ +	struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); +	struct i915_request *rq; +	int err; + +	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); +	*out = NULL; + +	GEM_BUG_ON(ce->ring->size < SZ_64K); + +	do { +		int len; + +		rq = i915_request_create(ce); +		if (IS_ERR(rq)) { +			err = PTR_ERR(rq); +			goto out_ce; +		} + +		if (await) { +			err = i915_request_await_dma_fence(rq, await); +			if (err) +				goto out_rq; + +			if (rq->engine->emit_init_breadcrumb) { +				err = rq->engine->emit_init_breadcrumb(rq); +				if (err) +					goto out_rq; +			} + +			await = NULL; +		} + +		/* The PTE updates + copy must not be interrupted. */ +		err = emit_no_arbitration(rq); +		if (err) +			goto out_rq; + +		len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, +			       CHUNK_SZ); +		if (len <= 0) { +			err = len; +			goto out_rq; +		} + +		err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, +			       CHUNK_SZ, len); +		if (err < 0) +			goto out_rq; +		if (err < len) { +			err = -EINVAL; +			goto out_rq; +		} + +		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); +		if (err) +			goto out_rq; + +		err = emit_copy(rq, len); + +		/* Arbitration is re-enabled between requests. */ +out_rq: +		if (*out) +			i915_request_put(*out); +		*out = i915_request_get(rq); +		i915_request_add(rq); +		if (err || !it_src.sg || !sg_dma_len(it_src.sg)) +			break; + +		cond_resched(); +	} while (1); + +out_ce: +	return err; +} + +static int emit_clear(struct i915_request *rq, int size, u32 value) +{ +	const int ver = GRAPHICS_VER(rq->engine->i915); +	u32 instance = rq->engine->instance; +	u32 *cs; + +	GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + +	cs = intel_ring_begin(rq, ver >= 8 ? 
8 : 6); +	if (IS_ERR(cs)) +		return PTR_ERR(cs); + +	if (ver >= 8) { +		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); +		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; +		*cs++ = 0; +		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; +		*cs++ = 0; /* offset */ +		*cs++ = instance; +		*cs++ = value; +		*cs++ = MI_NOOP; +	} else { +		GEM_BUG_ON(instance); +		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); +		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; +		*cs++ = 0; +		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; +		*cs++ = 0; +		*cs++ = value; +	} + +	intel_ring_advance(rq, cs); +	return 0; +} + +int +intel_context_migrate_clear(struct intel_context *ce, +			    struct dma_fence *await, +			    struct scatterlist *sg, +			    enum i915_cache_level cache_level, +			    bool is_lmem, +			    u32 value, +			    struct i915_request **out) +{ +	struct sgt_dma it = sg_sgt(sg); +	struct i915_request *rq; +	int err; + +	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); +	*out = NULL; + +	GEM_BUG_ON(ce->ring->size < SZ_64K); + +	do { +		int len; + +		rq = i915_request_create(ce); +		if (IS_ERR(rq)) { +			err = PTR_ERR(rq); +			goto out_ce; +		} + +		if (await) { +			err = i915_request_await_dma_fence(rq, await); +			if (err) +				goto out_rq; + +			if (rq->engine->emit_init_breadcrumb) { +				err = rq->engine->emit_init_breadcrumb(rq); +				if (err) +					goto out_rq; +			} + +			await = NULL; +		} + +		/* The PTE updates + clear must not be interrupted. */ +		err = emit_no_arbitration(rq); +		if (err) +			goto out_rq; + +		len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); +		if (len <= 0) { +			err = len; +			goto out_rq; +		} + +		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); +		if (err) +			goto out_rq; + +		err = emit_clear(rq, len, value); + +		/* Arbitration is re-enabled between requests. 
*/
+out_rq:
+		if (*out)
+			i915_request_put(*out);
+		*out = i915_request_get(rq);
+		i915_request_add(rq);
+		if (err || !it.sg || !sg_dma_len(it.sg))
+			break;
+
+		cond_resched();
+	} while (1);
+
+out_ce:
+	return err;
+}
+
+int intel_migrate_copy(struct intel_migrate *m,
+		       struct i915_gem_ww_ctx *ww,
+		       struct dma_fence *await,
+		       struct scatterlist *src,
+		       enum i915_cache_level src_cache_level,
+		       bool src_is_lmem,
+		       struct scatterlist *dst,
+		       enum i915_cache_level dst_cache_level,
+		       bool dst_is_lmem,
+		       struct i915_request **out)
+{
+	struct intel_context *ce;
+	int err;
+
+	*out = NULL;
+	if (!m->context)
+		return -ENODEV;
+
+	ce = intel_migrate_create_context(m);
+	if (IS_ERR(ce))
+		ce = intel_context_get(m->context);
+	GEM_BUG_ON(IS_ERR(ce));
+
+	err = intel_context_pin_ww(ce, ww);
+	if (err)
+		goto out;
+
+	err = intel_context_migrate_copy(ce, await,
+					 src, src_cache_level, src_is_lmem,
+					 dst, dst_cache_level, dst_is_lmem,
+					 out);
+
+	intel_context_unpin(ce);
+out:
+	intel_context_put(ce);
+	return err;
+}
+
+int
+intel_migrate_clear(struct intel_migrate *m,
+		    struct i915_gem_ww_ctx *ww,
+		    struct dma_fence *await,
+		    struct scatterlist *sg,
+		    enum i915_cache_level cache_level,
+		    bool is_lmem,
+		    u32 value,
+		    struct i915_request **out)
+{
+	struct intel_context *ce;
+	int err;
+
+	*out = NULL;
+	if (!m->context)
+		return -ENODEV;
+
+	ce = intel_migrate_create_context(m);
+	if (IS_ERR(ce))
+		ce = intel_context_get(m->context);
+	GEM_BUG_ON(IS_ERR(ce));
+
+	err = intel_context_pin_ww(ce, ww);
+	if (err)
+		goto out;
+
+	err = intel_context_migrate_clear(ce, await, sg, cache_level,
+					  is_lmem, value, out);
+
+	intel_context_unpin(ce);
+out:
+	intel_context_put(ce);
+	return err;
+}
+
+void intel_migrate_fini(struct intel_migrate *m)
+{
+	struct intel_context *ce;
+
+	ce = fetch_and_zero(&m->context);
+	if (!ce)
+		return;
+
+	intel_engine_destroy_pinned_context(ce);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_migrate.c"
+#endif
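
For context, below is a minimal sketch of how a caller might drive the clear API introduced above. clear_blt() is a hypothetical helper, not part of this commit: it assumes the object's backing store is already populated and dma-mapped, and error handling is abbreviated. A copy would follow the same pattern via intel_migrate_copy() with a second object.

static int clear_blt(struct intel_migrate *migrate,
		     struct drm_i915_gem_object *obj,
		     u32 value)
{
	struct i915_request *rq = NULL;
	struct i915_gem_ww_ctx ww;
	int err;

	/* Hold the object lock across the PTE emission and the blits. */
	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		/*
		 * Fill every page of the object with 'value', CHUNK_SZ at a
		 * time. 'rq' is replaced with the request for each chunk, so
		 * on return it holds the final request of the sequence.
		 */
		err = intel_migrate_clear(migrate, &ww, NULL,
					  obj->mm.pages->sgl,
					  obj->cache_level,
					  i915_gem_object_is_lmem(obj),
					  value, &rq);
	}

	if (rq) {
		/* Completion of the last request covers the whole clear. */
		if (i915_request_wait(rq, 0, HZ) < 0 && !err)
			err = -ETIME;
		i915_request_put(rq);
	}

	return err;
}

Because each chunk is emitted as its own non-preemptible request, waiting on (or awaiting the fence of) only the final returned request is sufficient to order against the entire operation.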