From 187685cb909881822f4eeaa5acdf80e25d7d1489 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 1 Dec 2016 14:16:36 +0000 Subject: drm/i915: Make GEM object alloc/free and stolen created take dev_priv Where it is more appropriate and also to be consistent with the direction of the driver. v2: Leave out object alloc/free inlining. (Joonas Lahtinen) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aeb637dc1fdf..e193e52f669d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1869,7 +1869,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create_stolen(&dev_priv->drm, size); + obj = i915_gem_object_create_stolen(dev_priv, size); if (!obj) obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) -- cgit From 12d79d78287cdc5323b4a589a2ca2ec16c5063fc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 1 Dec 2016 14:16:37 +0000 Subject: drm/i915: Make GEM object create and create from data take dev_priv Makes all GEM object constructors consistent. v2: Fix compilation in GVT code. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen (v1) --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 5 ++--- drivers/gpu/drm/i915/i915_drv.h | 9 +++++---- drivers/gpu/drm/i915/i915_gem.c | 20 ++++++++++---------- drivers/gpu/drm/i915/i915_gem_context.c | 5 +++-- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_guc_loader.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_overlay.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 12 files changed, 32 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d26a092c70e8..9a4b23c3ee97 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1602,7 +1602,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) return -ENOMEM; entry_obj->obj = - i915_gem_object_create(&(s->vgpu->gvt->dev_priv->drm), + i915_gem_object_create(s->vgpu->gvt->dev_priv, roundup(bb_size, PAGE_SIZE)); if (IS_ERR(entry_obj->obj)) { ret = PTR_ERR(entry_obj->obj); @@ -2665,14 +2665,13 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - struct drm_device *dev = &wa_ctx->workload->vgpu->gvt->dev_priv->drm; int ctx_size = wa_ctx->indirect_ctx.size; unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; struct drm_i915_gem_object *obj; int ret = 0; void *map; - obj = i915_gem_object_create(dev, + obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv, roundup(ctx_size + CACHELINE_BYTES, PAGE_SIZE)); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65d7a7811236..8b725d13d24e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2985,10 +2985,11 @@ void 
*i915_gem_object_alloc(struct drm_i915_private *dev_priv); void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); -struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, - u64 size); -struct drm_i915_gem_object *i915_gem_object_create_from_data( - struct drm_device *dev, const void *data, size_t size); +struct drm_i915_gem_object * +i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size); +struct drm_i915_gem_object * +i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, + const void *data, size_t size); void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ab75d27b74d5..10c3b505f49a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -635,7 +635,7 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) static int i915_gem_create(struct drm_file *file, - struct drm_device *dev, + struct drm_i915_private *dev_priv, uint64_t size, uint32_t *handle_p) { @@ -648,7 +648,7 @@ i915_gem_create(struct drm_file *file, return -EINVAL; /* Allocate the new object */ - obj = i915_gem_object_create(dev, size); + obj = i915_gem_object_create(dev_priv, size); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -670,7 +670,7 @@ i915_gem_dumb_create(struct drm_file *file, /* have to work out size/pitch and return them */ args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; - return i915_gem_create(file, dev, + return i915_gem_create(file, to_i915(dev), args->size, &args->handle); } @@ -684,11 +684,12 @@ int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_create *args = data; - i915_gem_flush_free_objects(to_i915(dev)); + i915_gem_flush_free_objects(dev_priv); - return i915_gem_create(file, dev, + return i915_gem_create(file, dev_priv, args->size, &args->handle); } @@ -3970,9 +3971,8 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE)) struct drm_i915_gem_object * -i915_gem_object_create(struct drm_device *dev, u64 size) +i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; @@ -3993,7 +3993,7 @@ i915_gem_object_create(struct drm_device *dev, u64 size) if (obj == NULL) return ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(dev, &obj->base, size); + ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4749,7 +4749,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * -i915_gem_object_create_from_data(struct drm_device *dev, +i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, const void *data, size_t size) { struct drm_i915_gem_object *obj; @@ -4757,7 +4757,7 @@ i915_gem_object_create_from_data(struct drm_device *dev, size_t bytes; int ret; - obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE)); + obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git 
a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a6add0c14045..5241b51dd986 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -169,12 +169,13 @@ void i915_gem_context_free(struct kref *ctx_ref) static struct drm_i915_gem_object * alloc_context_obj(struct drm_device *dev, u64 size) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; int ret; lockdep_assert_held(&dev->struct_mutex); - obj = i915_gem_object_create(dev, size); + obj = i915_gem_object_create(dev_priv, size); if (IS_ERR(obj)) return obj; @@ -193,7 +194,7 @@ alloc_context_obj(struct drm_device *dev, u64 size) * This is only applicable for Ivy Bridge devices since * later platforms don't have L3 control bits in the PTE. */ - if (IS_IVYBRIDGE(to_i915(dev))) { + if (IS_IVYBRIDGE(dev_priv)) { ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); /* Failure shouldn't ever happen this early */ if (WARN_ON(ret)) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 58413803ba3c..1003b443112c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -574,7 +574,7 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size) struct i915_vma *vma; int ret; - obj = i915_gem_object_create(&dev_priv->drm, size); + obj = i915_gem_object_create(dev_priv, size); if (IS_ERR(obj)) return ERR_CAST(obj); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 14de9a4eee27..5669f0862458 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -773,7 +773,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE); + bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); ret = PTR_ERR(bo); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aeaf701f0ff5..b91e0abf66ba 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10998,7 +10998,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; - obj = i915_gem_object_create(dev, + obj = i915_gem_object_create(to_i915(dev), intel_framebuffer_size_for_mode(mode, bpp)); if (IS_ERR(obj)) return ERR_CAST(obj); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index d7e50db20bb8..3c445b92e988 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -147,7 +147,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) - obj = i915_gem_object_create(dev, size); + obj = i915_gem_object_create(dev_priv, size); if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index a330fa499384..9926747d160f 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -590,6 +590,7 @@ fail: static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) 
{ + struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev->pdev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; @@ -648,7 +649,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; - if (size > guc_wopcm_size(to_i915(dev))) { + if (size > guc_wopcm_size(dev_priv)) { DRM_NOTE("Firmware is too large to fit in WOPCM\n"); goto fail; } @@ -676,7 +677,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_from_data(dev, fw->data, fw->size); + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); mutex_unlock(&dev->struct_mutex); if (IS_ERR_OR_NULL(obj)) { err = obj ? PTR_ERR(obj) : -ENOMEM; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b2c0d509e191..67aec8f33c1d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1243,7 +1243,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) struct i915_vma *vma; int err; - obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -2242,7 +2242,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; - ctx_obj = i915_gem_object_create(&ctx->i915->drm, context_size); + ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); return PTR_ERR(ctx_obj); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0a7b83aaa2b4..90da6a707de7 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1393,7 +1393,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) reg_bo = i915_gem_object_create_stolen(dev_priv, PAGE_SIZE); if (reg_bo == NULL) - reg_bo = i915_gem_object_create(&dev_priv->drm, PAGE_SIZE); + reg_bo = i915_gem_object_create(dev_priv, PAGE_SIZE); if (IS_ERR(reg_bo)) goto out_free; overlay->reg_bo = reg_bo; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e193e52f669d..bc18a4f2643d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1871,7 +1871,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) obj = i915_gem_object_create_stolen(dev_priv, size); if (!obj) - obj = i915_gem_object_create(&dev_priv->drm, size); + obj = i915_gem_object_create(dev_priv, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -2452,7 +2452,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { struct i915_vma *vma; - obj = i915_gem_object_create(&dev_priv->drm, 4096); + obj = i915_gem_object_create(dev_priv, 4096); if (IS_ERR(obj)) goto err; -- cgit From 2a307c2e91b9478a2a06c09ffcbfc00851c7be26 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 30 Nov 2016 17:43:04 +0200 Subject: drm/i915: add some more "i" in platform names for consistency Consistency FTW. 
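As a before/after sketch of the rename pattern, taken from the i915_drv.h hunk below:

    /* before: the odd one out among the gen2 platform checks */
    #define IS_845G(dev_priv)  (INTEL_DEVID(dev_priv) == 0x2562)

    /* after: matches the IS_I830/IS_I85X/IS_I865G naming */
    #define IS_I845G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2562)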
Reviewed-by: Joonas Lahtinen Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/9ab811dc06570bd3fc05a917ade1bdc9bb805a75.1480520526.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 14 +++++++------- drivers/gpu/drm/i915/intel_i2c.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 00a36bf87993..a746130c8e32 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2955,7 +2955,7 @@ static bool cursor_active(struct drm_i915_private *dev_priv, int pipe) { u32 state; - if (IS_845G(dev_priv) || IS_I865G(dev_priv)) + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; else state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e5465b330886..e2a99649d594 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2515,7 +2515,7 @@ intel_info(const struct drm_i915_private *dev_priv) (INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until)) #define IS_I830(dev_priv) (INTEL_DEVID(dev_priv) == 0x3577) -#define IS_845G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2562) +#define IS_I845G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2562) #define IS_I85X(dev_priv) ((dev_priv)->info.platform == INTEL_I85X) #define IS_I865G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2572) #define IS_I915G(dev_priv) ((dev_priv)->info.platform == INTEL_I915G) @@ -2667,7 +2667,7 @@ intel_info(const struct drm_i915_private *dev_priv) ((dev_priv)->info.overlay_needs_physical) /* Early gen2 have a totally busted CS tlb and require pinned batches. 
*/ -#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_845G(dev_priv)) +#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index b3bac2557665..c81b22f238c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -153,7 +153,7 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) tom = tmp * MB(32); base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_845G(dev_priv)) { + } else if (IS_I845G(dev_priv)) { u32 tseg_size = 0; u32 tom; u8 tmp; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 99e8eed0a1fc..2bf9b75ac392 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -71,7 +71,7 @@ static const struct intel_device_info intel_i830_info = { .num_pipes = 2, /* legal, last one wins */ }; -static const struct intel_device_info intel_845g_info = { +static const struct intel_device_info intel_i845g_info = { GEN2_FEATURES, .platform = INTEL_I845G, }; @@ -432,7 +432,7 @@ static const struct intel_device_info intel_kabylake_gt3_info = { */ static const struct pci_device_id pciidlist[] = { INTEL_I830_IDS(&intel_i830_info), - INTEL_I845G_IDS(&intel_845g_info), + INTEL_I845G_IDS(&intel_i845g_info), INTEL_I85X_IDS(&intel_i85x_info), INTEL_I865G_IDS(&intel_i865g_info), INTEL_I915G_IDS(&intel_i915g_info), diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ab5ba7e08424..a41082e2750e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1233,7 +1233,7 @@ static void assert_cursor(struct drm_i915_private *dev_priv, { bool cur_state; - if (IS_845G(dev_priv) || IS_I865G(dev_priv)) + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; else cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; @@ -10936,7 +10936,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE(CURPOS(pipe), pos); - if (IS_845G(dev_priv) || IS_I865G(dev_priv)) + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) i845_update_cursor(crtc, base, plane_state); else i9xx_update_cursor(crtc, base, plane_state); @@ -10954,11 +10954,11 @@ static bool cursor_size_ok(struct drm_i915_private *dev_priv, * the precision of the register. Everything else requires * square cursors, limited to a few power-of-two sizes. */ - if (IS_845G(dev_priv) || IS_I865G(dev_priv)) { + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { if ((width & 63) != 0) return false; - if (width > (IS_845G(dev_priv) ? 64 : 512)) + if (width > (IS_I845G(dev_priv) ? 64 : 512)) return false; if (height > 1023) @@ -16127,7 +16127,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) else if (IS_I915G(dev_priv)) dev_priv->display.get_display_clock_speed = i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_845G(dev_priv)) + else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) dev_priv->display.get_display_clock_speed = i9xx_misc_get_display_clock_speed; else if (IS_I915GM(dev_priv)) @@ -16549,8 +16549,8 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.max_height = 8192; } - if (IS_845G(dev_priv) || IS_I865G(dev_priv)) { - dev->mode_config.cursor_width = IS_845G(dev_priv) ? 
64 : 512; + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { + dev->mode_config.cursor_width = IS_I845G(dev_priv) ? 64 : 512; dev->mode_config.cursor_height = 1023; } else if (IS_GEN2(dev_priv)) { dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH; diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 0164130c0488..bce1ba80f277 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -139,7 +139,7 @@ static u32 get_reserved(struct intel_gmbus *bus) u32 reserved = 0; /* On most chips, these bits must be preserved in software. */ - if (!IS_I830(dev_priv) && !IS_845G(dev_priv)) + if (!IS_I830(dev_priv) && !IS_I845G(dev_priv)) reserved = I915_READ_NOTRACE(bus->gpio_reg) & (GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 90da6a707de7..354f8cec96bb 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -957,7 +957,7 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, u32 tmp; /* check src dimensions */ - if (IS_845G(dev_priv) || IS_I830(dev_priv)) { + if (IS_I845G(dev_priv) || IS_I830(dev_priv)) { if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY || rec->src_width > IMAGE_MAX_WIDTH_LEGACY) return -EINVAL; @@ -1009,7 +1009,7 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, return -EINVAL; /* stride checking */ - if (IS_I830(dev_priv) || IS_845G(dev_priv)) + if (IS_I830(dev_priv) || IS_I845G(dev_priv)) stride_mask = 255; else stride_mask = 63; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bc18a4f2643d..0b7d13b6e228 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1912,7 +1912,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) * of the buffer. */ ring->effective_size = size; - if (IS_I830(engine->i915) || IS_845G(engine->i915)) + if (IS_I830(engine->i915) || IS_I845G(engine->i915)) ring->effective_size -= 2 * CACHELINE_BYTES; ring->last_retired_head = -1; @@ -2608,7 +2608,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) engine->emit_bb_start = i965_emit_bb_start; - else if (IS_I830(dev_priv) || IS_845G(dev_priv)) + else if (IS_I830(dev_priv) || IS_I845G(dev_priv)) engine->emit_bb_start = i830_emit_bb_start; else engine->emit_bb_start = i915_emit_bb_start; -- cgit From e8a9c58fcd9a5081f71f57f370af1347ed6a310b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 18 Dec 2016 15:37:20 +0000 Subject: drm/i915: Unify active context tracking between legacy/execlists/guc The requests conversion introduced a nasty bug where we could generate a new request in the middle of constructing a request if we needed to idle the system in order to evict space for a context. The request to idle would be executed (and waited upon) before the current one, creating a minor havoc in the seqno accounting, as we will consider the current request to already be completed (prior to deferred seqno assignment) but ring->last_retired_head would have been updated and still could allow us to overwrite the current request before execution. We also employed two different mechanisms to track the active context until it was switched out. 
The legacy method allowed for waiting upon an active context (it could forcibly evict any vma, including the context's), but the execlists method took a step backwards by pinning the vma for the entire active lifespan of the context (the only way to evict was to idle the entire GPU, not individual contexts). However, to circumvent the tricky issue of locking (i.e. we cannot take struct_mutex at the time of i915_gem_request_submit(), where we would want to move the previous context onto the active tracker and unpin it), we take the execlists approach and keep the contexts pinned until retirement. The benefit of the execlists approach, more important for execlists than legacy, was the reduction in work in pinning the context for each request - as the context was kept pinned until idle, it could short circuit the pinning for all active contexts. We introduce new engine vfuncs to pin and unpin the context respectively. The context is pinned at the start of the request, and only unpinned when the following request is retired (this ensures that the context is idle and coherent in main memory before we unpin it). We move the engine->last_context tracking into the retirement itself (rather than during request submission) in order to allow the submission to be reordered or unwound without undue difficulty. And finally an ulterior motive for unifying context handling was to prepare for mock requests. v2: Rename to last_retired_context, split out legacy_context tracking for MI_SET_CONTEXT. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 -- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 110 +++++------------------------ drivers/gpu/drm/i915/i915_gem_request.c | 38 ++++++---- drivers/gpu/drm/i915/i915_gem_request.h | 11 --- drivers/gpu/drm/i915/i915_guc_submission.c | 11 --- drivers/gpu/drm/i915/i915_perf.c | 18 ++--- drivers/gpu/drm/i915/intel_display.c | 3 +- drivers/gpu/drm/i915/intel_engine_cs.c | 21 +++++- drivers/gpu/drm/i915/intel_lrc.c | 62 ++++++---------- drivers/gpu/drm/i915/intel_lrc.h | 5 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +++++- 14 files changed, 151 insertions(+), 217 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 085c8ecd09b5..c89b012af914 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2446,7 +2446,6 @@ struct drm_i915_private { struct i915_perf_stream *exclusive_stream; u32 specific_ctx_id; - struct i915_vma *pinned_rcs_vma; struct hrtimer poll_check_timer; wait_queue_head_t poll_wq; @@ -3488,9 +3487,6 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); -struct i915_vma * -i915_gem_context_pin_legacy(struct i915_gem_context *ctx, - unsigned int flags); void i915_gem_context_free(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 782be625d37d..9b308af89ada 100644 ---
a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4206,7 +4206,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(engine->last_context != dev_priv->kernel_context); + GEM_BUG_ON(engine->last_retired_context != dev_priv->kernel_context); } int i915_gem_suspend(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a57c22659a3c..598a70d2b695 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -416,21 +416,6 @@ out: return ctx; } -static void i915_gem_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - if (i915.enable_execlists) { - intel_lr_context_unpin(ctx, engine); - } else { - struct intel_context *ce = &ctx->engine[engine->id]; - - if (ce->state) - i915_vma_unpin(ce->state); - - i915_gem_context_put(ctx); - } -} - int i915_gem_context_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; @@ -490,10 +475,13 @@ void i915_gem_context_lost(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); for_each_engine(engine, dev_priv, id) { - if (engine->last_context) { - i915_gem_context_unpin(engine->last_context, engine); - engine->last_context = NULL; - } + engine->legacy_active_context = NULL; + + if (!engine->last_retired_context) + continue; + + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = NULL; } /* Force the GPU state to be restored on enabling */ @@ -715,7 +703,7 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; - return to == engine->last_context; + return to == engine->legacy_active_context; } static bool @@ -727,11 +715,11 @@ needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, return false; /* Always load the ppgtt on first use */ - if (!engine->last_context) + if (!engine->legacy_active_context) return true; /* Same context without new entries, skip */ - if (engine->last_context == to && + if (engine->legacy_active_context == to && !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -761,57 +749,20 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, return false; } -struct i915_vma * -i915_gem_context_pin_legacy(struct i915_gem_context *ctx, - unsigned int flags) -{ - struct i915_vma *vma = ctx->engine[RCS].state; - int ret; - - /* Clear this page out of any CPU caches for coherent swap-in/out. - * We only want to do this on the first bind so that we do not stall - * on an active context (which by nature is already on the GPU). 
- */ - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (ret) - return ERR_PTR(ret); - } - - ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); - if (ret) - return ERR_PTR(ret); - - return vma; -} - static int do_rcs_switch(struct drm_i915_gem_request *req) { struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - struct i915_vma *vma; - struct i915_gem_context *from; + struct i915_gem_context *from = engine->legacy_active_context; u32 hw_flags; int ret, i; + GEM_BUG_ON(engine->id != RCS); + if (skip_rcs_switch(ppgtt, engine, to)) return 0; - /* Trying to pin first makes error handling easier. */ - vma = i915_gem_context_pin_legacy(to, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - /* - * Pin can switch back to the default context if we end up calling into - * evict_everything - as a last ditch gtt defrag effort that also - * switches to the default context. Hence we need to reload from here. - * - * XXX: Doing so is painfully broken! - */ - from = engine->last_context; - if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load @@ -820,7 +771,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) trace_switch_mm(engine, to); ret = ppgtt->switch_mm(ppgtt, req); if (ret) - goto err; + return ret; } if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) @@ -837,29 +788,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) - goto err; - } + return ret; - /* The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. In fact, the below code - * is a bit suboptimal because the retiring can occur simply after the - * MI_SET_CONTEXT instead of when the next seqno has completed. - */ - if (from != NULL) { - /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the - * whole damn pipeline, we don't need to explicitly mark the - * object dirty. The only exception is that the context must be - * correct in case the object gets swapped out. Ideally we'd be - * able to defer doing this until we know the object would be - * swapped, but there is no way to do that yet. - */ - i915_vma_move_to_active(from->engine[RCS].state, req, 0); - /* state is kept alive until the next request */ - i915_vma_unpin(from->engine[RCS].state); - i915_gem_context_put(from); + engine->legacy_active_context = to; } - engine->last_context = i915_gem_context_get(to); /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. 
@@ -900,10 +832,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) } return 0; - -err: - i915_vma_unpin(vma); - return ret; } /** @@ -943,12 +871,6 @@ int i915_switch_context(struct drm_i915_gem_request *req) ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); } - if (to != engine->last_context) { - if (engine->last_context) - i915_gem_context_put(engine->last_context); - engine->last_context = i915_gem_context_get(to); - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fcf22b0e2967..475d557f2301 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -206,6 +206,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void i915_gem_request_retire(struct drm_i915_gem_request *request) { + struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -216,9 +217,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) trace_i915_gem_request_retire(request); - spin_lock_irq(&request->engine->timeline->lock); + spin_lock_irq(&engine->timeline->lock); list_del_init(&request->link); - spin_unlock_irq(&request->engine->timeline->lock); + spin_unlock_irq(&engine->timeline->lock); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -266,17 +267,20 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) i915_gem_request_remove_from_client(request); - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - /* Retirement decays the ban score as it is a sign of ctx progress */ if (request->ctx->ban_score > 0) request->ctx->ban_score--; - i915_gem_context_put(request->ctx); + /* The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_gem_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = request->ctx; dma_fence_signal(&request->fence); @@ -524,10 +528,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + /* Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ret = engine->context_pin(engine, ctx); if (ret) return ERR_PTR(ret); + ret = reserve_global_seqno(dev_priv); + if (ret) + goto err_unpin; + /* Move the oldest request to the slab-cache (if not in use!) 
*/ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); @@ -593,11 +605,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; - req->ctx = i915_gem_context_get(ctx); + req->ctx = ctx; /* No zalloc, must clear what we need by hand */ req->global_seqno = 0; - req->previous_context = NULL; req->file_priv = NULL; req->batch = NULL; @@ -633,10 +644,11 @@ err_ctx: GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); - i915_gem_context_put(ctx); kmem_cache_free(dev_priv->requests, req); err_unreserve: dev_priv->gt.active_requests--; +err_unpin: + engine->context_unpin(engine, ctx); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index e2b077df2da0..8569b35a332a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -170,17 +170,6 @@ struct drm_i915_gem_request { /** Preallocate space in the ring for the emitting the request */ u32 reserved_space; - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired. - */ - struct i915_gem_context *previous_context; - /** Batch buffer related to this request if any (used for * error state dump only). */ diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a1232034f37a..3e20fe2be811 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -534,17 +534,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) static void i915_guc_submit(struct drm_i915_gem_request *rq) { - struct intel_engine_cs *engine = rq->engine; - - /* We keep the previous context alive until we retire the following - * request. This ensures that any the context object is still pinned - * for any residual writes the HW makes into it on the context switch - * into the next object following the breadcrumb. Otherwise, we may - * retire the context too early. - */ - rq->previous_context = engine->last_context; - engine->last_context = rq->ctx; - i915_gem_request_submit(rq); __i915_guc_submit(rq); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ae7bd0ed7b1a..da8537cb8136 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -743,7 +743,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct i915_vma *vma; + struct intel_engine_cs *engine = dev_priv->engine[RCS]; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -755,19 +755,16 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... 
*/ - vma = i915_gem_context_pin_legacy(stream->ctx, 0); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + ret = engine->context_pin(engine, stream->ctx); + if (ret) goto unlock; - } - - dev_priv->perf.oa.pinned_rcs_vma = vma; /* Explicitly track the ID (instead of calling i915_ggtt_offset() * on the fly) considering the difference with gen8+ and * execlists */ - dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma); + dev_priv->perf.oa.specific_ctx_id = + i915_ggtt_offset(stream->ctx->engine[engine->id].state); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -785,13 +782,12 @@ unlock: static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[RCS]; mutex_lock(&dev_priv->drm.struct_mutex); - i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma); - dev_priv->perf.oa.pinned_rcs_vma = NULL; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + engine->context_unpin(engine, stream->ctx); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bc1af87789bc..9f356ad5329e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12295,7 +12295,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); queue_work(system_unbound_wq, &work->mmio_work); } else { - request = i915_gem_request_alloc(engine, engine->last_context); + request = i915_gem_request_alloc(engine, + dev_priv->kernel_context); if (IS_ERR(request)) { ret = PTR_ERR(request); goto cleanup_unpin; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e8afe1185831..97bbbc3d6aa8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -304,15 +304,30 @@ int intel_engine_init_common(struct intel_engine_cs *engine) { int ret; - ret = intel_engine_init_breadcrumbs(engine); + /* We may need to do things with the shrinker which + * require us to immediately switch back to the default + * context. This can cause a problem as pinning the + * default context also requires GTT space which may not + * be available. To avoid this we always pin the default + * context. + */ + ret = engine->context_pin(engine, engine->i915->kernel_context); if (ret) return ret; + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + goto err_unpin; + ret = i915_gem_render_state_init(engine); if (ret) - return ret; + goto err_unpin; return 0; + +err_unpin: + engine->context_unpin(engine, engine->i915->kernel_context); + return ret; } /** @@ -330,6 +345,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + + engine->context_unpin(engine, engine->i915->kernel_context); } u64 intel_engine_get_active_head(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b848b5f205ce..599afedc4494 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -512,15 +512,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) RB_CLEAR_NODE(&cursor->priotree.node); cursor->priotree.priority = INT_MAX; - /* We keep the previous context alive until we retire the - * following request. 
This ensures that any the context object - * is still pinned for any residual writes the HW makes into it - * on the context switch into the next object following the - * breadcrumb. Otherwise, we may retire the context too early. - */ - cursor->previous_context = engine->last_context; - engine->last_context = cursor->ctx; - __i915_gem_request_submit(cursor); last = cursor; submit = true; @@ -772,8 +763,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) /* XXX Do we need to preempt to make room for us and our deps? */ } -static int intel_lr_context_pin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; @@ -784,6 +775,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; + if (!ce->state) { + ret = execlists_context_deferred_alloc(ctx, engine); + if (ret) + goto err; + } + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) @@ -825,8 +822,8 @@ err: return ret; } -void intel_lr_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void execlists_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; @@ -850,24 +847,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request struct intel_context *ce = &request->ctx->engine[engine->id]; int ret; + GEM_BUG_ON(!ce->pin_count); + /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - if (!ce->state) { - ret = execlists_context_deferred_alloc(request->ctx, engine); - if (ret) - return ret; - } - + GEM_BUG_ON(!ce->ring); request->ring = ce->ring; - ret = intel_lr_context_pin(request->ctx, engine); - if (ret) - return ret; - if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before @@ -876,7 +866,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request */ ret = i915_guc_wq_reserve(request); if (ret) - goto err_unpin; + goto err; } ret = intel_ring_begin(request, 0); @@ -904,8 +894,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request err_unreserve: if (i915.enable_guc_submission) i915_guc_wq_unreserve(request); -err_unpin: - intel_lr_context_unpin(request->ctx, engine); +err: return ret; } @@ -1789,13 +1778,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - intel_engine_cleanup_common(engine); - if (engine->status_page.vma) { i915_gem_object_unpin_map(engine->status_page.vma->obj); engine->status_page.vma = NULL; } - intel_lr_context_unpin(dev_priv->kernel_context, engine); + + intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx_obj(engine); engine->i915 = NULL; @@ -1820,6 +1808,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) /* Default vfuncs which can be overriden by each engine. 
*/ engine->init_hw = gen8_init_common_ring; engine->reset_hw = reset_common_ring; + + engine->context_pin = execlists_context_pin; + engine->context_unpin = execlists_context_unpin; + engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; @@ -1902,18 +1894,6 @@ logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; - ret = execlists_context_deferred_alloc(dctx, engine); - if (ret) - goto error; - - /* As this is the default context, always pin it */ - ret = intel_lr_context_pin(dctx, engine); - if (ret) { - DRM_ERROR("Failed to pin context for %s: %d\n", - engine->name, ret); - goto error; - } - /* And setup the hardware status page. */ ret = lrc_setup_hws(engine, dctx->engine[engine->id].state); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 7c6403243394..b5630331086a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -79,13 +79,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv); #define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1) #define LRC_STATE_PN (LRC_PPHWSP_PN + 1) +struct drm_i915_private; struct i915_gem_context; uint32_t intel_lr_context_size(struct intel_engine_cs *engine); -void intel_lr_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -struct drm_i915_private; void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 06e55967f180..1d4699d8fb10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6792,7 +6792,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work) goto out; rcs = dev_priv->engine[RCS]; - if (rcs->last_context) + if (rcs->last_retired_context) goto out; if (!rcs->init_context) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b7d13b6e228..00ff572541b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1939,8 +1939,26 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int intel_ring_context_pin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +{ + struct i915_vma *vma = ctx->engine[RCS].state; + int ret; + + /* Clear this page out of any CPU caches for coherent swap-in/out. + * We only want to do this on the first bind so that we do not stall + * on an active context (which by nature is already on the GPU). 
+ */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (ret) + return ret; + } + + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); +} + +static int intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; int ret; @@ -1951,13 +1969,15 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - struct i915_vma *vma; + unsigned int flags; + + flags = 0; + if (ctx == ctx->i915->kernel_context) + flags = PIN_HIGH; - vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + ret = context_pin(ctx, flags); + if (ret) goto error; - } } /* The kernel context is only used as a placeholder for flushing the @@ -1978,12 +1998,13 @@ error: return ret; } -static void intel_ring_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void intel_ring_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; lockdep_assert_held(&ctx->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); if (--ce->pin_count) return; @@ -2008,17 +2029,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (ret) goto error; - /* We may need to do things with the shrinker which - * require us to immediately switch back to the default - * context. This can cause a problem as pinning the - * default context also requires GTT space which may not - * be available. To avoid this we always pin the default - * context. - */ - ret = intel_ring_context_pin(dev_priv->kernel_context, engine); - if (ret) - goto error; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { ret = PTR_ERR(ring); @@ -2077,8 +2087,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - intel_ring_context_unpin(dev_priv->kernel_context, engine); - engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -2099,12 +2107,15 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) { int ret; + GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. 
*/ request->reserved_space += LEGACY_REQUEST_SIZE; + GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; ret = intel_ring_begin(request, 0); @@ -2584,6 +2595,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->init_hw = init_ring_common; engine->reset_hw = reset_ring_common; + engine->context_pin = intel_ring_context_pin; + engine->context_unpin = intel_ring_context_unpin; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; if (i915.semaphores) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3f43adefd1c0..4f1271821fa9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -266,6 +266,10 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + int (*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + void (*context_unpin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); int (*init_context)(struct drm_i915_gem_request *req); int (*emit_flush)(struct drm_i915_gem_request *request, @@ -379,7 +383,24 @@ struct intel_engine_cs { bool preempt_wa; u32 ctx_desc_template; - struct i915_gem_context *last_context; + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. + */ + struct i915_gem_context *last_retired_context; + + /* We track the current MI_SET_CONTEXT in order to eliminate + * redudant context switches. This presumes that requests are not + * reordered! Or when they are the tracking is updated along with + * the emission of individual requests into the legacy command + * stream (ring). + */ + struct i915_gem_context *legacy_active_context; struct intel_engine_hangcheck hangcheck; -- cgit From f73e73999d39a274adb8b342d7d8e722ffcf92d5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 18 Dec 2016 15:37:24 +0000 Subject: drm/i915: Swap if(enable_execlists) in i915_gem_request_alloc for a vfunc A fairly trivial move of a matching pair of routines (for preparing a request for construction) onto an engine vfunc. The ulterior motive is to be able to create a mock request implementation. 
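After this patch the call site in i915_gem_request_alloc() reduces to a single indirect call (see the hunk below), so a future mock engine only needs to supply its own hook. A minimal sketch of such a hook, hypothetical here since the mock engine itself is not part of this patch:

    /* Hypothetical mock hook; signature taken from the request_alloc
     * vfunc added to intel_ringbuffer.h below.
     */
    static int mock_request_alloc(struct drm_i915_gem_request *req)
    {
        /* A software-only ring has no space to flush or reserve. */
        return 0;
    }

    /* Wired up the same way as the real backends: */
    engine->request_alloc = mock_request_alloc;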
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 5 +---- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_lrc.h | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +-- 5 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 475d557f2301..7427aac74923 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -622,10 +622,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); + ret = engine->request_alloc(req); if (ret) goto err_ctx; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1f8f35a94157..d322d3c11201 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -845,7 +845,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1816,6 +1816,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->context_pin = execlists_context_pin; engine->context_unpin = execlists_context_unpin; + engine->request_alloc = execlists_request_alloc; + engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index b5630331086a..01ba36ea125e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -63,8 +63,6 @@ enum { }; /* Logical Rings */ -int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request); -int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 00ff572541b6..69ccf4fd22f0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2103,7 +2103,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) } } -int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) +static int ring_request_alloc(struct drm_i915_gem_request *request) { int ret; @@ -2598,6 +2598,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->context_unpin = intel_ring_context_unpin; + engine->request_alloc = ring_request_alloc; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; if (i915.semaphores) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4f1271821fa9..0969de7d5ab7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -270,6 +270,7 @@ struct intel_engine_cs { struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); + int (*request_alloc)(struct drm_i915_gem_request *req); int (*init_context)(struct drm_i915_gem_request *req); int (*emit_flush)(struct drm_i915_gem_request *request, @@ -491,8 +492,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); - int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -- cgit From d3ef1af6fdb67705d4569ac4b6ee11d91f6ab794 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 23 Dec 2016 15:56:21 -0800 Subject: drm/i915: request ring to be pinned above GUC_WOPCM_TOP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC will validate the ring offset and fail if it is in the [0, GUC_WOPCM_TOP) range. The bias is conditionally applied only if GuC loading is enabled (we can't check for guc submission enabled as in other cases because HuC loading requires this fix). Note that the default context is processed before enable_guc_loading is sanitized, so we might still apply the bias to its ring even if it is not needed. v2: compute the value during ctx init and pass it to intel_ring_pin (Chris), updated commit message Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Anusha Srivatsa Cc: MichaƂ Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/1482537382-28584-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 9 +++++++++ drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 5 files changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 32e2d7431025..5194686ea524 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1087,6 +1087,7 @@ struct i915_gem_context { int priority; /* greater priorities are serviced first */ u32 ggtt_alignment; + u32 ggtt_offset_bias; struct intel_context { struct i915_vma *state; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 15b25c1b1f4d..48e4ed5bb209 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -335,6 +335,15 @@ __create_hw_context(struct drm_i915_private *dev_priv, GEN8_CTX_ADDRESSING_MODE_SHIFT; ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); + /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not + * present or not in use we still need a small bias as ring wraparound + * at offset 0 sometimes hangs. No idea why. 
+ */ + if (HAS_GUC(dev_priv) && i915.enable_guc_loading) + ctx->ggtt_offset_bias = GUC_WOPCM_TOP; + else + ctx->ggtt_offset_bias = 4096; + return ctx; err_pid: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d322d3c11201..cec9037baf65 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -796,7 +796,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, goto unpin_vma; } - ret = intel_ring_pin(ce->ring); + ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias); if (ret) goto unpin_map; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69ccf4fd22f0..e84d488e167a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1805,10 +1805,9 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -int intel_ring_pin(struct intel_ring *ring) +int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) { - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + unsigned int flags; enum i915_map_type map; struct i915_vma *vma = ring->vma; void *addr; @@ -1818,6 +1817,9 @@ int intel_ring_pin(struct intel_ring *ring) map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; + flags = PIN_GLOBAL; + if (offset_bias) + flags |= PIN_OFFSET_BIAS | offset_bias; if (vma->obj->stolen) flags |= PIN_MAPPABLE; @@ -2046,7 +2048,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } - ret = intel_ring_pin(ring); + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + ret = intel_ring_pin(ring, 4096); if (ret) { intel_ring_free(ring); goto error; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0969de7d5ab7..79c2b8d72322 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -483,7 +483,7 @@ intel_write_status_page(struct intel_engine_cs *engine, struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring); +int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); -- cgit From 984ff29f74c0c130b43f0c5b0fe0fbca5de0fddc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jan 2017 15:20:13 +0000 Subject: drm/i915: Simplify testing for am-I-the-kernel-context? The kernel context (dev_priv->kernel_context) is unique in that it is not associated with any user filp - it is the only one with ctx->file_priv == NULL. This is a simpler test than comparing it against dev_priv->kernel_context, which involves some pointer dancing. In checking that this is true, we notice that the gvt context is allocating itself an i915_hw_ppgtt it doesn't use and not flagging that its file_priv should be invalid. 
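For illustration, the simpler test boils down to a NULL check on the owning file, and each call site drops the pointer chase. A condensed sketch of the pattern the diffs below apply (the helper is the one this patch adds to i915_gem_context.h):

	static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
	{
		return !ctx->file_priv;	/* only the kernel context has no owning filp */
	}

	-	if (ctx == ctx->i915->kernel_context)	/* before: pointer dancing */
	+	if (i915_gem_context_is_kernel(ctx))
			flags |= PIN_HIGH;
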
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170106152013.24684-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 13 ++++++++++++- drivers/gpu/drm/i915/i915_gem_context.h | 5 +++++ drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 5 files changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e60467271898..f671d052312a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4230,7 +4230,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(engine->last_retired_context != dev_priv->kernel_context); + GEM_BUG_ON(!i915_gem_context_is_kernel(engine->last_retired_context)); } int i915_gem_suspend(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 07ac81103f44..40a6939e3956 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -413,14 +413,17 @@ i915_gem_context_create_gvt(struct drm_device *dev) if (ret) return ERR_PTR(ret); - ctx = i915_gem_create_context(to_i915(dev), NULL); + ctx = __create_hw_context(to_i915(dev), NULL); if (IS_ERR(ctx)) goto out; + ctx->file_priv = ERR_PTR(-EBADF); i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: mutex_unlock(&dev->struct_mutex); return ctx; @@ -472,6 +475,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + DRM_DEBUG_DRIVER("%s context support initialized\n", i915.enable_execlists ? "LR" : dev_priv->hw_context_size ? 
"HW" : "fake"); @@ -524,6 +529,8 @@ void i915_gem_context_fini(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); + GEM_BUG_ON(!i915_gem_context_is_kernel(dctx)); + context_close(dctx); dev_priv->kernel_context = NULL; @@ -549,6 +556,8 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) ctx = i915_gem_create_context(to_i915(dev), file_priv); mutex_unlock(&dev->struct_mutex); + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); + if (IS_ERR(ctx)) { idr_destroy(&file_priv->context_idr); return PTR_ERR(ctx); @@ -968,6 +977,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ctx)) return PTR_ERR(ctx); + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); + args->ctx_id = ctx->user_handle; DRM_DEBUG("HW context %d created\n", args->ctx_id); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 89f6764fb338..0ac750b90f3d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -246,6 +246,11 @@ static inline bool i915_gem_context_is_default(const struct i915_gem_context *c) return c->user_handle == DEFAULT_CONTEXT_HANDLE; } +static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) +{ + return !ctx->file_priv; +} + /* i915_gem_context.c */ int __must_check i915_gem_context_init(struct drm_i915_private *dev_priv); void i915_gem_context_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a9eefb171170..6db246ad2f13 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -786,7 +786,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, flags = PIN_GLOBAL; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (ctx == ctx->i915->kernel_context) + if (i915_gem_context_is_kernel(ctx)) flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e84d488e167a..0971ac396b60 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1974,7 +1974,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, unsigned int flags; flags = 0; - if (ctx == ctx->i915->kernel_context) + if (i915_gem_context_is_kernel(ctx)) flags = PIN_HIGH; ret = context_pin(ctx, flags); @@ -1989,7 +1989,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, * as during eviction we cannot allocate and pin the renderstate in * order to initialise the context. */ - if (ctx == ctx->i915->kernel_context) + if (i915_gem_context_is_kernel(ctx)) ce->initialised = true; i915_gem_context_get(ctx); -- cgit From f51455d442c0fa97e4600960f19bf23b66f0b386 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Jan 2017 14:47:34 +0000 Subject: drm/i915: Replace 4096 with PAGE_SIZE or I915_GTT_PAGE_SIZE Start converting over from the byte count to its semantic macro, either we want to allocate the size of a physical page in main memory or we want the size of a virtual page in the GTT. 4096 could mean either, but PAGE_SIZE and I915_GTT_PAGE_SIZE are explicit and should help improve code comprehension and future changes. In the future, we may want to use variable GTT page sizes and so have the challenge of knowing which hardcoded values were used to represent a physical page vs the virtual page. 
v2: Look for a few more 4096s to convert, discover IS_ALIGNED(). v3: 4096ul paranoia, make fence alignment a distinct value of 4096, keep bdw stolen w/a as 4096 until we know better. v4: Add asserts that i915_vma_insert() start/end are aligned to GTT page sizes. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170110144734.26052-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 7 ++++--- drivers/gpu/drm/i915/i915_gem_evict.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++--- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_fence_reg.h | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++++- drivers/gpu/drm/i915/i915_gem_render_state.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_vma.c | 21 ++++++++++++++------- drivers/gpu/drm/i915/intel_lrc.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.h | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++----- 15 files changed, 62 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5d96de61c14..3bf517e2430a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3496,7 +3496,7 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) return; if (--vma->obj->pin_display == 0) - vma->display_alignment = 4096; + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ if (!i915_vma_is_active(vma)) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 40a6939e3956..ed31133b3ce3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -97,7 +97,7 @@ * part. It should be safe to decrease this, but it's more future proof as is. */ #define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN 4096 +#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT static size_t get_context_alignment(struct drm_i915_private *dev_priv) { @@ -341,7 +341,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, if (HAS_GUC(dev_priv) && i915.enable_guc_loading) ctx->ggtt_offset_bias = GUC_WOPCM_TOP; else - ctx->ggtt_offset_bias = 4096; + ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; return ctx; @@ -456,7 +456,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) dev_priv->hw_context_size = 0; } else if (HAS_HW_CONTEXTS(dev_priv)) { dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), 4096); + round_up(get_context_size(dev_priv), + I915_GTT_PAGE_SIZE); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", dev_priv->hw_context_size); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 026ebc5a452a..6a5415e31acf 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -264,9 +264,9 @@ int i915_gem_evict_for_vma(struct i915_vma *target, unsigned int flags) if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) 
*/ if (start > target->vm->start) - start -= 4096; + start -= I915_GTT_PAGE_SIZE; if (end < target->vm->start + target->vm->total) - end += 4096; + end += I915_GTT_PAGE_SIZE; } drm_mm_for_each_node_in_range(node, &target->vm->mm, start, end) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a5fe299da1d3..259fe4aa8d41 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -438,7 +438,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, memset(&cache->node, 0, sizeof(cache->node)); ret = drm_mm_insert_node_in_range_generic (&ggtt->base.mm, &cache->node, - 4096, 0, I915_COLOR_UNEVICTABLE, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); @@ -851,8 +851,7 @@ eb_vma_misplaced(struct i915_vma *vma) WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_vma_is_ggtt(vma)); - if (entry->alignment && - vma->node.start & (entry->alignment - 1)) + if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) return true; if (vma->node.size < entry->pad_to_size) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 9e65696a960c..fadbe8f4c745 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -80,11 +80,11 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, unsigned int stride = i915_gem_object_get_stride(vma->obj); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); - GEM_BUG_ON(vma->node.start & 4095); - GEM_BUG_ON(vma->fence_size & 4095); - GEM_BUG_ON(stride & 127); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE)); + GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE)); + GEM_BUG_ON(!IS_ALIGNED(stride, 128)); - val = (vma->node.start + vma->fence_size - 4096) << 32; + val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32; val |= vma->node.start; val |= (u64)((stride / 128) - 1) << fence_pitch_shift; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) @@ -127,7 +127,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK); GEM_BUG_ON(!is_power_of_2(vma->fence_size)); - GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) stride /= 128; @@ -166,7 +166,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK); GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!is_power_of_2(stride / 128)); - GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); + GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); val = vma->node.start; if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 22c4a2d01adf..99a31ded4dfd 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -30,6 +30,8 @@ struct drm_i915_private; struct i915_vma; +#define I965_FENCE_PAGE 4096UL + struct drm_i915_fence_reg { struct list_head link; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2d7ab1d35e47..8aca11f5f446 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -329,7 +329,7 @@ static int __setup_page_dma(struct drm_i915_private *dev_priv, return -ENOMEM; p->daddr = dma_map_page(kdev, - p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); + p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, p->daddr)) { __free_page(p->page); @@ -353,7 +353,7 @@ static void cleanup_page_dma(struct drm_i915_private *dev_priv, if (WARN_ON(!p->page)) return; - dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); __free_page(p->page); memset(p, 0, sizeof(*p)); } @@ -2711,11 +2711,11 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *end) { if (node->color != color) - *start += 4096; + *start += I915_GTT_PAGE_SIZE; node = list_next_entry(node, node_list); if (node->allocated && node->color != color) - *end -= 4096; + *end -= I915_GTT_PAGE_SIZE; } int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) @@ -2742,7 +2742,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) /* Reserve a mappable slot for our lockless error capture */ ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ggtt->error_capture, - 4096, 0, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, 0, 0); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9e91d7e6149c..34a4fd560fa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -40,6 +40,9 @@ #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE + #define I915_FENCE_REG_NONE -1 #define I915_MAX_NUM_FENCES 32 /* 32 fences + sign bit for FENCE_REG_NONE */ @@ -543,6 +546,6 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, #define PIN_HIGH BIT(9) #define PIN_OFFSET_BIAS BIT(10) #define PIN_OFFSET_FIXED BIT(11) -#define PIN_OFFSET_MASK (~4095) +#define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5af19b0bf713..63ae7e813335 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -187,14 +187,14 @@ int i915_gem_render_state_init(struct intel_engine_cs *engine) if (!rodata) return 0; - if (rodata->batch_items * 4 > 4096) + if (rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; so = kmalloc(sizeof(*so), GFP_KERNEL); if (!so) return -ENOMEM; - obj = i915_gem_object_create_internal(engine->i915, 4096); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_free; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f1a1d33febcd..e5be8e04bf3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -647,8 +647,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv stolen_offset, gtt_offset, size); /* KISS and expect everything to be page-aligned */ - if (WARN_ON(size == 0) || WARN_ON(size & 4095) || - WARN_ON(stolen_offset & 4095)) + if (WARN_ON(size == 0) || + WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || + WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) return NULL; stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 
4f83e331f598..b1361cfd4c5c 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -82,7 +82,7 @@ u32 i915_gem_fence_size(struct drm_i915_private *i915, if (INTEL_GEN(i915) >= 4) { stride *= i915_gem_tile_height(tiling); - GEM_BUG_ON(stride & 4095); + GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE)); return roundup(size, stride); } @@ -117,8 +117,11 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_GEN(i915) >= 4 || tiling == I915_TILING_NONE) - return 4096; + if (tiling == I915_TILING_NONE) + return I915_GTT_MIN_ALIGNMENT; + + if (INTEL_GEN(i915) >= 4) + return I965_FENCE_PAGE; /* * Previous chips need to be aligned to the size of the smallest @@ -170,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, else tile_width = 512; - if (stride & (tile_width - 1)) + if (!IS_ALIGNED(stride, tile_width)) return false; /* 965+ just needs multiples of tile width */ @@ -195,7 +198,7 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, return false; alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride); - if (vma->node.start & (alignment - 1)) + if (!IS_ALIGNED(vma->node.start, alignment)) return false; return true; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f137475fab51..490914f89663 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,7 +91,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; - vma->display_alignment = 4096; + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; if (view) { vma->ggtt_view = *view; @@ -115,7 +115,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, i915_gem_object_get_tiling(obj), i915_gem_object_get_stride(obj)); - GEM_BUG_ON(vma->fence_size & 4095); + GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT)); vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size, i915_gem_object_get_tiling(obj), @@ -270,7 +270,8 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (vma->node.size < size) return true; - if (alignment && vma->node.start & (alignment - 1)) + GEM_BUG_ON(alignment && !is_power_of_2(alignment)); + if (alignment && !IS_ALIGNED(vma->node.start, alignment)) return true; if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) @@ -302,7 +303,7 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) return; fenceable = (vma->node.size >= vma->fence_size && - (vma->node.start & (vma->fence_alignment - 1)) == 0); + IS_ALIGNED(vma->node.start, vma->fence_alignment)); mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; @@ -380,13 +381,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) alignment, vma->fence_alignment); } + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); + GEM_BUG_ON(!is_power_of_2(alignment)); + start = flags & PIN_OFFSET_BIAS ? 
flags & PIN_OFFSET_MASK : 0; + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); end = vma->vm->total; if (flags & PIN_MAPPABLE) end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); + end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain @@ -406,7 +413,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; - if (offset & (alignment - 1) || + if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; goto err_unpin; @@ -440,7 +447,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) * with zero alignment, so where possible use the optimal * path. */ - if (alignment <= 4096) + if (alignment <= I915_GTT_MIN_ALIGNMENT) alignment = 0; search_free: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6db246ad2f13..81665a9eb43f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1927,7 +1927,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->emit_breadcrumb = gen8_emit_breadcrumb_render; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; - ret = intel_engine_create_scratch(engine, 4096); + ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) return ret; @@ -2209,7 +2209,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), 4096); + context_size = round_up(intel_lr_context_size(engine), + I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 01ba36ea125e..0c852c024227 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -26,7 +26,7 @@ #include "intel_ringbuffer.h" -#define GEN8_LR_CONTEXT_ALIGN 4096 +#define GEN8_LR_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT /* Execlists regs */ #define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0971ac396b60..ab83fc22d207 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1736,7 +1736,7 @@ static int init_status_page(struct intel_engine_cs *engine) void *vaddr; int ret; - obj = i915_gem_object_create_internal(engine->i915, 4096); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); return PTR_ERR(obj); @@ -1777,7 +1777,7 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, 4096); + engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", engine->name, i915_ggtt_offset(vma)); @@ -2049,7 +2049,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) } /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ - ret = intel_ring_pin(ring, 4096); + ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); if (ret) { intel_ring_free(ring); goto error; @@ -2466,7 +2466,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { struct i915_vma *vma; - obj = i915_gem_object_create(dev_priv, 4096); + obj = i915_gem_object_create(dev_priv, PAGE_SIZE); if (IS_ERR(obj)) goto err; @@ -2683,7 +2683,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) return ret; if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_engine_create_scratch(engine, 4096); + ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) return ret; } else if (HAS_BROKEN_CS_TLB(dev_priv)) { -- cgit From 8726f2faa371514fba2f594d799db95203dfeee0 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 12 Jan 2017 12:44:54 +0200 Subject: drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround. The WaDisableLSQCROPERFforOCL workaround has the side effect of disabling an L3SQ optimization that has huge performance implications and is unlikely to be necessary for the correct functioning of usual graphics workloads. Userspace is free to re-enable the workaround on demand, and is generally in a better position to determine whether the workaround is necessary than the DRM is (e.g. only during the execution of compute kernels that rely on both L3 fences and HDC R/W requests). The same workaround seems to apply to BDW (at least to production stepping G1) and SKL as well (the internal workaround database claims that it does for all steppings, while the BSpec workaround table only mentions pre-production steppings), but the DRM doesn't do anything beyond whitelisting the L3SQCREG4 register so userspace can enable it when it sees fit. Do the same on KBL platforms. Improves performance of the GFXBench4 gl_manhattan31 benchmark by 60%, and gl_4 (AKA car chase) by 14% on a KBL GT2 running Mesa master. This is followed by a regression of 35% and 10% respectively for the same benchmarks and platform, caused by my recent patch series switching userspace to use the dataport constant cache instead of the sampler to implement uniform pull constant loads, which caused us to hit the L3 cache more heavily (on platforms other than KBL it had the opposite effect of improving performance of the same two benchmarks). The overall effect on KBL of this change combined with the recent userspace change is an improvement of 4.6% and 2.6% respectively. SynMark2 OglShMapPcf was affected by the constant cache changes (though it improved as it did on other platforms rather than regressing), but is not significantly affected by this patch (with statistical significance of 5% and sample size 20). v2: Drop some more code to avoid unused variable warning. 
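As a purely hypothetical sketch (not part of this patch), a userspace driver re-enabling the workaround around a compute kernel would emit an unprivileged write to the whitelisted register, along these lines (dword encoding abbreviated; register offset and bit name taken from i915_reg.h, 'l3sqcreg4' is an assumed variable holding the current register value):

	/* MI_LOAD_REGISTER_IMM works from an unprivileged batch because
	 * L3SQCREG4 is on the register whitelist. */
	batch[i++] = MI_LOAD_REGISTER_IMM(1);
	batch[i++] = 0xb118;				/* GEN8_L3SQCREG4 */
	batch[i++] = l3sqcreg4 | GEN8_LQSC_RO_PERF_DIS;
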
Fixes: 738fa1b3123f ("drm/i915/kbl: Add WaDisableLSQCROPERFforOCL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99256 Signed-off-by: Francisco Jerez Cc: Matthew Auld Cc: Eero Tamminen Cc: Jani Nikula Cc: Mika Kuoppala Cc: beignet@lists.freedesktop.org Cc: # v4.7+ Reviewed-by: Mika Kuoppala [Removed double Fixes tag] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484217894-20505-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 2 files changed, 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index db714dcf92a6..8acab875fcfc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -970,18 +970,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, uint32_t *batch, uint32_t index) { - struct drm_i915_private *dev_priv = engine->i915; uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - /* - * WaDisableLSQCROPERFforOCL:kbl - * This WA is implemented in skl_init_clock_gating() but since - * this batch updates GEN8_L3SQCREG4 with default value we need to - * set this bit here to retain the WA during flush. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ab83fc22d207..49fa8006c6a2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1095,14 +1095,6 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE); - /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes - * involving this register should also be added to WA batch as required. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - /* WaDisableLSQCROPERFforOCL:kbl */ - I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_RO_PERF_DIS); - /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, -- cgit From a01cb37affb7ac698ed260c0e31d02af8df6b785 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jan 2017 15:21:30 +0000 Subject: drm/i915: Remove i915_vma_create from VMA API With the introduction of i915_vma_instance() for obtaining the VMA singleton for a (obj, vm, view) tuple, we can remove i915_vma_create() in favour of a single entry point. We do incur a lookup onto an empty tree, but i915_vma_create() was being called infrequently and during initialisation, so the small overhead is negligible. 
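The surviving entry point then reduces to lookup-or-create (condensed from the resulting i915_vma_instance() in the diff below; GEM_BUG_ON asserts omitted):

	struct i915_vma *
	i915_vma_instance(struct drm_i915_gem_object *obj,
			  struct i915_address_space *vm,
			  const struct i915_ggtt_view *view)
	{
		struct i915_vma *vma;

		vma = i915_vma_lookup(obj, vm, view);	/* rbtree lookup of the singleton */
		if (!vma)
			vma = vma_create(obj, vm, view);	/* now the only constructor */
		return vma;
	}
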
v2: Drop the i915_ prefix from the now static vma_create() function Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 35 ++++------------------------ drivers/gpu/drm/i915/i915_vma.h | 5 ---- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++--- 8 files changed, 13 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0a4728fdecdc..17f90c618208 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -269,7 +269,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, goto err_out; } - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) { i915_gem_object_put(obj); ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 63ae7e813335..b42c81b42487 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -200,7 +200,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *engine) goto err_free; } - so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(so->vma)) { ret = PTR_ERR(so->vma); goto err_obj; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c3277858cd8c..8ced9e26f075 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -566,7 +566,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b4d7b51266d2..cb415bfe22d7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -69,16 +69,14 @@ i915_vma_retire(struct i915_gem_active *active, } static struct i915_vma * -__i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) +vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; struct rb_node *rb, **p; int i; - GEM_BUG_ON(vm->closed); - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -186,31 +184,6 @@ i915_vma_lookup(struct drm_i915_gem_object *obj, return NULL; } -/** - * i915_vma_create - creates a VMA - * @obj: parent &struct drm_i915_gem_object to be mapped - * @vm: address space in which the mapping is located - * @view: additional mapping requirements - * - * i915_vma_create() allocates a new VMA of the @obj in the @vm with - * @view characteristics. - * - * Must be called with struct_mutex held. - * - * Returns the vma if found, or an error pointer. 
- */ -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_vma_lookup(obj, vm, view)); - - return __i915_vma_create(obj, vm, view); -} - /** * i915_vma_instance - return the singleton instance of the VMA * @obj: parent &struct drm_i915_gem_object to be mapped @@ -239,7 +212,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, vma = i915_vma_lookup(obj, vm, view); if (!vma) - vma = i915_vma_create(obj, vm, view); + vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index b3c81190b4a0..82a56193985c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -111,11 +111,6 @@ struct i915_vma { struct drm_i915_gem_exec_object2 *exec_entry; }; -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); - struct i915_vma * i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 97bbbc3d6aa8..371acf109e34 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -264,7 +264,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) return PTR_ERR(obj); } - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unref; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f8dcd9a9524..432ee495dec2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1225,7 +1225,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -2198,7 +2198,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 49fa8006c6a2..69035e4f9b3b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1738,7 +1738,7 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -1872,7 +1872,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err; @@ -2462,7 +2462,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if 
(IS_ERR(obj)) goto err; - vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); if (IS_ERR(vma)) goto err_obj; -- cgit From ec62ed3e1d93843b382c222bc0d81546f12c97b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 15:24:37 +0000 Subject: drm/i915: Restore context and pd for ringbuffer submission after reset Following a reset, the context and page directory registers are lost. However, the queue of requests that we resubmit after the reset may depend upon them - the registers are restored from a context image, but that restore may be inhibited and may simply be absent from the request if it was in the middle of a sequence using the same context. If we prime the CCID/PD registers with the first request in the queue (even for the hung request), we prevent invalid memory access for the following requests (and continually hung engines). v2: Magic BIT(8), reserved for future use but still appears unused. v3: Some commentary on handling innocent vs guilty requests v4: Add a wait for PD_BASE fetch. The reload appears to be instant on my Ivybridge, but this bit probably exists for a reason. Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170207152437.4252-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit c0dcb203fb009678e5be9e7782329dcfbbf16439) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 18 ++++------ drivers/gpu/drm/i915/i915_reg.h | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++++++++++++++++++++++++-- 4 files changed, 81 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8689892a89f..c7eba361c14d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2735,21 +2735,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->irq_seqno_barrier(engine); request = i915_gem_find_active_request(engine); - if (!request) - return; - - if (!i915_gem_reset_request(request)) - return; + if (request && i915_gem_reset_request(request)) { + DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", + engine->name, request->global_seqno); - DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", - engine->name, request->global_seqno); + /* If this context is now banned, skip all pending requests. */ + if (i915_gem_context_is_banned(request->ctx)) + engine_skip_context(request); + } /* Setup the CS to resume from the breadcrumb of the hung request */ engine->reset_hw(engine, request); - - /* If this context is now banned, skip all of its pending requests. 
*/ - if (i915_gem_context_is_banned(request->ctx)) - engine_skip_context(request); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 72f9f36ae5ce..675323189f2c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3307,8 +3307,10 @@ enum skl_disp_power_wells { /* * Logical Context regs */ -#define CCID _MMIO(0x2180) -#define CCID_EN (1<<0) +#define CCID _MMIO(0x2180) +#define CCID_EN BIT(0) +#define CCID_EXTENDED_STATE_RESTORE BIT(2) +#define CCID_EXTENDED_STATE_SAVE BIT(3) /* * Notes on SNB/IVB/VLV context size: * - Power context is saved elsewhere (LLC or stolen) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2e767ebff084..ebf8023d21e6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1390,7 +1390,20 @@ static void reset_common_ring(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce; + + /* If the request was innocent, we leave the request in the ELSP + * and will try to replay it on restarting. The context image may + * have been corrupted by the reset, in which case we may have + * to service a new GPU hang, but more likely we can continue on + * without impact. + * + * If the request was guilty, we presume the context is corrupt + * and have to at least restore the RING register in the context + * image back to the expected values to skip over the guilty request. + */ + if (!request || request->fence.error != -EIO) + return; /* We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -1399,6 +1412,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ + ce = &request->ctx->engine[engine->id]; execlists_init_reg_state(ce->lrc_reg_state, request->ctx, engine, ce->ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69035e4f9b3b..91bc4abf5d3e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -599,10 +599,62 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct intel_ring *ring = request->ring; + /* Try to restore the logical GPU state to match the continuation + * of the request queue. If we skip the context/PD restore, then + * the next request may try to execute assuming that its context + * is valid and loaded on the GPU and so may try to access invalid + * memory, prompting repeated GPU hangs. + * + * If the request was guilty, we still restore the logical state + * in case the next request requires it (e.g. the aliasing ppgtt), + * but skip over the hung batch. + * + * If the request was innocent, we try to replay the request with + * the restored context. + */ + if (request) { + struct drm_i915_private *dev_priv = request->i915; + struct intel_context *ce = &request->ctx->engine[engine->id]; + struct i915_hw_ppgtt *ppgtt; + + /* FIXME consider gen8 reset */ + + if (ce->state) { + I915_WRITE(CCID, + i915_ggtt_offset(ce->state) | + BIT(8) /* must be set! 
*/ | + CCID_EXTENDED_STATE_SAVE | + CCID_EXTENDED_STATE_RESTORE | + CCID_EN); + } - ring->head = request->postfix; - ring->last_retired_head = -1; + ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt; + if (ppgtt) { + u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; + + I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); + + /* Wait for the PD reload to complete */ + if (intel_wait_for_register(dev_priv, + RING_PP_DIR_BASE(engine), + BIT(0), 0, + 10)) + DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); + + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); + } + + /* If the rq hung, jump to its breadcrumb and skip the batch */ + if (request->fence.error == -EIO) { + struct intel_ring *ring = request->ring; + + ring->head = request->postfix; + ring->last_retired_head = -1; + } + } else { + engine->legacy_active_context = NULL; + } } static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -- cgit
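In outline, the legacy reset path above now restores the logical state before resuming (condensed from reset_ring_common() in the diff; error handling and the NULL-request case trimmed):

	/* Prime CCID from the context image of the first resubmitted request */
	I915_WRITE(CCID, i915_ggtt_offset(ce->state) | BIT(8) /* must be set! */ |
		   CCID_EXTENDED_STATE_SAVE | CCID_EXTENDED_STATE_RESTORE | CCID_EN);

	/* Reload the page directory and wait for the fetch to complete */
	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(engine), ppgtt->pd.base.ggtt_offset << 10);
	intel_wait_for_register(dev_priv, RING_PP_DIR_BASE(engine), BIT(0), 0, 10);

	/* A guilty request is skipped by jumping straight to its breadcrumb */
	if (request->fence.error == -EIO)
		ring->head = request->postfix;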