From d7f46fc4e7323887494db13f063a8e59861fefb0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:55 -0800 Subject: drm/i915: Make pin count per VMA Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e05a0216cd9b..75c8883f58c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -549,7 +549,7 @@ init_pipe_control(struct intel_ring_buffer *ring) return 0; err_unpin: - i915_gem_object_unpin(ring->scratch.obj); + i915_gem_object_ggtt_unpin(ring->scratch.obj); err_unref: drm_gem_object_unreference(&ring->scratch.obj->base); err: @@ -625,7 +625,7 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring) if (INTEL_INFO(dev)->gen >= 5) { kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_unpin(ring->scratch.obj); + i915_gem_object_ggtt_unpin(ring->scratch.obj); } drm_gem_object_unreference(&ring->scratch.obj->base); @@ -1250,7 +1250,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) return; kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); ring->status_page.obj = NULL; } @@ -1290,7 +1290,7 @@ static int init_status_page(struct intel_ring_buffer *ring) return 0; err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); err_unref: drm_gem_object_unreference(&obj->base); err: @@ -1387,7 +1387,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, err_unmap: iounmap(ring->virtual_start); err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); err_unref: drm_gem_object_unreference(&obj->base); ring->obj = NULL; @@ -1415,7 +1415,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) iounmap(ring->virtual_start); - i915_gem_object_unpin(ring->obj); + i915_gem_object_ggtt_unpin(ring->obj); drm_gem_object_unreference(&ring->obj->base); ring->obj = NULL; ring->preallocated_lazy_request = NULL; -- cgit From 1f70999f9052f5a1b0ce1a55aff3808f2ec9fe42 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Jan 2014 22:43:07 +0000 Subject: drm/i915: Prevent recursion by retiring requests when the ring is full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the VM do not track activity of objects and instead use a large hammer to forcibly idle and evict all of their associated objects when one is released, it is possible for that to cause a recursion when we need to wait for free space on a ring and call retire requests. (intel_ring_begin -> intel_ring_wait_request -> i915_gem_retire_requests_ring -> i915_gem_context_free -> i915_gem_evict_vm -> i915_gpu_idle -> intel_ring_begin etc) In order to remove the requirement for calling retire-requests from intel_ring_wait_request, we have to inline a couple of steps from retiring requests, notably we have to record the position of the request we wait for and use that to update the available ring space. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d897a19f887f..ba686d75ff32 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1430,28 +1430,16 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) cleanup_status_page(ring); } -static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno) -{ - int ret; - - ret = i915_wait_seqno(ring, seqno); - if (!ret) - i915_gem_retire_requests_ring(ring); - - return ret; -} - static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) { struct drm_i915_gem_request *request; - u32 seqno = 0; + u32 seqno = 0, tail; int ret; - i915_gem_retire_requests_ring(ring); - if (ring->last_retired_head != -1) { ring->head = ring->last_retired_head; ring->last_retired_head = -1; + ring->space = ring_space(ring); if (ring->space >= n) return 0; @@ -1468,6 +1456,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) space += ring->size; if (space >= n) { seqno = request->seqno; + tail = request->tail; break; } @@ -1482,15 +1471,11 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) if (seqno == 0) return -ENOSPC; - ret = intel_ring_wait_seqno(ring, seqno); + ret = i915_wait_seqno(ring, seqno); if (ret) return ret; - if (WARN_ON(ring->last_retired_head == -1)) - return -ENOSPC; - - ring->head = ring->last_retired_head; - ring->last_retired_head = -1; + ring->head = tail; ring->space = ring_space(ring); if (WARN_ON(ring->space < n)) return -ENOSPC; -- cgit From fb19e2ac7cc0f4addbdb1577501a4cdfb6183e7d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 12 Feb 2014 23:44:34 +0100 Subject: drm/i915: protect ringbuffer sarea update behind !MODESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids surprises when userspace races open/closes against this. Cc: Stéphane Marchesin Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ba686d75ff32..ae6d234b8c12 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1513,7 +1513,8 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) return 0; } - if (dev->primary->master) { + if (!drm_core_check_feature(dev, DRIVER_MODESET) && + dev->primary->master) { struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; if (master_priv->sarea_priv) master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; -- cgit From 1ec9e26ddab06459e89a890431b2de064c5d1056 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:01:11 +0100 Subject: drm/i915: Consolidate binding parameters into flags Anything more than just one bool parameter is just a pain to read, symbolic constants are much better. Split out from Chris' vma-binding rework patch. v2: Undo the behaviour change in object_pin that Chris spotted. v3: Split out misplaced hunk to handle set_cache_level errors, spotted by Jani. v4: Keep the current over-zealous binding logic in the execbuffer code working with a quick hack while the overall binding code gets shuffled around. v5: Reorder the PIN_ flags for more natural patch splitup. v6: Pull out the PIN_GLOBAL split-up again. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 14 +++---- drivers/gpu/drm/i915/i915_gem.c | 62 ++++++++++++------------------ drivers/gpu/drm/i915/i915_gem_context.c | 9 ++--- drivers/gpu/drm/i915/i915_gem_evict.c | 10 ++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 ++++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_trace.h | 20 +++++----- drivers/gpu/drm/i915/intel_overlay.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++--- 10 files changed, 70 insertions(+), 81 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d33199452c11..8a6db27dd966 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2076,11 +2076,12 @@ void i915_init_vm(struct drm_i915_private *dev_priv, void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_vma_destroy(struct i915_vma *vma); +#define PIN_MAPPABLE 0x1 +#define PIN_NONBLOCK 0x2 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - bool map_and_fenceable, - bool nonblocking); + unsigned flags); void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); int __must_check i915_vma_unbind(struct i915_vma *vma); int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj); @@ -2283,11 +2284,9 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj) static inline int __must_check i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, uint32_t alignment, - bool map_and_fenceable, - bool nonblocking) + unsigned flags) { - return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, - map_and_fenceable, nonblocking); + return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags); } /* i915_gem_context.c */ @@ -2331,8 +2330,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignment, unsigned cache_level, - bool mappable, - bool nonblock); + unsigned flags); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); int i915_gem_evict_everything(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dee560267b1d..aa263e371ebc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o static __must_check int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool readonly); -static __must_check int -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - unsigned alignment, - bool map_and_fenceable, - bool nonblocking); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, char __user *user_data; int page_offset, page_length, ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); if (ret) goto out; @@ -1411,7 +1405,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now bind it into the GTT if needed */ - ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); if (ret) goto unlock; @@ -2721,7 +2715,6 @@ int i915_vma_unbind(struct i915_vma *vma) if (!drm_mm_node_allocated(&vma->node)) { i915_gem_vma_destroy(vma); - return 0; } @@ -3219,14 +3212,13 @@ static int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, - bool map_and_fenceable, - bool nonblocking) + unsigned flags) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; size_t gtt_max = - map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; + flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3238,18 +3230,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, obj->tiling_mode, true); unfenced_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, false); + obj->base.size, + obj->tiling_mode, false); if (alignment == 0) - alignment = map_and_fenceable ? fence_alignment : + alignment = flags & PIN_MAPPABLE ? fence_alignment : unfenced_alignment; - if (map_and_fenceable && alignment & (fence_alignment - 1)) { + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { DRM_DEBUG("Invalid object alignment requested %u\n", alignment); return -EINVAL; } - size = map_and_fenceable ? fence_size : obj->base.size; + size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; /* If the object is bigger than the entire aperture, reject it early * before evicting everything in a vain attempt to find space. @@ -3257,7 +3249,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (obj->base.size > gtt_max) { DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", obj->base.size, - map_and_fenceable ? "mappable" : "total", + flags & PIN_MAPPABLE ? "mappable" : "total", gtt_max); return -E2BIG; } @@ -3281,9 +3273,7 @@ search_free: DRM_MM_SEARCH_DEFAULT); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, - obj->cache_level, - map_and_fenceable, - nonblocking); + obj->cache_level, flags); if (ret == 0) goto search_free; @@ -3314,9 +3304,9 @@ search_free: obj->map_and_fenceable = mappable && fenceable; } - WARN_ON(map_and_fenceable && !obj->map_and_fenceable); + WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); - trace_i915_vma_bind(vma, map_and_fenceable); + trace_i915_vma_bind(vma, flags); i915_gem_verify_gtt(dev); return 0; @@ -3687,7 +3677,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); + ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); if (ret) goto err_unpin_display; @@ -3843,30 +3833,28 @@ int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - bool map_and_fenceable, - bool nonblocking) + unsigned flags) { - const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0; struct i915_vma *vma; int ret; - WARN_ON(map_and_fenceable && !i915_is_ggtt(vm)); + if (WARN_ON(flags & PIN_MAPPABLE && !i915_is_ggtt(vm))) + return -EINVAL; vma = i915_gem_obj_to_vma(obj, vm); - if (vma) { if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; if ((alignment && vma->node.start & (alignment - 1)) || - (map_and_fenceable && !obj->map_and_fenceable)) { + (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) { WARN(vma->pin_count, "bo is already pinned with incorrect alignment:" " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", i915_gem_obj_offset(obj, vm), alignment, - map_and_fenceable, + flags & PIN_MAPPABLE, obj->map_and_fenceable); ret = i915_vma_unbind(vma); if (ret) @@ -3875,9 +3863,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, } if (!i915_gem_obj_bound(obj, vm)) { - ret = i915_gem_object_bind_to_vm(obj, vm, alignment, - map_and_fenceable, - nonblocking); + ret = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); if (ret) return ret; @@ -3885,10 +3871,12 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, vma = i915_gem_obj_to_vma(obj, vm); - vma->bind_vma(vma, obj->cache_level, flags); + vma->bind_vma(vma, obj->cache_level, + flags & PIN_MAPPABLE ? GLOBAL_BIND : 0); i915_gem_obj_to_vma(obj, vm)->pin_count++; - obj->pin_mappable |= map_and_fenceable; + if (flags & PIN_MAPPABLE) + obj->pin_mappable |= true; return 0; } @@ -3946,7 +3934,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } if (obj->user_pin_count == 0) { - ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false); + ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 19fd3629795c..f8c21a6dd663 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -258,8 +258,7 @@ i915_gem_create_context(struct drm_device *dev, * context. */ ret = i915_gem_obj_ggtt_pin(ctx->obj, - get_context_alignment(dev), - false, false); + get_context_alignment(dev), 0); if (ret) { DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; @@ -335,8 +334,7 @@ void i915_gem_context_reset(struct drm_device *dev) if (i == RCS) { WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj, - get_context_alignment(dev), - false, false)); + get_context_alignment(dev), 0)); /* Fake a finish/inactive */ dctx->obj->base.write_domain = 0; dctx->obj->active = 0; @@ -612,8 +610,7 @@ static int do_switch(struct intel_ring_buffer *ring, /* Trying to pin first makes error handling easier. */ if (ring == &dev_priv->ring[RCS]) { ret = i915_gem_obj_ggtt_pin(to->obj, - get_context_alignment(ring->dev), - false, false); + get_context_alignment(ring->dev), 0); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 5168d6a08054..8a78f7885cba 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -68,7 +68,7 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) int i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, int min_size, unsigned alignment, unsigned cache_level, - bool mappable, bool nonblocking) + unsigned flags) { drm_i915_private_t *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; @@ -76,7 +76,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, int ret = 0; int pass = 0; - trace_i915_gem_evict(dev, min_size, alignment, mappable); + trace_i915_gem_evict(dev, min_size, alignment, flags); /* * The goal is to evict objects and amalgamate space in LRU order. @@ -102,7 +102,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, */ INIT_LIST_HEAD(&unwind_list); - if (mappable) { + if (flags & PIN_MAPPABLE) { BUG_ON(!i915_is_ggtt(vm)); drm_mm_init_scan_with_range(&vm->mm, min_size, alignment, cache_level, 0, @@ -117,7 +117,7 @@ search_again: goto found; } - if (nonblocking) + if (flags & PIN_NONBLOCK) goto none; /* Now merge in the soon-to-be-expired objects... */ @@ -141,7 +141,7 @@ none: /* Can we unpin some objects such as idle hw contents, * or pending flips? */ - if (nonblocking) + if (flags & PIN_NONBLOCK) return -ENOSPC; /* Only idle the GPU and repeat the search once */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 032def901f98..013bd5ab3913 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence, need_mappable; - u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) && - !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0; + bool need_fence; + unsigned flags; int ret; + flags = 0; + need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); + if (need_fence || need_reloc_mappable(vma)) + flags |= PIN_MAPPABLE; - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable, - false); + if (entry->flags & EXEC_OBJECT_NEEDS_GTT) + flags |= PIN_MAPPABLE; + + ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); if (ret) return ret; @@ -585,6 +589,9 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; } + /* Temporary hack while we rework the binding logic. */ + flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) && + !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0; vma->bind_vma(vma, obj->cache_level, flags); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ee38fafc6917..1dcd50541ae2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -885,7 +885,7 @@ alloc: if (ret == -ENOSPC && !retried) { ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, GEN6_PD_SIZE, GEN6_PD_ALIGN, - I915_CACHE_NONE, false, true); + I915_CACHE_NONE, PIN_NONBLOCK); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6e580c98dede..b95a380958db 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create, ); TRACE_EVENT(i915_vma_bind, - TP_PROTO(struct i915_vma *vma, bool mappable), - TP_ARGS(vma, mappable), + TP_PROTO(struct i915_vma *vma, unsigned flags), + TP_ARGS(vma, flags), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u32, offset) __field(u32, size) - __field(bool, mappable) + __field(unsigned, flags) ), TP_fast_assign( @@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind, __entry->vm = vma->vm; __entry->offset = vma->node.start; __entry->size = vma->node.size; - __entry->mappable = mappable; + __entry->flags = flags; ), TP_printk("obj=%p, offset=%08x size=%x%s vm=%p", __entry->obj, __entry->offset, __entry->size, - __entry->mappable ? ", mappable" : "", + __entry->flags & PIN_MAPPABLE ? ", mappable" : "", __entry->vm) ); @@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable), - TP_ARGS(dev, size, align, mappable), + TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags), + TP_ARGS(dev, size, align, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, size) __field(u32, align) - __field(bool, mappable) + __field(unsigned, flags) ), TP_fast_assign( __entry->dev = dev->primary->index; __entry->size = size; __entry->align = align; - __entry->mappable = mappable; + __entry->flags = flags; ), TP_printk("dev=%d, size=%d, align=%d %s", __entry->dev, __entry->size, __entry->align, - __entry->mappable ? ", mappable" : "") + __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); TRACE_EVENT(i915_gem_evict_everything, diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 424f0946d8c4..ac519cb46f22 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev) } overlay->flip_addr = reg_bo->phys_obj->handle->busaddr; } else { - ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false); + ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); if (ret) { DRM_ERROR("failed to pin overlay register bo\n"); goto out_free_bo; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e4a0c9cc226d..136647037244 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2741,7 +2741,7 @@ intel_alloc_context_page(struct drm_device *dev) return NULL; } - ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false); + ret = i915_gem_obj_ggtt_pin(ctx, 4096, PIN_MAPPABLE); if (ret) { DRM_ERROR("failed to pin power context: %d\n", ret); goto err_unref; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae6d234b8c12..f256d5fe46f8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -533,7 +533,7 @@ init_pipe_control(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false); + ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0); if (ret) goto err_unref; @@ -1273,10 +1273,9 @@ static int init_status_page(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false); - if (ret != 0) { + ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_MAPPABLE); + if (ret) goto err_unref; - } ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); @@ -1356,7 +1355,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->obj = obj; - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false); + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); if (ret) goto err_unref; @@ -1919,7 +1918,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return -ENOMEM; } - ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); if (ret != 0) { drm_gem_object_unreference(&obj->base); DRM_ERROR("Failed to ping batch bo\n"); -- cgit From a9cc726c8546bc6efa43b86465570a4447a54722 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:01:13 +0100 Subject: drm/i915: Handle set_cache_level errors in the pipe control scratch setup Split out from Chris vma-bind rework. Cc: Chris Wilson Cc: Ben Widawsky Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f256d5fe46f8..0fd6ba0c4418 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -531,7 +531,9 @@ init_pipe_control(struct intel_ring_buffer *ring) goto err; } - i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC); + ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC); + if (ret) + goto err_unref; ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0); if (ret) -- cgit From be1fa129f5a31e78ce0a692f0e30c2ff0fad9ebe Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:01:14 +0100 Subject: drm/i915: Don't set PIN_MAPPABLE for legacy ringbuffers Tighter code since legacy gem has only mappable anyway. Split out from Chris vma-bind rework. Note that this is only possible due to the split-up of the mappable pin flag into PIN_GLOBAL and PIN_MAPPABLE. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0fd6ba0c4418..bcaa14976c58 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1920,7 +1920,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return -ENOMEM; } - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + ret = i915_gem_obj_ggtt_pin(obj, 0, 0); if (ret != 0) { drm_gem_object_unreference(&obj->base); DRM_ERROR("Failed to ping batch bo\n"); -- cgit From 9a6bbb62161a3b7b124ecd8325a928778dfcd67e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:01:15 +0100 Subject: drm/i915: Don't pin the status page as mappable We access it through the cpu window. No functional difference expected atm since we default to a bottom-up allocation scheme. But that might eventually change so that we prefer the unmappable range for buffers that don't need cpu gtt access. Split out from Chris vma-bind rework. Note that this is only possible due to the split-up of the mappable pin flag into PIN_GLOBAL and PIN_MAPPABLE. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bcaa14976c58..b1880a409251 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1275,7 +1275,7 @@ static int init_status_page(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_MAPPABLE); + ret = i915_gem_obj_ggtt_pin(obj, 4096, 0); if (ret) goto err_unref; -- cgit From e01f69295b32dfd756d768a1fdae124108bfec04 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:01:16 +0100 Subject: drm/i915: Handle set_cache_level errors in the status page setup Split out from Chris vma-bind rework. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b1880a409251..76162acf7015 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1273,7 +1273,9 @@ static int init_status_page(struct intel_ring_buffer *ring) goto err; } - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err_unref; ret = i915_gem_obj_ggtt_pin(obj, 4096, 0); if (ret) -- cgit From 8285222c487b61c48b9b955b82598544c3c06050 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 27 Feb 2014 21:59:03 +0200 Subject: drm/i915: We implement WaDisableAsyncFlipPerfMode:bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 76162acf7015..b2d4f4852c98 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -571,7 +571,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * - * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw */ if (INTEL_INFO(dev)->gen >= 6) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); -- cgit From 351e3db2b3631556607d0d94fa26df8e2e0d0fd8 Mon Sep 17 00:00:00 2001 From: Brad Volkin Date: Tue, 18 Feb 2014 10:15:46 -0800 Subject: drm/i915: Implement command buffer parsing logic The command parser scans batch buffers submitted via execbuffer ioctls before the driver submits them to hardware. At a high level, it looks for several things: 1) Commands which are explicitly defined as privileged or which should only be used by the kernel driver. The parser generally rejects such commands, with the provision that it may allow some from the drm master process. 2) Commands which access registers. To support correct/enhanced userspace functionality, particularly certain OpenGL extensions, the parser provides a whitelist of registers which userspace may safely access (for both normal and drm master processes). 3) Commands which access privileged memory (i.e. GGTT, HWS page, etc). The parser always rejects such commands. See the overview comment in the source for more details. This patch only implements the logic. Subsequent patches will build the tables that drive the parser. v2: Don't set the secure bit if the parser succeeds Fail harder during init Makefile cleanup Kerneldoc cleanup Clarify module param description Convert ints to bools in a few places Move client/subclient defs to i915_reg.h Remove the bits_count field OTC-Tracker: AXIA-4631 Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30 Signed-off-by: Brad Volkin Reviewed-by: Jani Nikula [danvet: Appease checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 485 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 93 ++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 ++ drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_reg.h | 12 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 32 ++ 8 files changed, 648 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4850494bd548..3569122b1995 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \ i915_gem_gtt.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ + i915_cmd_parser.o \ i915_params.o \ i915_sysfs.o \ i915_trace_points.o \ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c new file mode 100644 index 000000000000..7a5756e9bb86 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -0,0 +1,485 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Brad Volkin + * + */ + +#include "i915_drv.h" + +/** + * DOC: i915 batch buffer command parser + * + * Motivation: + * Certain OpenGL features (e.g. transform feedback, performance monitoring) + * require userspace code to submit batches containing commands such as + * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some + * generations of the hardware will noop these commands in "unsecure" batches + * (which includes all userspace batches submitted via i915) even though the + * commands may be safe and represent the intended programming model of the + * device. + * + * The software command parser is similar in operation to the command parsing + * done in hardware for unsecure batches. However, the software parser allows + * some operations that would be noop'd by hardware, if the parser determines + * the operation is safe, and submits the batch as "secure" to prevent hardware + * parsing. + * + * Threats: + * At a high level, the hardware (and software) checks attempt to prevent + * granting userspace undue privileges. There are three categories of privilege. + * + * First, commands which are explicitly defined as privileged or which should + * only be used by the kernel driver. The parser generally rejects such + * commands, though it may allow some from the drm master process. + * + * Second, commands which access registers. To support correct/enhanced + * userspace functionality, particularly certain OpenGL extensions, the parser + * provides a whitelist of registers which userspace may safely access (for both + * normal and drm master processes). + * + * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). + * The parser always rejects such commands. + * + * The majority of the problematic commands fall in the MI_* range, with only a + * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). + * + * Implementation: + * Each ring maintains tables of commands and registers which the parser uses in + * scanning batch buffers submitted to that ring. + * + * Since the set of commands that the parser must check for is significantly + * smaller than the number of commands supported, the parser tables contain only + * those commands required by the parser. This generally works because command + * opcode ranges have standard command length encodings. So for commands that + * the parser does not need to check, it can easily skip them. This is + * implementated via a per-ring length decoding vfunc. + * + * Unfortunately, there are a number of commands that do not follow the standard + * length encoding for their opcode range, primarily amongst the MI_* commands. + * To handle this, the parser provides a way to define explicit "skip" entries + * in the per-ring command tables. + * + * Other command table entries map fairly directly to high level categories + * mentioned above: rejected, master-only, register whitelist. The parser + * implements a number of checks, including the privileged memory checks, via a + * general bitmasking mechanism. + */ + +static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + u32 subclient = + (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_RC_CLIENT) { + if (subclient == INSTR_MEDIA_SUBCLIENT) + return 0xFFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + u32 subclient = + (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_RC_CLIENT) { + if (subclient == INSTR_MEDIA_SUBCLIENT) + return 0xFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static void validate_cmds_sorted(struct intel_ring_buffer *ring) +{ + int i; + + if (!ring->cmd_tables || ring->cmd_table_count == 0) + return; + + for (i = 0; i < ring->cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = &ring->cmd_tables[i]; + u32 previous = 0; + int j; + + for (j = 0; j < table->count; j++) { + const struct drm_i915_cmd_descriptor *desc = + &table->table[i]; + u32 curr = desc->cmd.value & desc->cmd.mask; + + if (curr < previous) + DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", + ring->id, i, j, curr, previous); + + previous = curr; + } + } +} + +static void check_sorted(int ring_id, const u32 *reg_table, int reg_count) +{ + int i; + u32 previous = 0; + + for (i = 0; i < reg_count; i++) { + u32 curr = reg_table[i]; + + if (curr < previous) + DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", + ring_id, i, curr, previous); + + previous = curr; + } +} + +static void validate_regs_sorted(struct intel_ring_buffer *ring) +{ + check_sorted(ring->id, ring->reg_table, ring->reg_count); + check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count); +} + +/** + * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer + * @ring: the ringbuffer to initialize + * + * Optionally initializes fields related to batch buffer command parsing in the + * struct intel_ring_buffer based on whether the platform requires software + * command parsing. + */ +void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) +{ + if (!IS_GEN7(ring->dev)) + return; + + switch (ring->id) { + case RCS: + ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + case BCS: + ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: + /* VECS can use the same length_mask function as VCS */ + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + default: + DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", + ring->id); + BUG(); + } + + validate_cmds_sorted(ring); + validate_regs_sorted(ring); +} + +static const struct drm_i915_cmd_descriptor* +find_cmd_in_table(const struct drm_i915_cmd_table *table, + u32 cmd_header) +{ + int i; + + for (i = 0; i < table->count; i++) { + const struct drm_i915_cmd_descriptor *desc = &table->table[i]; + u32 masked_cmd = desc->cmd.mask & cmd_header; + u32 masked_value = desc->cmd.value & desc->cmd.mask; + + if (masked_cmd == masked_value) + return desc; + } + + return NULL; +} + +/* + * Returns a pointer to a descriptor for the command specified by cmd_header. + * + * The caller must supply space for a default descriptor via the default_desc + * parameter. If no descriptor for the specified command exists in the ring's + * command parser tables, this function fills in default_desc based on the + * ring's default length encoding and returns default_desc. + */ +static const struct drm_i915_cmd_descriptor* +find_cmd(struct intel_ring_buffer *ring, + u32 cmd_header, + struct drm_i915_cmd_descriptor *default_desc) +{ + u32 mask; + int i; + + for (i = 0; i < ring->cmd_table_count; i++) { + const struct drm_i915_cmd_descriptor *desc; + + desc = find_cmd_in_table(&ring->cmd_tables[i], cmd_header); + if (desc) + return desc; + } + + mask = ring->get_cmd_length_mask(cmd_header); + if (!mask) + return NULL; + + BUG_ON(!default_desc); + default_desc->flags = CMD_DESC_SKIP; + default_desc->length.mask = mask; + + return default_desc; +} + +static bool valid_reg(const u32 *table, int count, u32 addr) +{ + if (table && count != 0) { + int i; + + for (i = 0; i < count; i++) { + if (table[i] == addr) + return true; + } + } + + return false; +} + +static u32 *vmap_batch(struct drm_i915_gem_object *obj) +{ + int i; + void *addr = NULL; + struct sg_page_iter sg_iter; + struct page **pages; + + pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); + if (pages == NULL) { + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); + goto finish; + } + + i = 0; + for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { + pages[i] = sg_page_iter_page(&sg_iter); + i++; + } + + addr = vmap(pages, i, 0, PAGE_KERNEL); + if (addr == NULL) { + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); + goto finish; + } + +finish: + if (pages) + drm_free_large(pages); + return (u32*)addr; +} + +/** + * i915_needs_cmd_parser() - should a given ring use software command parsing? + * @ring: the ring in question + * + * Only certain platforms require software batch buffer command parsing, and + * only when enabled via module paramter. + * + * Return: true if the ring requires software command parsing + */ +bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) +{ + /* No command tables indicates a platform without parsing */ + if (!ring->cmd_tables) + return false; + + return (i915.enable_cmd_parser == 1); +} + +#define LENGTH_BIAS 2 + +/** + * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ring: the ring on which the batch is to execute + * @batch_obj: the batch buffer in question + * @batch_start_offset: byte offset in the batch at which execution starts + * @is_master: is the submitting process the drm master? + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. + * + * Return: non-zero if the parser finds violations or otherwise fails + */ +int i915_parse_cmds(struct intel_ring_buffer *ring, + struct drm_i915_gem_object *batch_obj, + u32 batch_start_offset, + bool is_master) +{ + int ret = 0; + u32 *cmd, *batch_base, *batch_end; + struct drm_i915_cmd_descriptor default_desc = { 0 }; + int needs_clflush = 0; + + ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER("CMD: failed to prep read\n"); + return ret; + } + + batch_base = vmap_batch(batch_obj); + if (!batch_base) { + DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); + i915_gem_object_unpin_pages(batch_obj); + return -ENOMEM; + } + + if (needs_clflush) + drm_clflush_virt_range((char *)batch_base, batch_obj->base.size); + + cmd = batch_base + (batch_start_offset / sizeof(*cmd)); + batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end)); + + while (cmd < batch_end) { + const struct drm_i915_cmd_descriptor *desc; + u32 length; + + if (*cmd == MI_BATCH_BUFFER_END) + break; + + desc = find_cmd(ring, *cmd, &default_desc); + if (!desc) { + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", + *cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else + length = ((*cmd & desc->length.mask) + LENGTH_BIAS); + + if ((batch_end - cmd) < length) { + DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%d batchlen=%ld\n", + *cmd, + length, + batch_end - cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_REJECT) { + DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } + + if ((desc->flags & CMD_DESC_MASTER) && !is_master) { + DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", + *cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_REGISTER) { + u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; + + if (!valid_reg(ring->reg_table, + ring->reg_count, reg_addr)) { + if (!is_master || + !valid_reg(ring->master_reg_table, + ring->master_reg_count, + reg_addr)) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", + reg_addr, + *cmd, + ring->id); + ret = -EINVAL; + break; + } + } + } + + if (desc->flags & CMD_DESC_BITMASK) { + int i; + + for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) { + u32 dword; + + if (desc->bits[i].mask == 0) + break; + + dword = cmd[desc->bits[i].offset] & + desc->bits[i].mask; + + if (dword != desc->bits[i].expected) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + *cmd, + desc->bits[i].mask, + desc->bits[i].expected, + dword, ring->id); + ret = -EINVAL; + break; + } + } + + if (ret) + break; + } + + cmd += length; + } + + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; + } + + vunmap(batch_base); + + i915_gem_object_unpin_pages(batch_obj); + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ef386bf9cecd..7b6dfdfb2d8d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1849,6 +1849,90 @@ struct drm_i915_file_private { atomic_t rps_wait_boost; }; +/* + * A command that requires special handling by the command parser. + */ +struct drm_i915_cmd_descriptor { + /* + * Flags describing how the command parser processes the command. + * + * CMD_DESC_FIXED: The command has a fixed length if this is set, + * a length mask if not set + * CMD_DESC_SKIP: The command is allowed but does not follow the + * standard length encoding for the opcode range in + * which it falls + * CMD_DESC_REJECT: The command is never allowed + * CMD_DESC_REGISTER: The command should be checked against the + * register whitelist for the appropriate ring + * CMD_DESC_MASTER: The command is allowed if the submitting process + * is the DRM master + */ + u32 flags; +#define CMD_DESC_FIXED (1<<0) +#define CMD_DESC_SKIP (1<<1) +#define CMD_DESC_REJECT (1<<2) +#define CMD_DESC_REGISTER (1<<3) +#define CMD_DESC_BITMASK (1<<4) +#define CMD_DESC_MASTER (1<<5) + + /* + * The command's unique identification bits and the bitmask to get them. + * This isn't strictly the opcode field as defined in the spec and may + * also include type, subtype, and/or subop fields. + */ + struct { + u32 value; + u32 mask; + } cmd; + + /* + * The command's length. The command is either fixed length (i.e. does + * not include a length field) or has a length field mask. The flag + * CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has + * a length mask. All command entries in a command table must include + * length information. + */ + union { + u32 fixed; + u32 mask; + } length; + + /* + * Describes where to find a register address in the command to check + * against the ring's register whitelist. Only valid if flags has the + * CMD_DESC_REGISTER bit set. + */ + struct { + u32 offset; + u32 mask; + } reg; + +#define MAX_CMD_DESC_BITMASKS 3 + /* + * Describes command checks where a particular dword is masked and + * compared against an expected value. If the command does not match + * the expected value, the parser rejects it. Only valid if flags has + * the CMD_DESC_BITMASK bit set. Only entries where mask is non-zero + * are valid. + */ + struct { + u32 offset; + u32 mask; + u32 expected; + } bits[MAX_CMD_DESC_BITMASKS]; +}; + +/* + * A table of commands requiring special handling by the command parser. + * + * Each ring has an array of tables. Each table consists of an array of command + * descriptors, which must be sorted with command opcodes in ascending order. + */ +struct drm_i915_cmd_table { + const struct drm_i915_cmd_descriptor *table; + int count; +}; + #define INTEL_INFO(dev) (&to_i915(dev)->info) #define IS_I830(dev) ((dev)->pdev->device == 0x3577) @@ -2003,6 +2087,7 @@ struct i915_params { int enable_pc8; int pc8_timeout; int invert_brightness; + int enable_cmd_parser; /* leave bools at the end to not create holes */ bool enable_hangcheck; bool fastboot; @@ -2480,6 +2565,14 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); +/* i915_cmd_parser.c */ +void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring); +bool i915_needs_cmd_parser(struct intel_ring_buffer *ring); +int i915_parse_cmds(struct intel_ring_buffer *ring, + struct drm_i915_gem_object *batch_obj, + u32 batch_start_offset, + bool is_master); + /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d7229ad2bd22..3851a1b1dc88 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1182,6 +1182,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + if (i915_needs_cmd_parser(ring)) { + ret = i915_parse_cmds(ring, + batch_obj, + args->batch_start_offset, + file->is_master); + if (ret) + goto err; + + /* + * XXX: Actually do this when enabling batch copy... + * + * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit + * from MI_BATCH_BUFFER_START commands issued in the + * dispatch_execbuffer implementations. We specifically don't + * want that set when the command parser is enabled. + */ + } + /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 3b482585c5ae..a66ffb652bee 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = { .reset = true, .invert_brightness = 0, .disable_display = 0, + .enable_cmd_parser = 0, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -157,3 +158,7 @@ MODULE_PARM_DESC(invert_brightness, module_param_named(disable_display, i915.disable_display, bool, 0600); MODULE_PARM_DESC(disable_display, "Disable display (default: false)"); + +module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600); +MODULE_PARM_DESC(enable_cmd_parser, + "Enable command parsing (1=enabled, 0=disabled [default])"); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b719385c4511..146609ab42bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -174,6 +174,18 @@ #define VGA_CR_INDEX_CGA 0x3d4 #define VGA_CR_DATA_CGA 0x3d5 +/* + * Instruction field definitions used by the command parser + */ +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_CLIENT_MASK 0xE0000000 +#define INSTR_MI_CLIENT 0x0 +#define INSTR_BC_CLIENT 0x2 +#define INSTR_RC_CLIENT 0x3 +#define INSTR_SUBCLIENT_SHIFT 27 +#define INSTR_SUBCLIENT_MASK 0x18000000 +#define INSTR_MEDIA_SUBCLIENT 0x2 + /* * Memory interface instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b2d4f4852c98..5629cc7a7732 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1388,6 +1388,8 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (IS_I830(ring->dev) || IS_845G(ring->dev)) ring->effective_size -= 128; + i915_cmd_parser_init_ring(ring); + return 0; err_unmap: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 38c757e136dc..9d4c3b1182e9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -164,6 +164,38 @@ struct intel_ring_buffer { u32 gtt_offset; volatile u32 *cpu_page; } scratch; + + /* + * Tables of commands the command parser needs to know about + * for this ring. + */ + const struct drm_i915_cmd_table *cmd_tables; + int cmd_table_count; + + /* + * Table of registers allowed in commands that read/write registers. + */ + const u32 *reg_table; + int reg_count; + + /* + * Table of registers allowed in commands that read/write registers, but + * only from the DRM master. + */ + const u32 *master_reg_table; + int master_reg_count; + + /* + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. + * + * If the command parser finds an entry for a command in the ring's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-ring length field + * encoding for the command (i.e. certain opcode ranges use certain bits + * to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); }; static inline bool -- cgit From a51435a3137ad8ae75c288c39bd2d8b2696bae8f Mon Sep 17 00:00:00 2001 From: Naresh Kumar Kachhi Date: Wed, 12 Mar 2014 16:39:40 +0530 Subject: drm/i915: disable rings before HW status page setup Rings should be idle before issuing sync_flush (in intel_ring_setup_status_page). This patch moves the ring disabling before doing the HW status page setup. Signed-off-by: Naresh Kumar Kachhi Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 859092130c18..42b400144379 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -440,16 +440,16 @@ static int init_ring_common(struct intel_ring_buffer *ring) gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - if (I915_NEED_GFX_HWS(dev)) - intel_ring_setup_status_page(ring); - else - ring_setup_phys_status_page(ring); - /* Stop the ring if it's running. */ I915_WRITE_CTL(ring, 0); I915_WRITE_HEAD(ring, 0); ring->write_tail(ring, 0); + if (I915_NEED_GFX_HWS(dev)) + intel_ring_setup_status_page(ring); + else + ring_setup_phys_status_page(ring); + head = I915_READ_HEAD(ring) & HEAD_ADDR; /* G45 ring initialization fails to reset head to zero */ -- cgit From e9fea5747d2b3dbff47a8790c1cc4d7af80051d6 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Kachhi Date: Wed, 12 Mar 2014 16:39:41 +0530 Subject: drm/i915: wait for rings to become idle once disabled make sure we wait for rings to become idle once they are disabled. In case of timeout print an error message Signed-off-by: Naresh Kumar Kachhi [danvet: Frob patch as suggested by Chris.] Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 146609ab42bb..6174fda4d58e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -748,6 +748,7 @@ enum punit_power_well { #define RING_INSTPS(base) ((base)+0x70) #define RING_DMA_FADD(base) ((base)+0x78) #define RING_INSTPM(base) ((base)+0xc0) +#define RING_MI_MODE(base) ((base)+0x9c) #define INSTPS 0x02070 /* 965+ only */ #define INSTDONE1 0x0207c /* 965+ only */ #define ACTHD_I965 0x02074 @@ -824,6 +825,7 @@ enum punit_power_well { # define VS_TIMER_DISPATCH (1 << 6) # define MI_FLUSH_ENABLE (1 << 12) # define ASYNC_FLIP_PERF_DISABLE (1 << 14) +# define MODE_IDLE (1 << 9) #define GEN6_GT_MODE 0x20d0 #define GEN7_GT_MODE 0x7008 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 42b400144379..617634b6a6c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -444,6 +444,8 @@ static int init_ring_common(struct intel_ring_buffer *ring) I915_WRITE_CTL(ring, 0); I915_WRITE_HEAD(ring, 0); ring->write_tail(ring, 0); + if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) + DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); if (I915_NEED_GFX_HWS(dev)) intel_ring_setup_status_page(ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 09af92099c1b..f11ceb230db4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -33,6 +33,8 @@ struct intel_hw_status_page { #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) +#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) + enum intel_ring_hangcheck_action { HANGCHECK_IDLE = 0, HANGCHECK_WAIT, -- cgit From 02f6a1e750df8201561171c47472435557a65864 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Kachhi Date: Wed, 12 Mar 2014 16:39:42 +0530 Subject: drm/i915: warn if ring is active before sync flush Based on Bspec the command parser must be stopped prior to issuing sync flush. This should be done by the caller of intel_ring_setup_status_page. Patch adds a warning if it is not done. v2: rebased based on new patch (wait for ring to become idle) Signed-off-by: Naresh Kumar Kachhi Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 617634b6a6c2..c50388a86bca 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -984,6 +984,10 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) /* Flush the TLB for this page */ if (INTEL_INFO(dev)->gen >= 6) { u32 reg = RING_INSTPM(ring->mmio_base); + + /* ring should be idle before issuing a sync flush*/ + WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); + I915_WRITE(reg, _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | INSTPM_SYNC_FLUSH)); -- cgit From dc616b89dbc4bb6a99884d214bd1ed1e0eef59a0 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 13 Mar 2014 01:40:28 +0000 Subject: drm/i915/bdw: The TLB invalidation mechanism has been removed from INSTPM While wandering in the spec, I noticed that BDW removes those 2 bits from INSTPM. I couldn't find any direct way to invalidate the TLB (ie without the ring working already). Maybe someone will be more lucky. At least, we now know we may be a problem. Signed-off-by: Damien Lespiau Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c50388a86bca..4eb3e062b4e3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -981,8 +981,14 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); POSTING_READ(mmio); - /* Flush the TLB for this page */ - if (INTEL_INFO(dev)->gen >= 6) { + /* + * Flush the TLB for this page + * + * FIXME: These two bits have disappeared on gen8, so a question + * arises: do we still need this and if so how should we go about + * invalidating the TLB? + */ + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) { u32 reg = RING_INSTPM(ring->mmio_base); /* ring should be idle before issuing a sync flush*/ -- cgit From 61a563a2a6ada193a022666b51ec8bb64112efc5 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Tue, 25 Mar 2014 18:01:50 +0530 Subject: drm/i915: Remove the enabling of VS_TIMER_DISPATCH bit in MI MODE reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch Removes the VS_TIMER_DISPATCH bit enable in MI MODE reg for platforms > Gen6. VS_TIMER_DISPATCH bit enable was earlier required as a part of WA 'WaTimedSingleVertexDispatch', which is now applicable only to platforms < Gen7. v2: Enhancing the scope of the patch to full Gen7 (Chris) v3: Modifying the WA condition to the cover the applicable platforms, and adding the WA name in comments. (Ville) Signed-off-by: Akash Goel Signed-off-by: Sourab Gupta Tested-by: Chris Wilson # ivb, hsw -Chris Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4eb3e062b4e3..bfb5d75cc1c5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -566,7 +566,8 @@ static int init_render_ring(struct intel_ring_buffer *ring) struct drm_i915_private *dev_priv = dev->dev_private; int ret = init_ring_common(ring); - if (INTEL_INFO(dev)->gen > 3) + /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ + if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); /* We need to disable the AsyncFlip performance optimisations in order -- cgit From 508774452d657e8d307e1c52682ffcdf743e992d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Mar 2014 12:41:53 +0000 Subject: drm/i915: Broadwell expands ACTHD to 64bit As Broadwell has an increased virtual address size, it requires more than 32 bits to store offsets into its address space. This includes the debug registers to track the current HEAD of the individual rings, which may be anywhere within the per-process address spaces. In order to find the full location, we need to read the high bits from a second register. We then also need to expand our storage to keep track of the larger address. v2: Carefully read the two registers to catch wraparound between the reads. v3: Use a WARN_ON rather than loop indefinitely on an unstable register read. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Timo Aaltonen Cc: Tvrtko Ursulin Reviewed-by: Ben Widawsky [danvet: Drop spurious hunk which conflicted.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 5 +++-- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++--- 6 files changed, 36 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7b344c5300f0..537404b9f760 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -351,12 +351,12 @@ struct drm_i915_error_state { u32 ipeir; u32 ipehr; u32 instdone; - u32 acthd; u32 bbstate; u32 instpm; u32 instps; u32 seqno; u64 bbaddr; + u64 acthd; u32 fault_reg; u32 faddr; u32 rc_psmi; /* sleep state */ @@ -2746,6 +2746,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine); #define I915_WRITE64(reg, val) dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true) #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) +#define I915_READ64_2x32(lower_reg, upper_reg) ({ \ + u32 upper = I915_READ(upper_reg); \ + u32 lower = I915_READ(lower_reg); \ + u32 tmp = I915_READ(upper_reg); \ + if (upper != tmp) { \ + upper = tmp; \ + lower = I915_READ(lower_reg); \ + WARN_ON(I915_READ(upper_reg) != upper); \ + } \ + (u64)upper << 32 | lower; }) + #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index baf1ca690dc5..ed1fac7f945c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -247,7 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " TAIL: 0x%08x\n", ring->tail); err_printf(m, " CTL: 0x%08x\n", ring->ctl); err_printf(m, " HWS: 0x%08x\n", ring->hws); - err_printf(m, " ACTHD: 0x%08x\n", ring->acthd); + err_printf(m, " ACTHD: 0x%08llx\n", ring->acthd); err_printf(m, " IPEIR: 0x%08x\n", ring->ipeir); err_printf(m, " IPEHR: 0x%08x\n", ring->ipehr); err_printf(m, " INSTDONE: 0x%08x\n", ring->instdone); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index df2f3007d8c5..5e353a4af921 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2600,7 +2600,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) } static enum intel_ring_hangcheck_action -ring_stuck(struct intel_ring_buffer *ring, u32 acthd) +ring_stuck(struct intel_ring_buffer *ring, u64 acthd) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -2668,7 +2668,8 @@ static void i915_hangcheck_elapsed(unsigned long data) return; for_each_ring(ring, dev_priv, i) { - u32 seqno, acthd; + u64 acthd; + u32 seqno; bool busy = true; semaphore_clear_deadlocks(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d90dc20077e3..9f5b18d9d885 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -706,6 +706,7 @@ enum punit_power_well { #define BLT_HWS_PGA_GEN7 (0x04280) #define VEBOX_HWS_PGA_GEN7 (0x04380) #define RING_ACTHD(base) ((base)+0x74) +#define RING_ACTHD_UDW(base) ((base)+0x5c) #define RING_NOPID(base) ((base)+0x94) #define RING_IMR(base) ((base)+0xa8) #define RING_TIMESTAMP(base) ((base)+0x358) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bfb5d75cc1c5..0cba12948915 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -410,13 +410,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring, I915_WRITE_TAIL(ring, value); } -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring) +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring) { drm_i915_private_t *dev_priv = ring->dev->dev_private; - u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ? - RING_ACTHD(ring->mmio_base) : ACTHD; + u64 acthd; - return I915_READ(acthd_reg); + if (INTEL_INFO(ring->dev)->gen >= 8) + acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base), + RING_ACTHD_UDW(ring->mmio_base)); + else if (INTEL_INFO(ring->dev)->gen >= 4) + acthd = I915_READ(RING_ACTHD(ring->mmio_base)); + else + acthd = I915_READ(ACTHD); + + return acthd; } static void ring_setup_phys_status_page(struct intel_ring_buffer *ring) @@ -814,8 +821,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. */ - if (!lazy_coherency) - intel_ring_get_active_head(ring); + if (!lazy_coherency) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + POSTING_READ(RING_ACTHD(ring->mmio_base)); + } + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f11ceb230db4..270a6a973438 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action { #define HANGCHECK_SCORE_RING_HUNG 31 struct intel_ring_hangcheck { - bool deadlock; + u64 acthd; u32 seqno; - u32 acthd; int score; enum intel_ring_hangcheck_action action; + bool deadlock; }; struct intel_ring_buffer { @@ -292,7 +292,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_blt_ring_buffer(struct drm_device *dev); int intel_init_vebox_ring_buffer(struct drm_device *dev); -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring); void intel_ring_setup_status_page(struct intel_ring_buffer *ring); static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring) -- cgit From 4640c4ff083d3294a41607f5e512bf1ec09bb161 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 31 Mar 2014 14:27:19 +0300 Subject: drm/i915/ringbuffer: prefer struct drm_i915_private to drm_i915_private_t No functional changes. Signed-off-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0cba12948915..6bc68bdcf433 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -406,13 +406,13 @@ gen8_render_ring_flush(struct intel_ring_buffer *ring, static void ring_write_tail(struct intel_ring_buffer *ring, u32 value) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv = ring->dev->dev_private; I915_WRITE_TAIL(ring, value); } u64 intel_ring_get_active_head(struct intel_ring_buffer *ring) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv = ring->dev->dev_private; u64 acthd; if (INTEL_INFO(ring->dev)->gen >= 8) @@ -440,7 +440,7 @@ static void ring_setup_phys_status_page(struct intel_ring_buffer *ring) static int init_ring_common(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = ring->obj; int ret = 0; u32 head; @@ -857,7 +857,7 @@ static bool gen5_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; if (!dev->irq_enabled) @@ -875,7 +875,7 @@ static void gen5_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -888,7 +888,7 @@ static bool i9xx_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; if (!dev->irq_enabled) @@ -909,7 +909,7 @@ static void i9xx_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -925,7 +925,7 @@ static bool i8xx_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; if (!dev->irq_enabled) @@ -946,7 +946,7 @@ static void i8xx_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -961,7 +961,7 @@ i8xx_ring_put_irq(struct intel_ring_buffer *ring) void intel_ring_setup_status_page(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv = ring->dev->dev_private; u32 mmio = 0; /* The ring status page addresses are no longer next to the rest of @@ -1054,7 +1054,7 @@ static bool gen6_ring_get_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; if (!dev->irq_enabled) @@ -1079,7 +1079,7 @@ static void gen6_ring_put_irq(struct intel_ring_buffer *ring) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -1646,7 +1646,7 @@ static int __intel_ring_prepare(struct intel_ring_buffer *ring, int intel_ring_begin(struct intel_ring_buffer *ring, int num_dwords) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret; ret = i915_gem_check_wedge(&dev_priv->gpu_error, @@ -1708,7 +1708,7 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, u32 value) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv = ring->dev->dev_private; /* Every tail move must follow the sequence below */ @@ -1883,7 +1883,7 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, int intel_init_render_ring_buffer(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; ring->name = "render ring"; @@ -1984,7 +1984,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; int ret; @@ -2052,7 +2052,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) int intel_init_bsd_ring_buffer(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[VCS]; ring->name = "bsd ring"; @@ -2115,7 +2115,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) int intel_init_blt_ring_buffer(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; ring->name = "blitter ring"; @@ -2155,7 +2155,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) int intel_init_vebox_ring_buffer(struct drm_device *dev) { - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = &dev_priv->ring[VECS]; ring->name = "video enhancement ring"; -- cgit